diff options
author | Huacai Chen <chenhuacai@loongson.cn> | 2023-11-01 10:55:00 +0800 |
---|---|---|
committer | Huacai Chen <chenhuacai@loongson.cn> | 2023-11-01 10:55:00 +0800 |
commit | a6bdc082ad1c91d389a6ba0c7a1945818f732114 (patch) | |
tree | fa630701d5d2a8bc1ab8c4abf759663bbb81aeeb | |
parent | ffc253263a1375a65fa6c9f62a893e9767fbebfa (diff) | |
parent | 99c9991f4e5d77328187187d0c921a3b62bfa998 (diff) |
Merge 'bpf-next 2023-10-16' into loongarch-next
LoongArch architecture changes for 6.7 (BPF CPU v4 support) depend on
the bpf changes to fix conflictions in selftests and work, so merge them
to create a base.
558 files changed, 44893 insertions, 4883 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 4877563241f3..c7525942f12c 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -71,6 +71,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - s390x - riscv64 - riscv32 + - loongarch64 And the older cBPF JIT supported on the following archs: diff --git a/Documentation/bpf/libbpf/program_types.rst b/Documentation/bpf/libbpf/program_types.rst index ad4d4d5eecb0..63bb88846e50 100644 --- a/Documentation/bpf/libbpf/program_types.rst +++ b/Documentation/bpf/libbpf/program_types.rst @@ -56,6 +56,16 @@ described in more detail in the footnotes. | | ``BPF_CGROUP_UDP6_RECVMSG`` | ``cgroup/recvmsg6`` | | + +----------------------------------------+----------------------------------+-----------+ | | ``BPF_CGROUP_UDP6_SENDMSG`` | ``cgroup/sendmsg6`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_CONNECT`` | ``cgroup/connect_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_SENDMSG`` | ``cgroup/sendmsg_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_RECVMSG`` | ``cgroup/recvmsg_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_GETPEERNAME`` | ``cgroup/getpeername_unix`` | | +| +----------------------------------------+----------------------------------+-----------+ +| | ``BPF_CGROUP_UNIX_GETSOCKNAME`` | ``cgroup/getsockname_unix`` | | +-------------------------------------------+----------------------------------------+----------------------------------+-----------+ | ``BPF_PROG_TYPE_CGROUP_SOCK`` | ``BPF_CGROUP_INET4_POST_BIND`` | ``cgroup/post_bind4`` | | + +----------------------------------------+----------------------------------+-----------+ diff --git a/Documentation/bpf/prog_flow_dissector.rst b/Documentation/bpf/prog_flow_dissector.rst index 4d86780ab0f1..f24270b8b034 100644 --- a/Documentation/bpf/prog_flow_dissector.rst +++ b/Documentation/bpf/prog_flow_dissector.rst @@ -113,7 +113,7 @@ Flags used by ``eth_get_headlen`` to estimate length of all headers for GRO. * ``BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL`` - tells BPF flow dissector to stop parsing as soon as it reaches IPv6 flow label; used by - ``___skb_get_hash`` and ``__skb_get_hash_symmetric`` to get flow hash. + ``___skb_get_hash`` to get flow hash. * ``BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP`` - tells BPF flow dissector to stop parsing as soon as it reaches encapsulated headers; used by routing infrastructure. diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml index 28ded09d72e3..e7720caf31b3 100644 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml @@ -22,6 +22,7 @@ properties: - mediatek,mt7622-wed - mediatek,mt7981-wed - mediatek,mt7986-wed + - mediatek,mt7988-wed - const: syscon reg: diff --git a/Documentation/devicetree/bindings/mfd/syscon.yaml b/Documentation/devicetree/bindings/mfd/syscon.yaml index 8103154bbb52..c77d7b155a4c 100644 --- a/Documentation/devicetree/bindings/mfd/syscon.yaml +++ b/Documentation/devicetree/bindings/mfd/syscon.yaml @@ -49,6 +49,8 @@ properties: - hisilicon,peri-subctrl - hpe,gxp-sysreg - intel,lgm-syscon + - loongson,ls1b-syscon + - loongson,ls1c-syscon - marvell,armada-3700-usb2-host-misc - mediatek,mt8135-pctl-a-syscfg - mediatek,mt8135-pctl-b-syscfg diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml index 03b5567be389..41014f5c01c4 100644 --- a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml +++ b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml @@ -49,6 +49,26 @@ properties: Set if the output SYNCLKO clock should be disabled. Do not mix with microchip,synclko-125. + microchip,io-drive-strength-microamp: + description: + IO Pad Drive Strength + enum: [8000, 16000] + default: 16000 + + microchip,hi-drive-strength-microamp: + description: + High Speed Drive Strength. Controls drive strength of GMII / RGMII / + MII / RMII (except TX_CLK/REFCLKI, COL and CRS) and CLKO_25_125 lines. + enum: [2000, 4000, 8000, 12000, 16000, 20000, 24000, 28000] + default: 24000 + + microchip,lo-drive-strength-microamp: + description: + Low Speed Drive Strength. Controls drive strength of TX_CLK / REFCLKI, + COL, CRS, LEDs, PME_N, NTRP_N, SDO and SDI/SDA/MDIO lines. + enum: [2000, 4000, 8000, 12000, 16000, 20000, 24000, 28000] + default: 8000 + interrupts: maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml new file mode 100644 index 000000000000..c4f3224bad38 --- /dev/null +++ b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/loongson,ls1b-gmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Loongson-1B Gigabit Ethernet MAC Controller + +maintainers: + - Keguang Zhang <keguang.zhang@gmail.com> + +description: | + Loongson-1B Gigabit Ethernet MAC Controller is based on + Synopsys DesignWare MAC (version 3.50a). + + Main features + - Dual 10/100/1000Mbps GMAC controllers + - Full-duplex operation (IEEE 802.3x flow control automatic transmission) + - Half-duplex operation (CSMA/CD Protocol and back-pressure support) + - RX Checksum Offload + - TX Checksum insertion + - MII interface + - RGMII interface + +select: + properties: + compatible: + contains: + enum: + - loongson,ls1b-gmac + required: + - compatible + +properties: + compatible: + items: + - enum: + - loongson,ls1b-gmac + - const: snps,dwmac-3.50a + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + items: + - const: stmmaceth + + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: macirq + + loongson,ls1-syscon: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to the syscon containing some extra configurations + including PHY interface mode. + + phy-mode: + enum: + - mii + - rgmii-id + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - loongson,ls1-syscon + +allOf: + - $ref: snps,dwmac.yaml# + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/loongson,ls1x-clk.h> + #include <dt-bindings/interrupt-controller/irq.h> + + gmac0: ethernet@1fe10000 { + compatible = "loongson,ls1b-gmac", "snps,dwmac-3.50a"; + reg = <0x1fe10000 0x10000>; + + clocks = <&clkc LS1X_CLKID_AHB>; + clock-names = "stmmaceth"; + + interrupt-parent = <&intc1>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + + loongson,ls1-syscon = <&syscon>; + + phy-handle = <&phy0>; + phy-mode = "mii"; + snps,pbl = <1>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + + phy0: ethernet-phy@0 { + reg = <0x0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml new file mode 100644 index 000000000000..99001b940b83 --- /dev/null +++ b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/loongson,ls1c-emac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Loongson-1C Ethernet MAC Controller + +maintainers: + - Keguang Zhang <keguang.zhang@gmail.com> + +description: | + Loongson-1C Ethernet MAC Controller is based on + Synopsys DesignWare MAC (version 3.50a). + + Main features + - 10/100Mbps + - Full-duplex operation (IEEE 802.3x flow control automatic transmission) + - Half-duplex operation (CSMA/CD Protocol and back-pressure support) + - IEEE 802.1Q VLAN tag detection for reception frames + - MII interface + - RMII interface + +select: + properties: + compatible: + contains: + enum: + - loongson,ls1c-emac + required: + - compatible + +properties: + compatible: + items: + - enum: + - loongson,ls1c-emac + - const: snps,dwmac-3.50a + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + items: + - const: stmmaceth + + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: macirq + + loongson,ls1-syscon: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to the syscon containing some extra configurations + including PHY interface mode. + + phy-mode: + enum: + - mii + - rmii + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + - interrupt-names + - loongson,ls1-syscon + +allOf: + - $ref: snps,dwmac.yaml# + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/loongson,ls1x-clk.h> + #include <dt-bindings/interrupt-controller/irq.h> + + emac: ethernet@1fe10000 { + compatible = "loongson,ls1c-emac", "snps,dwmac-3.50a"; + reg = <0x1fe10000 0x10000>; + + clocks = <&clkc LS1X_CLKID_AHB>; + clock-names = "stmmaceth"; + + interrupt-parent = <&intc1>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq"; + + loongson,ls1-syscon = <&syscon>; + + phy-handle = <&phy0>; + phy-mode = "mii"; + snps,pbl = <1>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + + phy0: ethernet-phy@13 { + reg = <0x13>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index ddf9522a5dc2..5c2769dc689a 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -394,6 +394,11 @@ properties: When a PFC frame is received with priorities matching the bitmask, the queue is blocked from transmitting for the pause time specified in the PFC frame. + + snps,coe-unsupported: + type: boolean + description: TX checksum offload is unsupported by the TX queue. + allOf: - if: required: diff --git a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml index 311c570165f9..229c8f32019f 100644 --- a/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml +++ b/Documentation/devicetree/bindings/net/ti,icssg-prueth.yaml @@ -19,6 +19,7 @@ allOf: properties: compatible: enum: + - ti,am642-icssg-prueth # for AM64x SoC family - ti,am654-icssg-prueth # for AM65x SoC family sram: @@ -106,6 +107,13 @@ properties: phandle to system controller node and register offset to ICSSG control register for RGMII transmit delay + ti,half-duplex-capable: + type: boolean + description: + Indicates that the PHY output pin COL is routed to ICSSG GPIO pin + (PRGx_PRU0/1_GPIO10) as input so that the ICSSG MII port is + capable of half duplex operations. + required: - reg anyOf: diff --git a/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml b/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml index f0fa92b04b32..3b212f26abc5 100644 --- a/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml +++ b/Documentation/devicetree/bindings/soc/mediatek/mediatek,mt7986-wo-ccif.yaml @@ -20,6 +20,7 @@ properties: items: - enum: - mediatek,mt7986-wo-ccif + - mediatek,mt7988-wo-ccif - const: syscon reg: diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst new file mode 100644 index 000000000000..bb52f1b8c0be --- /dev/null +++ b/Documentation/driver-api/dpll.rst @@ -0,0 +1,497 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================== +The Linux kernel dpll subsystem +=============================== + +DPLL +==== + +PLL - Phase Locked Loop is an electronic circuit which syntonizes clock +signal of a device with an external clock signal. Effectively enabling +device to run on the same clock signal beat as provided on a PLL input. + +DPLL - Digital Phase Locked Loop is an integrated circuit which in +addition to plain PLL behavior incorporates a digital phase detector +and may have digital divider in the loop. As a result, the frequency on +DPLL's input and output may be configurable. + +Subsystem +========= + +The main purpose of dpll subsystem is to provide general interface +to configure devices that use any kind of Digital PLL and could use +different sources of input signal to synchronize to, as well as +different types of outputs. +The main interface is NETLINK_GENERIC based protocol with an event +monitoring multicast group defined. + +Device object +============= + +Single dpll device object means single Digital PLL circuit and bunch of +connected pins. +It reports the supported modes of operation and current status to the +user in response to the `do` request of netlink command +``DPLL_CMD_DEVICE_GET`` and list of dplls registered in the subsystem +with `dump` netlink request of the same command. +Changing the configuration of dpll device is done with `do` request of +netlink ``DPLL_CMD_DEVICE_SET`` command. +A device handle is ``DPLL_A_ID``, it shall be provided to get or set +configuration of particular device in the system. It can be obtained +with a ``DPLL_CMD_DEVICE_GET`` `dump` request or +a ``DPLL_CMD_DEVICE_ID_GET`` `do` request, where the one must provide +attributes that result in single device match. + +Pin object +========== + +A pin is amorphic object which represents either input or output, it +could be internal component of the device, as well as externally +connected. +The number of pins per dpll vary, but usually multiple pins shall be +provided for a single dpll device. +Pin's properties, capabilities and status is provided to the user in +response to `do` request of netlink ``DPLL_CMD_PIN_GET`` command. +It is also possible to list all the pins that were registered in the +system with `dump` request of ``DPLL_CMD_PIN_GET`` command. +Configuration of a pin can be changed by `do` request of netlink +``DPLL_CMD_PIN_SET`` command. +Pin handle is a ``DPLL_A_PIN_ID``, it shall be provided to get or set +configuration of particular pin in the system. It can be obtained with +``DPLL_CMD_PIN_GET`` `dump` request or ``DPLL_CMD_PIN_ID_GET`` `do` +request, where user provides attributes that result in single pin match. + +Pin selection +============= + +In general, selected pin (the one which signal is driving the dpll +device) can be obtained from ``DPLL_A_PIN_STATE`` attribute, and only +one pin shall be in ``DPLL_PIN_STATE_CONNECTED`` state for any dpll +device. + +Pin selection can be done either manually or automatically, depending +on hardware capabilities and active dpll device work mode +(``DPLL_A_MODE`` attribute). The consequence is that there are +differences for each mode in terms of available pin states, as well as +for the states the user can request for a dpll device. + +In manual mode (``DPLL_MODE_MANUAL``) the user can request or receive +one of following pin states: + +- ``DPLL_PIN_STATE_CONNECTED`` - the pin is used to drive dpll device +- ``DPLL_PIN_STATE_DISCONNECTED`` - the pin is not used to drive dpll + device + +In automatic mode (``DPLL_MODE_AUTOMATIC``) the user can request or +receive one of following pin states: + +- ``DPLL_PIN_STATE_SELECTABLE`` - the pin shall be considered as valid + input for automatic selection algorithm +- ``DPLL_PIN_STATE_DISCONNECTED`` - the pin shall be not considered as + a valid input for automatic selection algorithm + +In automatic mode (``DPLL_MODE_AUTOMATIC``) the user can only receive +pin state ``DPLL_PIN_STATE_CONNECTED`` once automatic selection +algorithm locks a dpll device with one of the inputs. + +Shared pins +=========== + +A single pin object can be attached to multiple dpll devices. +Then there are two groups of configuration knobs: + +1) Set on a pin - the configuration affects all dpll devices pin is + registered to (i.e., ``DPLL_A_PIN_FREQUENCY``), +2) Set on a pin-dpll tuple - the configuration affects only selected + dpll device (i.e., ``DPLL_A_PIN_PRIO``, ``DPLL_A_PIN_STATE``, + ``DPLL_A_PIN_DIRECTION``). + +MUX-type pins +============= + +A pin can be MUX-type, it aggregates child pins and serves as a pin +multiplexer. One or more pins are registered with MUX-type instead of +being directly registered to a dpll device. +Pins registered with a MUX-type pin provide user with additional nested +attribute ``DPLL_A_PIN_PARENT_PIN`` for each parent they were registered +with. +If a pin was registered with multiple parent pins, they behave like a +multiple output multiplexer. In this case output of a +``DPLL_CMD_PIN_GET`` would contain multiple pin-parent nested +attributes with current state related to each parent, like: + +'pin': [{{ + 'clock-id': 282574471561216, + 'module-name': 'ice', + 'capabilities': 4, + 'id': 13, + 'parent-pin': [ + {'parent-id': 2, 'state': 'connected'}, + {'parent-id': 3, 'state': 'disconnected'} + ], + 'type': 'synce-eth-port' + }}] + +Only one child pin can provide its signal to the parent MUX-type pin at +a time, the selection is done by requesting change of a child pin state +on desired parent, with the use of ``DPLL_A_PIN_PARENT`` nested +attribute. Example of netlink `set state on parent pin` message format: + + ========================== ============================================= + ``DPLL_A_PIN_ID`` child pin id + ``DPLL_A_PIN_PARENT_PIN`` nested attribute for requesting configuration + related to parent pin + ``DPLL_A_PIN_PARENT_ID`` parent pin id + ``DPLL_A_PIN_STATE`` requested pin state on parent + ========================== ============================================= + +Pin priority +============ + +Some devices might offer a capability of automatic pin selection mode +(enum value ``DPLL_MODE_AUTOMATIC`` of ``DPLL_A_MODE`` attribute). +Usually, automatic selection is performed on the hardware level, which +means only pins directly connected to the dpll can be used for automatic +input pin selection. +In automatic selection mode, the user cannot manually select a input +pin for the device, instead the user shall provide all directly +connected pins with a priority ``DPLL_A_PIN_PRIO``, the device would +pick a highest priority valid signal and use it to control the DPLL +device. Example of netlink `set priority on parent pin` message format: + + ============================ ============================================= + ``DPLL_A_PIN_ID`` configured pin id + ``DPLL_A_PIN_PARENT_DEVICE`` nested attribute for requesting configuration + related to parent dpll device + ``DPLL_A_PIN_PARENT_ID`` parent dpll device id + ``DPLL_A_PIN_PRIO`` requested pin prio on parent dpll + ============================ ============================================= + +Child pin of MUX-type pin is not capable of automatic input pin selection, +in order to configure active input of a MUX-type pin, the user needs to +request desired pin state of the child pin on the parent pin, +as described in the ``MUX-type pins`` chapter. + +Configuration commands group +============================ + +Configuration commands are used to get information about registered +dpll devices (and pins), as well as set configuration of device or pins. +As dpll devices must be abstracted and reflect real hardware, +there is no way to add new dpll device via netlink from user space and +each device should be registered by its driver. + +All netlink commands require ``GENL_ADMIN_PERM``. This is to prevent +any spamming/DoS from unauthorized userspace applications. + +List of netlink commands with possible attributes +================================================= + +Constants identifying command types for dpll device uses a +``DPLL_CMD_`` prefix and suffix according to command purpose. +The dpll device related attributes use a ``DPLL_A_`` prefix and +suffix according to attribute purpose. + + ==================================== ================================= + ``DPLL_CMD_DEVICE_ID_GET`` command to get device ID + ``DPLL_A_MODULE_NAME`` attr module name of registerer + ``DPLL_A_CLOCK_ID`` attr Unique Clock Identifier + (EUI-64), as defined by the + IEEE 1588 standard + ``DPLL_A_TYPE`` attr type of dpll device + ==================================== ================================= + + ==================================== ================================= + ``DPLL_CMD_DEVICE_GET`` command to get device info or + dump list of available devices + ``DPLL_A_ID`` attr unique dpll device ID + ``DPLL_A_MODULE_NAME`` attr module name of registerer + ``DPLL_A_CLOCK_ID`` attr Unique Clock Identifier + (EUI-64), as defined by the + IEEE 1588 standard + ``DPLL_A_MODE`` attr selection mode + ``DPLL_A_MODE_SUPPORTED`` attr available selection modes + ``DPLL_A_LOCK_STATUS`` attr dpll device lock status + ``DPLL_A_TEMP`` attr device temperature info + ``DPLL_A_TYPE`` attr type of dpll device + ==================================== ================================= + + ==================================== ================================= + ``DPLL_CMD_DEVICE_SET`` command to set dpll device config + ``DPLL_A_ID`` attr internal dpll device index + ``DPLL_A_MODE`` attr selection mode to configure + ==================================== ================================= + +Constants identifying command types for pins uses a +``DPLL_CMD_PIN_`` prefix and suffix according to command purpose. +The pin related attributes use a ``DPLL_A_PIN_`` prefix and suffix +according to attribute purpose. + + ==================================== ================================= + ``DPLL_CMD_PIN_ID_GET`` command to get pin ID + ``DPLL_A_PIN_MODULE_NAME`` attr module name of registerer + ``DPLL_A_PIN_CLOCK_ID`` attr Unique Clock Identifier + (EUI-64), as defined by the + IEEE 1588 standard + ``DPLL_A_PIN_BOARD_LABEL`` attr pin board label provided + by registerer + ``DPLL_A_PIN_PANEL_LABEL`` attr pin panel label provided + by registerer + ``DPLL_A_PIN_PACKAGE_LABEL`` attr pin package label provided + by registerer + ``DPLL_A_PIN_TYPE`` attr type of a pin + ==================================== ================================= + + ==================================== ================================== + ``DPLL_CMD_PIN_GET`` command to get pin info or dump + list of available pins + ``DPLL_A_PIN_ID`` attr unique a pin ID + ``DPLL_A_PIN_MODULE_NAME`` attr module name of registerer + ``DPLL_A_PIN_CLOCK_ID`` attr Unique Clock Identifier + (EUI-64), as defined by the + IEEE 1588 standard + ``DPLL_A_PIN_BOARD_LABEL`` attr pin board label provided + by registerer + ``DPLL_A_PIN_PANEL_LABEL`` attr pin panel label provided + by registerer + ``DPLL_A_PIN_PACKAGE_LABEL`` attr pin package label provided + by registerer + ``DPLL_A_PIN_TYPE`` attr type of a pin + ``DPLL_A_PIN_FREQUENCY`` attr current frequency of a pin + ``DPLL_A_PIN_FREQUENCY_SUPPORTED`` nested attr provides supported + frequencies + ``DPLL_A_PIN_ANY_FREQUENCY_MIN`` attr minimum value of frequency + ``DPLL_A_PIN_ANY_FREQUENCY_MAX`` attr maximum value of frequency + ``DPLL_A_PIN_PARENT_DEVICE`` nested attr for each parent device + the pin is connected with + ``DPLL_A_PIN_PARENT_ID`` attr parent dpll device id + ``DPLL_A_PIN_PRIO`` attr priority of pin on the + dpll device + ``DPLL_A_PIN_STATE`` attr state of pin on the parent + dpll device + ``DPLL_A_PIN_DIRECTION`` attr direction of a pin on the + parent dpll device + ``DPLL_A_PIN_PARENT_PIN`` nested attr for each parent pin + the pin is connected with + ``DPLL_A_PIN_PARENT_ID`` attr parent pin id + ``DPLL_A_PIN_STATE`` attr state of pin on the parent + pin + ``DPLL_A_PIN_CAPABILITIES`` attr bitmask of pin capabilities + ==================================== ================================== + + ==================================== ================================= + ``DPLL_CMD_PIN_SET`` command to set pins configuration + ``DPLL_A_PIN_ID`` attr unique a pin ID + ``DPLL_A_PIN_FREQUENCY`` attr requested frequency of a pin + ``DPLL_A_PIN_PARENT_DEVICE`` nested attr for each parent dpll + device configuration request + ``DPLL_A_PIN_PARENT_ID`` attr parent dpll device id + ``DPLL_A_PIN_DIRECTION`` attr requested direction of a pin + ``DPLL_A_PIN_PRIO`` attr requested priority of pin on + the dpll device + ``DPLL_A_PIN_STATE`` attr requested state of pin on + the dpll device + ``DPLL_A_PIN_PARENT_PIN`` nested attr for each parent pin + configuration request + ``DPLL_A_PIN_PARENT_ID`` attr parent pin id + ``DPLL_A_PIN_STATE`` attr requested state of pin on + parent pin + ==================================== ================================= + +Netlink dump requests +===================== + +The ``DPLL_CMD_DEVICE_GET`` and ``DPLL_CMD_PIN_GET`` commands are +capable of dump type netlink requests, in which case the response is in +the same format as for their ``do`` request, but every device or pin +registered in the system is returned. + +SET commands format +=================== + +``DPLL_CMD_DEVICE_SET`` - to target a dpll device, the user provides +``DPLL_A_ID``, which is unique identifier of dpll device in the system, +as well as parameter being configured (``DPLL_A_MODE``). + +``DPLL_CMD_PIN_SET`` - to target a pin user must provide a +``DPLL_A_PIN_ID``, which is unique identifier of a pin in the system. +Also configured pin parameters must be added. +If ``DPLL_A_PIN_FREQUENCY`` is configured, this affects all the dpll +devices that are connected with the pin, that is why frequency attribute +shall not be enclosed in ``DPLL_A_PIN_PARENT_DEVICE``. +Other attributes: ``DPLL_A_PIN_PRIO``, ``DPLL_A_PIN_STATE`` or +``DPLL_A_PIN_DIRECTION`` must be enclosed in +``DPLL_A_PIN_PARENT_DEVICE`` as their configuration relates to only one +of parent dplls, targeted by ``DPLL_A_PIN_PARENT_ID`` attribute which is +also required inside that nest. +For MUX-type pins the ``DPLL_A_PIN_STATE`` attribute is configured in +similar way, by enclosing required state in ``DPLL_A_PIN_PARENT_PIN`` +nested attribute and targeted parent pin id in ``DPLL_A_PIN_PARENT_ID``. + +In general, it is possible to configure multiple parameters at once, but +internally each parameter change will be invoked separately, where order +of configuration is not guaranteed by any means. + +Configuration pre-defined enums +=============================== + +.. kernel-doc:: include/uapi/linux/dpll.h + +Notifications +============= + +dpll device can provide notifications regarding status changes of the +device, i.e. lock status changes, input/output changes or other alarms. +There is one multicast group that is used to notify user-space apps via +netlink socket: ``DPLL_MCGRP_MONITOR`` + +Notifications messages: + + ============================== ===================================== + ``DPLL_CMD_DEVICE_CREATE_NTF`` dpll device was created + ``DPLL_CMD_DEVICE_DELETE_NTF`` dpll device was deleted + ``DPLL_CMD_DEVICE_CHANGE_NTF`` dpll device has changed + ``DPLL_CMD_PIN_CREATE_NTF`` dpll pin was created + ``DPLL_CMD_PIN_DELETE_NTF`` dpll pin was deleted + ``DPLL_CMD_PIN_CHANGE_NTF`` dpll pin has changed + ============================== ===================================== + +Events format is the same as for the corresponding get command. +Format of ``DPLL_CMD_DEVICE_`` events is the same as response of +``DPLL_CMD_DEVICE_GET``. +Format of ``DPLL_CMD_PIN_`` events is same as response of +``DPLL_CMD_PIN_GET``. + +Device driver implementation +============================ + +Device is allocated by dpll_device_get() call. Second call with the +same arguments will not create new object but provides pointer to +previously created device for given arguments, it also increases +refcount of that object. +Device is deallocated by dpll_device_put() call, which first +decreases the refcount, once refcount is cleared the object is +destroyed. + +Device should implement set of operations and register device via +dpll_device_register() at which point it becomes available to the +users. Multiple driver instances can obtain reference to it with +dpll_device_get(), as well as register dpll device with their own +ops and priv. + +The pins are allocated separately with dpll_pin_get(), it works +similarly to dpll_device_get(). Function first creates object and then +for each call with the same arguments only the object refcount +increases. Also dpll_pin_put() works similarly to dpll_device_put(). + +A pin can be registered with parent dpll device or parent pin, depending +on hardware needs. Each registration requires registerer to provide set +of pin callbacks, and private data pointer for calling them: + +- dpll_pin_register() - register pin with a dpll device, +- dpll_pin_on_pin_register() - register pin with another MUX type pin. + +Notifications of adding or removing dpll devices are created within +subsystem itself. +Notifications about registering/deregistering pins are also invoked by +the subsystem. +Notifications about status changes either of dpll device or a pin are +invoked in two ways: + +- after successful change was requested on dpll subsystem, the subsystem + calls corresponding notification, +- requested by device driver with dpll_device_change_ntf() or + dpll_pin_change_ntf() when driver informs about the status change. + +The device driver using dpll interface is not required to implement all +the callback operation. Nevertheless, there are few required to be +implemented. +Required dpll device level callback operations: + +- ``.mode_get``, +- ``.lock_status_get``. + +Required pin level callback operations: + +- ``.state_on_dpll_get`` (pins registered with dpll device), +- ``.state_on_pin_get`` (pins registered with parent pin), +- ``.direction_get``. + +Every other operation handler is checked for existence and +``-EOPNOTSUPP`` is returned in case of absence of specific handler. + +The simplest implementation is in the OCP TimeCard driver. The ops +structures are defined like this: + +.. code-block:: c + static const struct dpll_device_ops dpll_ops = { + .lock_status_get = ptp_ocp_dpll_lock_status_get, + .mode_get = ptp_ocp_dpll_mode_get, + .mode_supported = ptp_ocp_dpll_mode_supported, + }; + + static const struct dpll_pin_ops dpll_pins_ops = { + .frequency_get = ptp_ocp_dpll_frequency_get, + .frequency_set = ptp_ocp_dpll_frequency_set, + .direction_get = ptp_ocp_dpll_direction_get, + .direction_set = ptp_ocp_dpll_direction_set, + .state_on_dpll_get = ptp_ocp_dpll_state_get, + }; + +The registration part is then looks like this part: + +.. code-block:: c + clkid = pci_get_dsn(pdev); + bp->dpll = dpll_device_get(clkid, 0, THIS_MODULE); + if (IS_ERR(bp->dpll)) { + err = PTR_ERR(bp->dpll); + dev_err(&pdev->dev, "dpll_device_alloc failed\n"); + goto out; + } + + err = dpll_device_register(bp->dpll, DPLL_TYPE_PPS, &dpll_ops, bp); + if (err) + goto out; + + for (i = 0; i < OCP_SMA_NUM; i++) { + bp->sma[i].dpll_pin = dpll_pin_get(clkid, i, THIS_MODULE, &bp->sma[i].dpll_prop); + if (IS_ERR(bp->sma[i].dpll_pin)) { + err = PTR_ERR(bp->dpll); + goto out_dpll; + } + + err = dpll_pin_register(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, + &bp->sma[i]); + if (err) { + dpll_pin_put(bp->sma[i].dpll_pin); + goto out_dpll; + } + } + +In the error path we have to rewind every allocation in the reverse order: + +.. code-block:: c + while (i) { + --i; + dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, &bp->sma[i]); + dpll_pin_put(bp->sma[i].dpll_pin); + } + dpll_device_put(bp->dpll); + +More complex example can be found in Intel's ICE driver or nVidia's mlx5 driver. + +SyncE enablement +================ +For SyncE enablement it is required to allow control over dpll device +for a software application which monitors and configures the inputs of +dpll device in response to current state of a dpll device and its +inputs. +In such scenario, dpll device input signal shall be also configurable +to drive dpll with signal recovered from the PHY netdevice. +This is done by exposing a pin to the netdevice - attaching pin to the +netdevice itself with +``netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``. +Exposed pin id handle ``DPLL_A_PIN_ID`` is then identifiable by the user +as it is attached to rtnetlink respond to get ``RTM_NEWLINK`` command in +nested attribute ``IFLA_DPLL_PIN``. diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst index 1e16a40da3ba..f549a68951d7 100644 --- a/Documentation/driver-api/index.rst +++ b/Documentation/driver-api/index.rst @@ -114,6 +114,7 @@ available subsections can be seen below. zorro hte/index wmi + dpll .. only:: subproject and html diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml new file mode 100644 index 000000000000..8b86b28b47a6 --- /dev/null +++ b/Documentation/netlink/specs/dpll.yaml @@ -0,0 +1,488 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: dpll + +doc: DPLL subsystem. + +definitions: + - + type: enum + name: mode + doc: | + working modes a dpll can support, differentiates if and how dpll selects + one of its inputs to syntonize with it, valid values for DPLL_A_MODE + attribute + entries: + - + name: manual + doc: input can be only selected by sending a request to dpll + value: 1 + - + name: automatic + doc: highest prio input pin auto selected by dpll + render-max: true + - + type: enum + name: lock-status + doc: | + provides information of dpll device lock status, valid values for + DPLL_A_LOCK_STATUS attribute + entries: + - + name: unlocked + doc: | + dpll was not yet locked to any valid input (or forced by setting + DPLL_A_MODE to DPLL_MODE_DETACHED) + value: 1 + - + name: locked + doc: | + dpll is locked to a valid signal, but no holdover available + - + name: locked-ho-acq + doc: | + dpll is locked and holdover acquired + - + name: holdover + doc: | + dpll is in holdover state - lost a valid lock or was forced + by disconnecting all the pins (latter possible only + when dpll lock-state was already DPLL_LOCK_STATUS_LOCKED_HO_ACQ, + if dpll lock-state was not DPLL_LOCK_STATUS_LOCKED_HO_ACQ, the + dpll's lock-state shall remain DPLL_LOCK_STATUS_UNLOCKED) + render-max: true + - + type: const + name: temp-divider + value: 1000 + doc: | + temperature divider allowing userspace to calculate the + temperature as float with three digit decimal precision. + Value of (DPLL_A_TEMP / DPLL_TEMP_DIVIDER) is integer part of + temperature value. + Value of (DPLL_A_TEMP % DPLL_TEMP_DIVIDER) is fractional part of + temperature value. + - + type: enum + name: type + doc: type of dpll, valid values for DPLL_A_TYPE attribute + entries: + - + name: pps + doc: dpll produces Pulse-Per-Second signal + value: 1 + - + name: eec + doc: dpll drives the Ethernet Equipment Clock + render-max: true + - + type: enum + name: pin-type + doc: | + defines possible types of a pin, valid values for DPLL_A_PIN_TYPE + attribute + entries: + - + name: mux + doc: aggregates another layer of selectable pins + value: 1 + - + name: ext + doc: external input + - + name: synce-eth-port + doc: ethernet port PHY's recovered clock + - + name: int-oscillator + doc: device internal oscillator + - + name: gnss + doc: GNSS recovered clock + render-max: true + - + type: enum + name: pin-direction + doc: | + defines possible direction of a pin, valid values for + DPLL_A_PIN_DIRECTION attribute + entries: + - + name: input + doc: pin used as a input of a signal + value: 1 + - + name: output + doc: pin used to output the signal + render-max: true + - + type: const + name: pin-frequency-1-hz + value: 1 + - + type: const + name: pin-frequency-10-khz + value: 10000 + - + type: const + name: pin-frequency-77_5-khz + value: 77500 + - + type: const + name: pin-frequency-10-mhz + value: 10000000 + - + type: enum + name: pin-state + doc: | + defines possible states of a pin, valid values for + DPLL_A_PIN_STATE attribute + entries: + - + name: connected + doc: pin connected, active input of phase locked loop + value: 1 + - + name: disconnected + doc: pin disconnected, not considered as a valid input + - + name: selectable + doc: pin enabled for automatic input selection + render-max: true + - + type: flags + name: pin-capabilities + doc: | + defines possible capabilities of a pin, valid flags on + DPLL_A_PIN_CAPABILITIES attribute + entries: + - + name: direction-can-change + doc: pin direction can be changed + - + name: priority-can-change + doc: pin priority can be changed + - + name: state-can-change + doc: pin state can be changed + +attribute-sets: + - + name: dpll + enum-name: dpll_a + attributes: + - + name: id + type: u32 + - + name: module-name + type: string + - + name: pad + type: pad + - + name: clock-id + type: u64 + - + name: mode + type: u32 + enum: mode + - + name: mode-supported + type: u32 + enum: mode + multi-attr: true + - + name: lock-status + type: u32 + enum: lock-status + - + name: temp + type: s32 + - + name: type + type: u32 + enum: type + - + name: pin + enum-name: dpll_a_pin + attributes: + - + name: id + type: u32 + - + name: parent-id + type: u32 + - + name: module-name + type: string + - + name: pad + type: pad + - + name: clock-id + type: u64 + - + name: board-label + type: string + - + name: panel-label + type: string + - + name: package-label + type: string + - + name: type + type: u32 + enum: pin-type + - + name: direction + type: u32 + enum: pin-direction + - + name: frequency + type: u64 + - + name: frequency-supported + type: nest + multi-attr: true + nested-attributes: frequency-range + - + name: frequency-min + type: u64 + - + name: frequency-max + type: u64 + - + name: prio + type: u32 + - + name: state + type: u32 + enum: pin-state + - + name: capabilities + type: u32 + - + name: parent-device + type: nest + multi-attr: true + nested-attributes: pin-parent-device + - + name: parent-pin + type: nest + multi-attr: true + nested-attributes: pin-parent-pin + - + name: pin-parent-device + subset-of: pin + attributes: + - + name: parent-id + type: u32 + - + name: direction + type: u32 + - + name: prio + type: u32 + - + name: state + type: u32 + - + name: pin-parent-pin + subset-of: pin + attributes: + - + name: parent-id + type: u32 + - + name: state + type: u32 + - + name: frequency-range + subset-of: pin + attributes: + - + name: frequency-min + type: u64 + - + name: frequency-max + type: u64 + +operations: + enum-name: dpll_cmd + list: + - + name: device-id-get + doc: | + Get id of dpll device that matches given attributes + attribute-set: dpll + flags: [ admin-perm ] + + do: + pre: dpll-lock-doit + post: dpll-unlock-doit + request: + attributes: + - module-name + - clock-id + - type + reply: + attributes: + - id + + - + name: device-get + doc: | + Get list of DPLL devices (dump) or attributes of a single dpll device + attribute-set: dpll + flags: [ admin-perm ] + + do: + pre: dpll-pre-doit + post: dpll-post-doit + request: + attributes: + - id + reply: &dev-attrs + attributes: + - id + - module-name + - mode + - mode-supported + - lock-status + - temp + - clock-id + - type + + dump: + pre: dpll-lock-dumpit + post: dpll-unlock-dumpit + reply: *dev-attrs + + - + name: device-set + doc: Set attributes for a DPLL device + attribute-set: dpll + flags: [ admin-perm ] + + do: + pre: dpll-pre-doit + post: dpll-post-doit + request: + attributes: + - id + - + name: device-create-ntf + doc: Notification about device appearing + notify: device-get + mcgrp: monitor + - + name: device-delete-ntf + doc: Notification about device disappearing + notify: device-get + mcgrp: monitor + - + name: device-change-ntf + doc: Notification about device configuration being changed + notify: device-get + mcgrp: monitor + - + name: pin-id-get + doc: | + Get id of a pin that matches given attributes + attribute-set: pin + flags: [ admin-perm ] + + do: + pre: dpll-lock-doit + post: dpll-unlock-doit + request: + attributes: + - module-name + - clock-id + - board-label + - panel-label + - package-label + - type + reply: + attributes: + - id + + - + name: pin-get + doc: | + Get list of pins and its attributes. + - dump request without any attributes given - list all the pins in the + system + - dump request with target dpll - list all the pins registered with + a given dpll device + - do request with target dpll and target pin - single pin attributes + attribute-set: pin + flags: [ admin-perm ] + + do: + pre: dpll-pin-pre-doit + post: dpll-pin-post-doit + request: + attributes: + - id + reply: &pin-attrs + attributes: + - id + - board-label + - panel-label + - package-label + - type + - frequency + - frequency-supported + - capabilities + - parent-device + - parent-pin + + dump: + pre: dpll-lock-dumpit + post: dpll-unlock-dumpit + request: + attributes: + - id + reply: *pin-attrs + + - + name: pin-set + doc: Set attributes of a target pin + attribute-set: pin + flags: [ admin-perm ] + + do: + pre: dpll-pin-pre-doit + post: dpll-pin-post-doit + request: + attributes: + - id + - frequency + - direction + - prio + - state + - parent-device + - parent-pin + - + name: pin-create-ntf + doc: Notification about pin appearing + notify: pin-get + mcgrp: monitor + - + name: pin-delete-ntf + doc: Notification about pin disappearing + notify: pin-get + mcgrp: monitor + - + name: pin-change-ntf + doc: Notification about pin configuration being changed + notify: pin-get + mcgrp: monitor + +mcast-groups: + list: + - + name: monitor diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 1c7284fd535b..c46fcc78fc04 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -42,6 +42,19 @@ definitions: doc: This feature informs if netdev implements non-linear XDP buffer support in ndo_xdp_xmit callback. + - + type: flags + name: xdp-rx-metadata + render-max: true + entries: + - + name: timestamp + doc: + Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp(). + - + name: hash + doc: + Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash(). attribute-sets: - @@ -68,6 +81,13 @@ attribute-sets: type: u32 checks: min: 1 + - + name: xdp-rx-metadata-features + doc: Bitmask of supported XDP receive metadata features. + See Documentation/networking/xdp-rx-metadata.rst for more details. + type: u64 + enum: xdp-rx-metadata + enum-as-flags: true operations: list: @@ -84,6 +104,7 @@ operations: - ifindex - xdp-features - xdp-zc-max-segs + - xdp-rx-metadata-features dump: reply: *dev-all - diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 9827e816084b..43de285b8a92 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -32,6 +32,7 @@ Contents: intel/e1000 intel/e1000e intel/fm10k + intel/idpf intel/igb intel/igbvf intel/ixgbe diff --git a/Documentation/networking/device_drivers/ethernet/intel/idpf.rst b/Documentation/networking/device_drivers/ethernet/intel/idpf.rst new file mode 100644 index 000000000000..adb16e2abd21 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/intel/idpf.rst @@ -0,0 +1,160 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +========================================================================== +idpf Linux* Base Driver for the Intel(R) Infrastructure Data Path Function +========================================================================== + +Intel idpf Linux driver. +Copyright(C) 2023 Intel Corporation. + +.. contents:: + +The idpf driver serves as both the Physical Function (PF) and Virtual Function +(VF) driver for the Intel(R) Infrastructure Data Path Function. + +Driver information can be obtained using ethtool, lspci, and ip. + +For questions related to hardware requirements, refer to the documentation +supplied with your Intel adapter. All hardware requirements listed apply to use +with Linux. + + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Additional Features and Configurations +====================================== + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. If you don't have one yet, you can +obtain it at: +https://kernel.org/pub/software/network/ethtool/ + + +Viewing Link Messages +--------------------- +Link messages will not be displayed to the console if the distribution is +restricting system messages. In order to see network driver link messages on +your console, set dmesg to eight by entering the following:: + + # dmesg -n 8 + +.. note:: + This setting is not saved across reboots. + + +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU) +to a value larger than the default value of 1500. + +Use the ip command to increase the MTU size. For example, enter the following +where <ethX> is the interface number:: + + # ip link set mtu 9000 dev <ethX> + # ip link set up dev <ethX> + +.. note:: + The maximum MTU setting for jumbo frames is 9706. This corresponds to the + maximum jumbo frame size of 9728 bytes. + +.. note:: + This driver will attempt to use multiple page sized buffers to receive + each jumbo packet. This should help to avoid buffer starvation issues when + allocating receive packets. + +.. note:: + Packet loss may have a greater impact on throughput when you use jumbo + frames. If you observe a drop in performance after enabling jumbo frames, + enabling flow control may mitigate the issue. + + +Performance Optimization +======================== +Driver defaults are meant to fit a wide variety of workloads, but if further +optimization is required, we recommend experimenting with the following +settings. + + +Interrupt Rate Limiting +----------------------- +This driver supports an adaptive interrupt throttle rate (ITR) mechanism that +is tuned for general workloads. The user can customize the interrupt rate +control for specific workloads, via ethtool, adjusting the number of +microseconds between interrupts. + +To set the interrupt rate manually, you must disable adaptive mode:: + + # ethtool -C <ethX> adaptive-rx off adaptive-tx off + +For lower CPU utilization: + - Disable adaptive ITR and lower Rx and Tx interrupts. The examples below + affect every queue of the specified interface. + + - Setting rx-usecs and tx-usecs to 80 will limit interrupts to about + 12,500 interrupts per second per queue:: + + # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 80 + tx-usecs 80 + +For reduced latency: + - Disable adaptive ITR and ITR by setting rx-usecs and tx-usecs to 0 + using ethtool:: + + # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 0 + tx-usecs 0 + +Per-queue interrupt rate settings: + - The following examples are for queues 1 and 3, but you can adjust other + queues. + + - To disable Rx adaptive ITR and set static Rx ITR to 10 microseconds or + about 100,000 interrupts/second, for queues 1 and 3:: + + # ethtool --per-queue <ethX> queue_mask 0xa --coalesce adaptive-rx off + rx-usecs 10 + + - To show the current coalesce settings for queues 1 and 3:: + + # ethtool --per-queue <ethX> queue_mask 0xa --show-coalesce + + + +Virtualized Environments +------------------------ +In addition to the other suggestions in this section, the following may be +helpful to optimize performance in VMs. + + - Using the appropriate mechanism (vcpupin) in the VM, pin the CPUs to + individual LCPUs, making sure to use a set of CPUs included in the + device's local_cpulist: /sys/class/net/<ethX>/device/local_cpulist. + + - Configure as many Rx/Tx queues in the VM as available. (See the idpf driver + documentation for the number of queues supported.) For example:: + + # ethtool -L <virt_interface> rx <max> tx <max> + + +Support +======= +For general information, go to the Intel support website at: +http://www.intel.com/support/ + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to intel-wired-lan@lists.osuosl.org. + + +Trademarks +========== +Intel is a trademark or registered trademark of Intel Corporation or its +subsidiaries in the United States and/or other countries. + +* Other names and brands may be claimed as the property of others. diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst index f69da5074860..7d8c5380492f 100644 --- a/Documentation/networking/filter.rst +++ b/Documentation/networking/filter.rst @@ -650,8 +650,8 @@ before a conversion to the new layout is being done behind the scenes! Currently, the classic BPF format is being used for JITing on most 32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64, -sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF -instruction set. +sparc64, arm32, riscv64, riscv32, loongarch64 perform JIT compilation +from eBPF instruction set. Testing ------- diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index a66054d0763a..5bfa1837968c 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -745,6 +745,13 @@ tcp_comp_sack_nr - INTEGER Default : 44 +tcp_backlog_ack_defer - BOOLEAN + If set, user thread processing socket backlog tries sending + one ACK for the whole queue. This helps to avoid potential + long latencies at end of a TCP socket syscall. + + Default : true + tcp_slow_start_after_idle - BOOLEAN If set, provide RFC2861 behavior and time out the congestion window after an idle period. An idle period is defined at diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index 25ce72af81c2..205696780b78 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -105,6 +105,13 @@ bpf_tail_call Adding programs that access metadata kfuncs to the ``BPF_MAP_TYPE_PROG_ARRAY`` is currently not supported. +Supported Devices +================= + +It is possible to query which kfunc the particular netdev implements via +netlink. See ``xdp-rx-metadata-features`` attribute set in +``Documentation/netlink/specs/netdev.yaml``. + Example ======= diff --git a/MAINTAINERS b/MAINTAINERS index dd5de540ec0b..80a46642a662 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3614,9 +3614,10 @@ F: Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml F: drivers/iio/accel/bma400* BPF JIT for ARM -M: Shubham Bansal <illusionist.neo@gmail.com> +M: Russell King <linux@armlinux.org.uk> +M: Puranjay Mohan <puranjay12@gmail.com> L: bpf@vger.kernel.org -S: Odd Fixes +S: Maintained F: arch/arm/net/ BPF JIT for ARM64 @@ -4338,8 +4339,7 @@ F: drivers/net/ethernet/broadcom/bcmsysport.* F: drivers/net/ethernet/broadcom/unimac.h BROADCOM TG3 GIGABIT ETHERNET DRIVER -M: Siva Reddy Kallam <siva.kallam@broadcom.com> -M: Prashant Sreedharan <prashant@broadcom.com> +M: Pavan Chebbi <pavan.chebbi@broadcom.com> M: Michael Chan <mchan@broadcom.com> L: netdev@vger.kernel.org S: Supported @@ -6352,6 +6352,17 @@ F: Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-drive F: drivers/net/ethernet/freescale/dpaa2/dpaa2-switch* F: drivers/net/ethernet/freescale/dpaa2/dpsw* +DPLL SUBSYSTEM +M: Vadim Fedorenko <vadim.fedorenko@linux.dev> +M: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com> +M: Jiri Pirko <jiri@resnulli.us> +L: netdev@vger.kernel.org +S: Supported +F: Documentation/driver-api/dpll.rst +F: drivers/dpll/* +F: include/net/dpll.h +F: include/uapi/linux/dpll.h + DRBD DRIVER M: Philipp Reisner <philipp.reisner@linbit.com> M: Lars Ellenberg <lars.ellenberg@linbit.com> @@ -14347,9 +14358,11 @@ MIPS/LOONGSON1 ARCHITECTURE M: Keguang Zhang <keguang.zhang@gmail.com> L: linux-mips@vger.kernel.org S: Maintained +F: Documentation/devicetree/bindings/*/loongson,ls1*.yaml F: arch/mips/include/asm/mach-loongson32/ F: arch/mips/loongson32/ F: drivers/*/*loongson1* +F: drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c MIPS/LOONGSON2EF ARCHITECTURE M: Jiaxun Yang <jiaxun.yang@flygoat.com> diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6a1c9fca5260..1d672457d02f 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -2,6 +2,7 @@ /* * Just-In-Time compiler for eBPF filters on 32bit ARM * + * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com> * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> */ @@ -15,6 +16,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/if_vlan.h> +#include <linux/math64.h> #include <asm/cacheflush.h> #include <asm/hwcap.h> @@ -228,6 +230,44 @@ static u32 jit_mod32(u32 dividend, u32 divisor) return dividend % divisor; } +static s32 jit_sdiv32(s32 dividend, s32 divisor) +{ + return dividend / divisor; +} + +static s32 jit_smod32(s32 dividend, s32 divisor) +{ + return dividend % divisor; +} + +/* Wrappers for 64-bit div/mod */ +static u64 jit_udiv64(u64 dividend, u64 divisor) +{ + return div64_u64(dividend, divisor); +} + +static u64 jit_mod64(u64 dividend, u64 divisor) +{ + u64 rem; + + div64_u64_rem(dividend, divisor, &rem); + return rem; +} + +static s64 jit_sdiv64(s64 dividend, s64 divisor) +{ + return div64_s64(dividend, divisor); +} + +static s64 jit_smod64(s64 dividend, s64 divisor) +{ + u64 q; + + q = div64_s64(dividend, divisor); + + return dividend - q * divisor; +} + static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) { inst |= (cond << 28); @@ -333,6 +373,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) #define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) #define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) +#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off) +#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off) + #define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) #define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) #define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) @@ -474,17 +517,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) return to - from - 2; } -static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) +static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign) { const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1); const s8 *tmp = bpf2a32[TMP_REG_1]; + u32 dst; #if __LINUX_ARM_ARCH__ == 7 if (elf_hwcap & HWCAP_IDIVA) { - if (op == BPF_DIV) - emit(ARM_UDIV(rd, rm, rn), ctx); - else { - emit(ARM_UDIV(ARM_IP, rm, rn), ctx); + if (op == BPF_DIV) { + emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx); + } else { + emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx); emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx); } return; @@ -512,8 +556,19 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx); /* Call appropriate function */ - emit_mov_i(ARM_IP, op == BPF_DIV ? - (u32)jit_udiv32 : (u32)jit_mod32, ctx); + if (sign) { + if (op == BPF_DIV) + dst = (u32)jit_sdiv32; + else + dst = (u32)jit_smod32; + } else { + if (op == BPF_DIV) + dst = (u32)jit_udiv32; + else + dst = (u32)jit_mod32; + } + + emit_mov_i(ARM_IP, dst, ctx); emit_blx_r(ARM_IP, ctx); /* Restore caller-saved registers from stack */ @@ -530,6 +585,78 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); } +static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx, + u8 op, u8 sign) +{ + u32 dst; + + /* Push caller-saved registers on stack */ + emit(ARM_PUSH(CALLER_MASK), ctx); + + /* + * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in + * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack, + * we can recover them later after returning from the function call. + */ + if (rm[1] != ARM_R0 || rn[1] != ARM_R2) { + /* + * Move Rm to {R1, R0} if it is not already there. + */ + if (rm[1] != ARM_R0) { + if (rn[1] == ARM_R0) + emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx); + emit(ARM_MOV_R(ARM_R1, rm[0]), ctx); + emit(ARM_MOV_R(ARM_R0, rm[1]), ctx); + if (rn[1] == ARM_R0) { + emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); + goto cont; + } + } + /* + * Move Rn to {R3, R2} if it is not already there. + */ + if (rn[1] != ARM_R2) { + emit(ARM_MOV_R(ARM_R3, rn[0]), ctx); + emit(ARM_MOV_R(ARM_R2, rn[1]), ctx); + } + } + +cont: + + /* Call appropriate function */ + if (sign) { + if (op == BPF_DIV) + dst = (u32)jit_sdiv64; + else + dst = (u32)jit_smod64; + } else { + if (op == BPF_DIV) + dst = (u32)jit_udiv64; + else + dst = (u32)jit_mod64; + } + + emit_mov_i(ARM_IP, dst, ctx); + emit_blx_r(ARM_IP, ctx); + + /* Save return value */ + if (rd[1] != ARM_R0) { + emit(ARM_MOV_R(rd[0], ARM_R1), ctx); + emit(ARM_MOV_R(rd[1], ARM_R0), ctx); + } + + /* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */ + if (rd[1] != ARM_R0 && rd[1] != ARM_R2) { + emit(ARM_POP(CALLER_MASK), ctx); + } else if (rd[1] != ARM_R0) { + emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx); + emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx); + } else { + emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx); + emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); + } +} + /* Is the translated BPF register on stack? */ static bool is_stacked(s8 reg) { @@ -744,12 +871,16 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], } /* dst = src (4 bytes)*/ -static inline void emit_a32_mov_r(const s8 dst, const s8 src, +static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; s8 rt; rt = arm_bpf_get_reg32(src, tmp[0], ctx); + if (off && off != 32) { + emit(ARM_LSL_I(rt, rt, 32 - off), ctx); + emit(ARM_ASR_I(rt, rt, 32 - off), ctx); + } arm_bpf_put_reg32(dst, rt, ctx); } @@ -758,15 +889,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { if (!is64) { - emit_a32_mov_r(dst_lo, src_lo, ctx); + emit_a32_mov_r(dst_lo, src_lo, 0, ctx); if (!ctx->prog->aux->verifier_zext) /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); } else if (__LINUX_ARM_ARCH__ < 6 && ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ - emit_a32_mov_r(dst_lo, src_lo, ctx); - emit_a32_mov_r(dst_hi, src_hi, ctx); + emit_a32_mov_r(dst_lo, src_lo, 0, ctx); + emit_a32_mov_r(dst_hi, src_hi, 0, ctx); } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { const u8 *tmp = bpf2a32[TMP_REG_1]; @@ -782,6 +913,24 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], } } +/* dst = (signed)src */ +static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rt; + + rt = arm_bpf_get_reg64(dst, tmp, ctx); + + emit_a32_mov_r(dst_lo, src_lo, off, ctx); + if (!is64) { + if (!ctx->prog->aux->verifier_zext) + /* Zero out high 4 bytes */ + emit_a32_mov_i(dst_hi, 0, ctx); + } else { + emit(ARM_ASR_I(rt[0], rt[1], 31), ctx); + } +} + /* Shift operations */ static inline void emit_a32_alu_i(const s8 dst, const u32 val, struct jit_ctx *ctx, const u8 op) { @@ -1026,6 +1175,24 @@ static bool is_ldst_imm(s16 off, const u8 size) return -off_max <= off && off <= off_max; } +static bool is_ldst_imm8(s16 off, const u8 size) +{ + s16 off_max = 0; + + switch (size) { + case BPF_B: + off_max = 0xff; + break; + case BPF_W: + off_max = 0xfff; + break; + case BPF_H: + off_max = 0xff; + break; + } + return -off_max <= off && off <= off_max; +} + /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src[], s16 off, struct jit_ctx *ctx, const u8 sz){ @@ -1105,6 +1272,50 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, arm_bpf_put_reg64(dst, rd, ctx); } +/* dst = *(signed size*)(src + off) */ +static inline void emit_ldsx_r(const s8 dst[], const s8 src, + s16 off, struct jit_ctx *ctx, const u8 sz){ + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rd = is_stacked(dst_lo) ? tmp : dst; + s8 rm = src; + int add_off; + + if (!is_ldst_imm8(off, sz)) { + /* + * offset does not fit in the load/store immediate, + * construct an ADD instruction to apply the offset. + */ + add_off = imm8m(off); + if (add_off > 0) { + emit(ARM_ADD_I(tmp[0], src, add_off), ctx); + rm = tmp[0]; + } else { + emit_a32_mov_i(tmp[0], off, ctx); + emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); + rm = tmp[0]; + } + off = 0; + } + + switch (sz) { + case BPF_B: + /* Load a Byte with sign extension*/ + emit(ARM_LDRSB_I(rd[1], rm, off), ctx); + break; + case BPF_H: + /* Load a HalfWord with sign extension*/ + emit(ARM_LDRSH_I(rd[1], rm, off), ctx); + break; + case BPF_W: + /* Load a Word*/ + emit(ARM_LDR_I(rd[1], rm, off), ctx); + break; + } + /* Carry the sign extension to upper 32 bits */ + emit(ARM_ASR_I(rd[0], rd[1], 31), ctx); + arm_bpf_put_reg64(dst, rd, ctx); +} + /* Arithmatic Operation */ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, const u8 rn, struct jit_ctx *ctx, u8 op, @@ -1385,7 +1596,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_a32_mov_i(dst_hi, 0, ctx); break; } - emit_a32_mov_r64(is64, dst, src, ctx); + if (insn->off) + emit_a32_movsx_r64(is64, insn->off, dst, src, ctx); + else + emit_a32_mov_r64(is64, dst, src, ctx); break; case BPF_K: /* Sign-extend immediate value to destination reg */ @@ -1461,7 +1675,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) rt = src_lo; break; } - emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); + emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off); arm_bpf_put_reg32(dst_lo, rd_lo, ctx); if (!ctx->prog->aux->verifier_zext) emit_a32_mov_i(dst_hi, 0, ctx); @@ -1470,7 +1684,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_X: - goto notyet; + rd = arm_bpf_get_reg64(dst, tmp2, ctx); + switch (BPF_SRC(code)) { + case BPF_X: + rs = arm_bpf_get_reg64(src, tmp, ctx); + break; + case BPF_K: + rs = tmp; + emit_a32_mov_se_i64(is64, rs, imm, ctx); + break; + } + emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off); + arm_bpf_put_reg64(dst, rd, ctx); + break; /* dst = dst << imm */ /* dst = dst >> imm */ /* dst = dst >> imm (signed) */ @@ -1545,10 +1771,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; /* dst = htole(dst) */ /* dst = htobe(dst) */ - case BPF_ALU | BPF_END | BPF_FROM_LE: - case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */ + case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */ + /* dst = bswap(dst) */ + case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */ rd = arm_bpf_get_reg64(dst, tmp, ctx); - if (BPF_SRC(code) == BPF_FROM_LE) + if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64) goto emit_bswap_uxt; switch (imm) { case 16: @@ -1603,8 +1831,15 @@ exit: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: + /* LDSX: dst = *(signed size *)(src + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); + if (BPF_MODE(insn->code) == BPF_MEMSX) + emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code)); + else + emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; /* speculation barrier */ case BPF_ST | BPF_NOSPEC: @@ -1761,10 +1996,15 @@ go_jmp: break; /* JMP OFF */ case BPF_JMP | BPF_JA: + case BPF_JMP32 | BPF_JA: { - if (off == 0) + if (BPF_CLASS(code) == BPF_JMP32 && imm != 0) + jmp_offset = bpf2a32_offset(i + imm, i, ctx); + else if (BPF_CLASS(code) == BPF_JMP && off != 0) + jmp_offset = bpf2a32_offset(i + off, i, ctx); + else break; - jmp_offset = bpf2a32_offset(i+off, i, ctx); + check_imm24(jmp_offset); emit(ARM_B(jmp_offset), ctx); break; diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index e0b593a1498d..438f0e1f91a0 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -79,9 +79,11 @@ #define ARM_INST_LDST__IMM12 0x00000fff #define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 +#define ARM_INST_LDRSB_I 0x015000d0 #define ARM_INST_LDRD_I 0x014000d0 #define ARM_INST_LDRH_I 0x015000b0 #define ARM_INST_LDRH_R 0x019000b0 +#define ARM_INST_LDRSH_I 0x015000f0 #define ARM_INST_LDR_I 0x05100000 #define ARM_INST_LDR_R 0x07900000 @@ -137,6 +139,7 @@ #define ARM_INST_TST_I 0x03100000 #define ARM_INST_UDIV 0x0730f010 +#define ARM_INST_SDIV 0x0710f010 #define ARM_INST_UMULL 0x00800090 @@ -265,6 +268,7 @@ #define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm) #define ARM_UDIV(rd, rn, rm) (ARM_INST_UDIV | (rd) << 16 | (rn) | (rm) << 8) +#define ARM_SDIV(rd, rn, rm) (ARM_INST_SDIV | (rd) << 16 | (rn) | (rm) << 8) #define ARM_UMULL(rd_lo, rd_hi, rn, rm) (ARM_INST_UMULL | (rd_hi) << 16 \ | (rd_lo) << 12 | (rm) << 8 | rn) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 150d1c6543f7..7d4af64e3982 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -288,7 +288,7 @@ static bool is_lsi_offset(int offset, int scale) static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { const struct bpf_prog *prog = ctx->prog; - const bool is_main_prog = prog->aux->func_idx == 0; + const bool is_main_prog = !bpf_is_subprog(prog); const u8 r6 = bpf2a64[BPF_REG_6]; const u8 r7 = bpf2a64[BPF_REG_7]; const u8 r8 = bpf2a64[BPF_REG_8]; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e507692e51e7..bf06b7283f0c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -556,7 +556,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt); jit->prologue_plt_ret = jit->prg; - if (fp->aux->func_idx == 0) { + if (!bpf_is_subprog(fp)) { /* Initialize the tail call counter in the main program. */ /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT); @@ -670,15 +670,18 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) static int get_probe_mem_regno(const u8 *insn) { /* - * insn must point to llgc, llgh, llgf or lg, which have destination - * register at the same position. + * insn must point to llgc, llgh, llgf, lg, lgb, lgh or lgf, which have + * destination register at the same position. */ - if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */ + if (insn[0] != 0xe3) /* common prefix */ return -1; if (insn[5] != 0x90 && /* llgc */ insn[5] != 0x91 && /* llgh */ insn[5] != 0x16 && /* llgf */ - insn[5] != 0x04) /* lg */ + insn[5] != 0x04 && /* lg */ + insn[5] != 0x77 && /* lgb */ + insn[5] != 0x15 && /* lgh */ + insn[5] != 0x14) /* lgf */ return -1; return insn[1] >> 4; } @@ -776,6 +779,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i, bool extra_pass, u32 stack_depth) { struct bpf_insn *insn = &fp->insnsi[i]; + s16 branch_oc_off = insn->off; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; int last, insn_count = 1; @@ -788,22 +792,55 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int err; if (BPF_CLASS(insn->code) == BPF_LDX && - BPF_MODE(insn->code) == BPF_PROBE_MEM) + (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) probe_prg = jit->prg; switch (insn->code) { /* * BPF_MOV */ - case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */ - /* llgfr %dst,%src */ - EMIT4(0xb9160000, dst_reg, src_reg); - if (insn_is_zext(&insn[1])) - insn_count = 2; + case BPF_ALU | BPF_MOV | BPF_X: + switch (insn->off) { + case 0: /* DST = (u32) SRC */ + /* llgfr %dst,%src */ + EMIT4(0xb9160000, dst_reg, src_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; + break; + case 8: /* DST = (u32)(s8) SRC */ + /* lbr %dst,%src */ + EMIT4(0xb9260000, dst_reg, src_reg); + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); + break; + case 16: /* DST = (u32)(s16) SRC */ + /* lhr %dst,%src */ + EMIT4(0xb9270000, dst_reg, src_reg); + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); + break; + } break; - case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ - /* lgr %dst,%src */ - EMIT4(0xb9040000, dst_reg, src_reg); + case BPF_ALU64 | BPF_MOV | BPF_X: + switch (insn->off) { + case 0: /* DST = SRC */ + /* lgr %dst,%src */ + EMIT4(0xb9040000, dst_reg, src_reg); + break; + case 8: /* DST = (s8) SRC */ + /* lgbr %dst,%src */ + EMIT4(0xb9060000, dst_reg, src_reg); + break; + case 16: /* DST = (s16) SRC */ + /* lghr %dst,%src */ + EMIT4(0xb9070000, dst_reg, src_reg); + break; + case 32: /* DST = (s32) SRC */ + /* lgfr %dst,%src */ + EMIT4(0xb9140000, dst_reg, src_reg); + break; + } break; case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */ /* llilf %dst,imm */ @@ -912,66 +949,115 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* * BPF_DIV / BPF_MOD */ - case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */ - case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */ + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_X: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; - /* lhi %w0,0 */ - EMIT4_IMM(0xa7080000, REG_W0, 0); - /* lr %w1,%dst */ - EMIT2(0x1800, REG_W1, dst_reg); - /* dlr %w0,%src */ - EMIT4(0xb9970000, REG_W0, src_reg); + switch (off) { + case 0: /* dst = (u32) dst {/,%} (u32) src */ + /* xr %w0,%w0 */ + EMIT2(0x1700, REG_W0, REG_W0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* dlr %w0,%src */ + EMIT4(0xb9970000, REG_W0, src_reg); + break; + case 1: /* dst = (u32) ((s32) dst {/,%} (s32) src) */ + /* lgfr %r1,%dst */ + EMIT4(0xb9140000, REG_W1, dst_reg); + /* dsgfr %r0,%src */ + EMIT4(0xb91d0000, REG_W0, src_reg); + break; + } /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); if (insn_is_zext(&insn[1])) insn_count = 2; break; } - case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */ - case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */ + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; - /* lghi %w0,0 */ - EMIT4_IMM(0xa7090000, REG_W0, 0); - /* lgr %w1,%dst */ - EMIT4(0xb9040000, REG_W1, dst_reg); - /* dlgr %w0,%dst */ - EMIT4(0xb9870000, REG_W0, src_reg); + switch (off) { + case 0: /* dst = dst {/,%} src */ + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dlgr %w0,%src */ + EMIT4(0xb9870000, REG_W0, src_reg); + break; + case 1: /* dst = (s64) dst {/,%} (s64) src */ + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dsgr %w0,%src */ + EMIT4(0xb90d0000, REG_W0, src_reg); + break; + } /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); break; } - case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */ - case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */ + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; if (imm == 1) { if (BPF_OP(insn->code) == BPF_MOD) - /* lhgi %dst,0 */ + /* lghi %dst,0 */ EMIT4_IMM(0xa7090000, dst_reg, 0); else EMIT_ZERO(dst_reg); break; } - /* lhi %w0,0 */ - EMIT4_IMM(0xa7080000, REG_W0, 0); - /* lr %w1,%dst */ - EMIT2(0x1800, REG_W1, dst_reg); if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) { - /* dl %w0,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L, - EMIT_CONST_U32(imm)); + switch (off) { + case 0: /* dst = (u32) dst {/,%} (u32) imm */ + /* xr %w0,%w0 */ + EMIT2(0x1700, REG_W0, REG_W0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* dl %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, + REG_L, EMIT_CONST_U32(imm)); + break; + case 1: /* dst = (s32) dst {/,%} (s32) imm */ + /* lgfr %r1,%dst */ + EMIT4(0xb9140000, REG_W1, dst_reg); + /* dsgf %r0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x001d, REG_W0, REG_0, + REG_L, EMIT_CONST_U32(imm)); + break; + } } else { - /* lgfrl %dst,imm */ - EMIT6_PCREL_RILB(0xc40c0000, dst_reg, - _EMIT_CONST_U32(imm)); - jit->seen |= SEEN_LITERAL; - /* dlr %w0,%dst */ - EMIT4(0xb9970000, REG_W0, dst_reg); + switch (off) { + case 0: /* dst = (u32) dst {/,%} (u32) imm */ + /* xr %w0,%w0 */ + EMIT2(0x1700, REG_W0, REG_W0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* lrl %dst,imm */ + EMIT6_PCREL_RILB(0xc40d0000, dst_reg, + _EMIT_CONST_U32(imm)); + jit->seen |= SEEN_LITERAL; + /* dlr %w0,%dst */ + EMIT4(0xb9970000, REG_W0, dst_reg); + break; + case 1: /* dst = (s32) dst {/,%} (s32) imm */ + /* lgfr %w1,%dst */ + EMIT4(0xb9140000, REG_W1, dst_reg); + /* lgfrl %dst,imm */ + EMIT6_PCREL_RILB(0xc40c0000, dst_reg, + _EMIT_CONST_U32(imm)); + jit->seen |= SEEN_LITERAL; + /* dsgr %w0,%dst */ + EMIT4(0xb90d0000, REG_W0, dst_reg); + break; + } } /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); @@ -979,8 +1065,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; } - case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */ - case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */ + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: { int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; @@ -990,21 +1076,50 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4_IMM(0xa7090000, dst_reg, 0); break; } - /* lghi %w0,0 */ - EMIT4_IMM(0xa7090000, REG_W0, 0); - /* lgr %w1,%dst */ - EMIT4(0xb9040000, REG_W1, dst_reg); if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) { - /* dlg %w0,<d(imm)>(%l) */ - EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, - EMIT_CONST_U64(imm)); + switch (off) { + case 0: /* dst = dst {/,%} imm */ + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dlg %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, + REG_L, EMIT_CONST_U64(imm)); + break; + case 1: /* dst = (s64) dst {/,%} (s64) imm */ + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dsg %w0,<d(imm)>(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x000d, REG_W0, REG_0, + REG_L, EMIT_CONST_U64(imm)); + break; + } } else { - /* lgrl %dst,imm */ - EMIT6_PCREL_RILB(0xc4080000, dst_reg, - _EMIT_CONST_U64(imm)); - jit->seen |= SEEN_LITERAL; - /* dlgr %w0,%dst */ - EMIT4(0xb9870000, REG_W0, dst_reg); + switch (off) { + case 0: /* dst = dst {/,%} imm */ + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* lgrl %dst,imm */ + EMIT6_PCREL_RILB(0xc4080000, dst_reg, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* dlgr %w0,%dst */ + EMIT4(0xb9870000, REG_W0, dst_reg); + break; + case 1: /* dst = (s64) dst {/,%} (s64) imm */ + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* lgrl %dst,imm */ + EMIT6_PCREL_RILB(0xc4080000, dst_reg, + _EMIT_CONST_U64(imm)); + jit->seen |= SEEN_LITERAL; + /* dsgr %w0,%dst */ + EMIT4(0xb90d0000, REG_W0, dst_reg); + break; + } } /* lgr %dst,%rc */ EMIT4(0xb9040000, dst_reg, rc_reg); @@ -1217,6 +1332,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, } break; case BPF_ALU | BPF_END | BPF_FROM_LE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: switch (imm) { case 16: /* dst = (u16) cpu_to_le16(dst) */ /* lrvr %dst,%dst */ @@ -1374,6 +1490,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (insn_is_zext(&insn[1])) insn_count = 2; break; + case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + /* lgb %dst,0(off,%src) */ + EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* llgh %dst,0(off,%src) */ @@ -1382,6 +1504,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (insn_is_zext(&insn[1])) insn_count = 2; break; + case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + /* lgh %dst,0(off,%src) */ + EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* llgf %dst,off(%src) */ @@ -1390,6 +1518,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (insn_is_zext(&insn[1])) insn_count = 2; break; + case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: + /* lgf %dst,off(%src) */ + jit->seen |= SEEN_MEM; + EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off); + break; case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_PROBE_MEM | BPF_DW: /* lg %dst,0(off,%src) */ @@ -1570,6 +1704,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, * instruction itself (loop) and for BPF with offset 0 we * branch to the instruction behind the branch. */ + case BPF_JMP32 | BPF_JA: /* if (true) */ + branch_oc_off = imm; + fallthrough; case BPF_JMP | BPF_JA: /* if (true) */ mask = 0xf000; /* j */ goto branch_oc; @@ -1738,14 +1875,16 @@ branch_xu: break; branch_oc: if (!is_first_pass(jit) && - can_use_rel(jit, addrs[i + off + 1])) { + can_use_rel(jit, addrs[i + branch_oc_off + 1])) { /* brc mask,off */ EMIT4_PCREL_RIC(0xa7040000, - mask >> 12, addrs[i + off + 1]); + mask >> 12, + addrs[i + branch_oc_off + 1]); } else { /* brcl mask,off */ EMIT6_PCREL_RILC(0xc0040000, - mask >> 12, addrs[i + off + 1]); + mask >> 12, + addrs[i + branch_oc_off + 1]); } break; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a5930042139d..8c10d9abc239 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -16,6 +16,9 @@ #include <asm/set_memory.h> #include <asm/nospec-branch.h> #include <asm/text-patching.h> +#include <asm/unwind.h> + +static bool all_callee_regs_used[4] = {true, true, true, true}; static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -255,6 +258,14 @@ struct jit_context { /* Number of bytes that will be skipped on tailcall */ #define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE) +static void push_r12(u8 **pprog) +{ + u8 *prog = *pprog; + + EMIT2(0x41, 0x54); /* push r12 */ + *pprog = prog; +} + static void push_callee_regs(u8 **pprog, bool *callee_regs_used) { u8 *prog = *pprog; @@ -270,6 +281,14 @@ static void push_callee_regs(u8 **pprog, bool *callee_regs_used) *pprog = prog; } +static void pop_r12(u8 **pprog) +{ + u8 *prog = *pprog; + + EMIT2(0x41, 0x5C); /* pop r12 */ + *pprog = prog; +} + static void pop_callee_regs(u8 **pprog, bool *callee_regs_used) { u8 *prog = *pprog; @@ -291,7 +310,8 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used) * while jumping to another program */ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, - bool tail_call_reachable, bool is_subprog) + bool tail_call_reachable, bool is_subprog, + bool is_exception_cb) { u8 *prog = *pprog; @@ -303,12 +323,30 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, prog += X86_PATCH_SIZE; if (!ebpf_from_cbpf) { if (tail_call_reachable && !is_subprog) + /* When it's the entry of the whole tailcall context, + * zeroing rax means initialising tail_call_cnt. + */ EMIT2(0x31, 0xC0); /* xor eax, eax */ else + /* Keep the same instruction layout. */ EMIT2(0x66, 0x90); /* nop2 */ } - EMIT1(0x55); /* push rbp */ - EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + /* Exception callback receives FP as third parameter */ + if (is_exception_cb) { + EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */ + EMIT3(0x48, 0x89, 0xD5); /* mov rbp, rdx */ + /* The main frame must have exception_boundary as true, so we + * first restore those callee-saved regs from stack, before + * reusing the stack frame. + */ + pop_callee_regs(&prog, all_callee_regs_used); + pop_r12(&prog); + /* Reset the stack frame. */ + EMIT3(0x48, 0x89, 0xEC); /* mov rsp, rbp */ + } else { + EMIT1(0x55); /* push rbp */ + EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + } /* X86_TAIL_CALL_OFFSET is here */ EMIT_ENDBR(); @@ -467,7 +505,8 @@ static void emit_return(u8 **pprog, u8 *ip) * goto *(prog->bpf_func + prologue_size); * out: */ -static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, +static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog, + u8 **pprog, bool *callee_regs_used, u32 stack_depth, u8 *ip, struct jit_context *ctx) { @@ -517,7 +556,12 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, offset = ctx->tail_call_indirect_label - (prog + 2 - start); EMIT2(X86_JE, offset); /* je out */ - pop_callee_regs(&prog, callee_regs_used); + if (bpf_prog->aux->exception_boundary) { + pop_callee_regs(&prog, all_callee_regs_used); + pop_r12(&prog); + } else { + pop_callee_regs(&prog, callee_regs_used); + } EMIT1(0x58); /* pop rax */ if (stack_depth) @@ -541,7 +585,8 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, *pprog = prog; } -static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, +static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog, + struct bpf_jit_poke_descriptor *poke, u8 **pprog, u8 *ip, bool *callee_regs_used, u32 stack_depth, struct jit_context *ctx) @@ -570,7 +615,13 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, poke->tailcall_bypass); - pop_callee_regs(&prog, callee_regs_used); + if (bpf_prog->aux->exception_boundary) { + pop_callee_regs(&prog, all_callee_regs_used); + pop_r12(&prog); + } else { + pop_callee_regs(&prog, callee_regs_used); + } + EMIT1(0x58); /* pop rax */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); @@ -1018,6 +1069,10 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op) #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) +/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ +#define RESTORE_TAIL_CALL_CNT(stack) \ + EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8) + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding) { @@ -1041,8 +1096,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image emit_prologue(&prog, bpf_prog->aux->stack_depth, bpf_prog_was_classic(bpf_prog), tail_call_reachable, - bpf_prog->aux->func_idx != 0); - push_callee_regs(&prog, callee_regs_used); + bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb); + /* Exception callback will clobber callee regs for its own use, and + * restore the original callee regs from main prog's stack frame. + */ + if (bpf_prog->aux->exception_boundary) { + /* We also need to save r12, which is not mapped to any BPF + * register, as we throw after entry into the kernel, which may + * overwrite r12. + */ + push_r12(&prog); + push_callee_regs(&prog, all_callee_regs_used); + } else { + push_callee_regs(&prog, callee_regs_used); + } ilen = prog - temp; if (rw_image) @@ -1623,9 +1690,7 @@ st: if (is_imm8(insn->off)) func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { - /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ - EMIT3_off32(0x48, 0x8B, 0x85, - -round_up(bpf_prog->aux->stack_depth, 8) - 8); + RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth); if (!imm32) return -EINVAL; offs = 7 + x86_call_depth_emit_accounting(&prog, func); @@ -1641,13 +1706,15 @@ st: if (is_imm8(insn->off)) case BPF_JMP | BPF_TAIL_CALL: if (imm32) - emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], + emit_bpf_tail_call_direct(bpf_prog, + &bpf_prog->aux->poke_tab[imm32 - 1], &prog, image + addrs[i - 1], callee_regs_used, bpf_prog->aux->stack_depth, ctx); else - emit_bpf_tail_call_indirect(&prog, + emit_bpf_tail_call_indirect(bpf_prog, + &prog, callee_regs_used, bpf_prog->aux->stack_depth, image + addrs[i - 1], @@ -1900,7 +1967,12 @@ emit_jmp: seen_exit = true; /* Update cleanup_addr */ ctx->cleanup_addr = proglen; - pop_callee_regs(&prog, callee_regs_used); + if (bpf_prog->aux->exception_boundary) { + pop_callee_regs(&prog, all_callee_regs_used); + pop_r12(&prog); + } else { + pop_callee_regs(&prog, callee_regs_used); + } EMIT1(0xC9); /* leave */ emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; @@ -2400,6 +2472,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * [ ... ] * [ stack_arg2 ] * RBP - arg_stack_off [ stack_arg1 ] + * RSP [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX */ /* room for return value of orig_call or fentry prog */ @@ -2464,6 +2537,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i else /* sub rsp, stack_size */ EMIT4(0x48, 0x83, 0xEC, stack_size); + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + EMIT1(0x50); /* push rax */ /* mov QWORD PTR [rbp - rbx_off], rbx */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off); @@ -2516,9 +2591,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i restore_regs(m, &prog, regs_off); save_args(m, &prog, arg_stack_off, true); + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + /* Before calling the original function, restore the + * tail_call_cnt from stack to rax. + */ + RESTORE_TAIL_CALL_CNT(stack_size); + if (flags & BPF_TRAMP_F_ORIG_STACK) { - emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); - EMIT2(0xff, 0xd0); /* call *rax */ + emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8); + EMIT2(0xff, 0xd3); /* call *rbx */ } else { /* call original function */ if (emit_rsb_call(&prog, orig_call, prog)) { @@ -2569,7 +2650,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ret = -EINVAL; goto cleanup; } - } + } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + /* Before running the original function, restore the + * tail_call_cnt from stack to rax. + */ + RESTORE_TAIL_CALL_CNT(stack_size); + /* restore return value of orig_call or fentry prog back into RAX */ if (save_ret) emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); @@ -2913,3 +2999,29 @@ void bpf_jit_free(struct bpf_prog *prog) bpf_prog_unlock_free(prog); } + +bool bpf_jit_supports_exceptions(void) +{ + /* We unwind through both kernel frames (starting from within bpf_throw + * call) and BPF frames. Therefore we require ORC unwinder to be enabled + * to walk kernel frames and reach BPF frames in the stack trace. + */ + return IS_ENABLED(CONFIG_UNWINDER_ORC); +} + +void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie) +{ +#if defined(CONFIG_UNWINDER_ORC) + struct unwind_state state; + unsigned long addr; + + for (unwind_start(&state, current, NULL, NULL); !unwind_done(&state); + unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr || !consume_fn(cookie, (u64)addr, (u64)state.sp, (u64)state.bp)) + break; + } + return; +#endif + WARN(1, "verification of programs using bpf_throw should have failed\n"); +} diff --git a/drivers/Kconfig b/drivers/Kconfig index efb66e25fa2d..8ba3e8b9ad72 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -243,4 +243,6 @@ source "drivers/hte/Kconfig" source "drivers/cdx/Kconfig" +source "drivers/dpll/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 1bec7819a837..722d15be0eb7 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -197,5 +197,6 @@ obj-$(CONFIG_PECI) += peci/ obj-$(CONFIG_HTE) += hte/ obj-$(CONFIG_DRM_ACCEL) += accel/ obj-$(CONFIG_CDX_BUS) += cdx/ +obj-$(CONFIG_DPLL) += dpll/ obj-$(CONFIG_S390) += s390/ diff --git a/drivers/dpll/Kconfig b/drivers/dpll/Kconfig new file mode 100644 index 000000000000..a4cae73f20d3 --- /dev/null +++ b/drivers/dpll/Kconfig @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Generic DPLL drivers configuration +# + +config DPLL + bool diff --git a/drivers/dpll/Makefile b/drivers/dpll/Makefile new file mode 100644 index 000000000000..2e5b27850110 --- /dev/null +++ b/drivers/dpll/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for DPLL drivers. +# + +obj-$(CONFIG_DPLL) += dpll.o +dpll-y += dpll_core.o +dpll-y += dpll_netlink.o +dpll-y += dpll_nl.o diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c new file mode 100644 index 000000000000..3568149b9562 --- /dev/null +++ b/drivers/dpll/dpll_core.c @@ -0,0 +1,798 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * dpll_core.c - DPLL subsystem kernel-space interface implementation. + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel Corporation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/device.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/string.h> + +#include "dpll_core.h" +#include "dpll_netlink.h" + +/* Mutex lock to protect DPLL subsystem devices and pins */ +DEFINE_MUTEX(dpll_lock); + +DEFINE_XARRAY_FLAGS(dpll_device_xa, XA_FLAGS_ALLOC); +DEFINE_XARRAY_FLAGS(dpll_pin_xa, XA_FLAGS_ALLOC); + +static u32 dpll_xa_id; + +#define ASSERT_DPLL_REGISTERED(d) \ + WARN_ON_ONCE(!xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED)) +#define ASSERT_DPLL_NOT_REGISTERED(d) \ + WARN_ON_ONCE(xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED)) +#define ASSERT_PIN_REGISTERED(p) \ + WARN_ON_ONCE(!xa_get_mark(&dpll_pin_xa, (p)->id, DPLL_REGISTERED)) + +struct dpll_device_registration { + struct list_head list; + const struct dpll_device_ops *ops; + void *priv; +}; + +struct dpll_pin_registration { + struct list_head list; + const struct dpll_pin_ops *ops; + void *priv; +}; + +struct dpll_device *dpll_device_get_by_id(int id) +{ + if (xa_get_mark(&dpll_device_xa, id, DPLL_REGISTERED)) + return xa_load(&dpll_device_xa, id); + + return NULL; +} + +static struct dpll_pin_registration * +dpll_pin_registration_find(struct dpll_pin_ref *ref, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_registration *reg; + + list_for_each_entry(reg, &ref->registration_list, list) { + if (reg->ops == ops && reg->priv == priv) + return reg; + } + return NULL; +} + +static int +dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + bool ref_exists = false; + unsigned long i; + int ret; + + xa_for_each(xa_pins, i, ref) { + if (ref->pin != pin) + continue; + reg = dpll_pin_registration_find(ref, ops, priv); + if (reg) { + refcount_inc(&ref->refcount); + return 0; + } + ref_exists = true; + break; + } + + if (!ref_exists) { + ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!ref) + return -ENOMEM; + ref->pin = pin; + INIT_LIST_HEAD(&ref->registration_list); + ret = xa_insert(xa_pins, pin->pin_idx, ref, GFP_KERNEL); + if (ret) { + kfree(ref); + return ret; + } + refcount_set(&ref->refcount, 1); + } + + reg = kzalloc(sizeof(*reg), GFP_KERNEL); + if (!reg) { + if (!ref_exists) { + xa_erase(xa_pins, pin->pin_idx); + kfree(ref); + } + return -ENOMEM; + } + reg->ops = ops; + reg->priv = priv; + if (ref_exists) + refcount_inc(&ref->refcount); + list_add_tail(®->list, &ref->registration_list); + + return 0; +} + +static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + unsigned long i; + + xa_for_each(xa_pins, i, ref) { + if (ref->pin != pin) + continue; + reg = dpll_pin_registration_find(ref, ops, priv); + if (WARN_ON(!reg)) + return -EINVAL; + if (refcount_dec_and_test(&ref->refcount)) { + list_del(®->list); + kfree(reg); + xa_erase(xa_pins, i); + WARN_ON(!list_empty(&ref->registration_list)); + kfree(ref); + } + return 0; + } + + return -EINVAL; +} + +static int +dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + bool ref_exists = false; + unsigned long i; + int ret; + + xa_for_each(xa_dplls, i, ref) { + if (ref->dpll != dpll) + continue; + reg = dpll_pin_registration_find(ref, ops, priv); + if (reg) { + refcount_inc(&ref->refcount); + return 0; + } + ref_exists = true; + break; + } + + if (!ref_exists) { + ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!ref) + return -ENOMEM; + ref->dpll = dpll; + INIT_LIST_HEAD(&ref->registration_list); + ret = xa_insert(xa_dplls, dpll->id, ref, GFP_KERNEL); + if (ret) { + kfree(ref); + return ret; + } + refcount_set(&ref->refcount, 1); + } + + reg = kzalloc(sizeof(*reg), GFP_KERNEL); + if (!reg) { + if (!ref_exists) { + xa_erase(xa_dplls, dpll->id); + kfree(ref); + } + return -ENOMEM; + } + reg->ops = ops; + reg->priv = priv; + if (ref_exists) + refcount_inc(&ref->refcount); + list_add_tail(®->list, &ref->registration_list); + + return 0; +} + +static void +dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + unsigned long i; + + xa_for_each(xa_dplls, i, ref) { + if (ref->dpll != dpll) + continue; + reg = dpll_pin_registration_find(ref, ops, priv); + if (WARN_ON(!reg)) + return; + if (refcount_dec_and_test(&ref->refcount)) { + list_del(®->list); + kfree(reg); + xa_erase(xa_dplls, i); + WARN_ON(!list_empty(&ref->registration_list)); + kfree(ref); + } + return; + } +} + +struct dpll_pin_ref *dpll_xa_ref_dpll_first(struct xarray *xa_refs) +{ + struct dpll_pin_ref *ref; + unsigned long i = 0; + + ref = xa_find(xa_refs, &i, ULONG_MAX, XA_PRESENT); + WARN_ON(!ref); + return ref; +} + +static struct dpll_device * +dpll_device_alloc(const u64 clock_id, u32 device_idx, struct module *module) +{ + struct dpll_device *dpll; + int ret; + + dpll = kzalloc(sizeof(*dpll), GFP_KERNEL); + if (!dpll) + return ERR_PTR(-ENOMEM); + refcount_set(&dpll->refcount, 1); + INIT_LIST_HEAD(&dpll->registration_list); + dpll->device_idx = device_idx; + dpll->clock_id = clock_id; + dpll->module = module; + ret = xa_alloc_cyclic(&dpll_device_xa, &dpll->id, dpll, xa_limit_32b, + &dpll_xa_id, GFP_KERNEL); + if (ret < 0) { + kfree(dpll); + return ERR_PTR(ret); + } + xa_init_flags(&dpll->pin_refs, XA_FLAGS_ALLOC); + + return dpll; +} + +/** + * dpll_device_get - find existing or create new dpll device + * @clock_id: clock_id of creator + * @device_idx: idx given by device driver + * @module: reference to registering module + * + * Get existing object of a dpll device, unique for given arguments. + * Create new if doesn't exist yet. + * + * Context: Acquires a lock (dpll_lock) + * Return: + * * valid dpll_device struct pointer if succeeded + * * ERR_PTR(X) - error + */ +struct dpll_device * +dpll_device_get(u64 clock_id, u32 device_idx, struct module *module) +{ + struct dpll_device *dpll, *ret = NULL; + unsigned long index; + + mutex_lock(&dpll_lock); + xa_for_each(&dpll_device_xa, index, dpll) { + if (dpll->clock_id == clock_id && + dpll->device_idx == device_idx && + dpll->module == module) { + ret = dpll; + refcount_inc(&ret->refcount); + break; + } + } + if (!ret) + ret = dpll_device_alloc(clock_id, device_idx, module); + mutex_unlock(&dpll_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dpll_device_get); + +/** + * dpll_device_put - decrease the refcount and free memory if possible + * @dpll: dpll_device struct pointer + * + * Context: Acquires a lock (dpll_lock) + * Drop reference for a dpll device, if all references are gone, delete + * dpll device object. + */ +void dpll_device_put(struct dpll_device *dpll) +{ + mutex_lock(&dpll_lock); + if (refcount_dec_and_test(&dpll->refcount)) { + ASSERT_DPLL_NOT_REGISTERED(dpll); + WARN_ON_ONCE(!xa_empty(&dpll->pin_refs)); + xa_destroy(&dpll->pin_refs); + xa_erase(&dpll_device_xa, dpll->id); + WARN_ON(!list_empty(&dpll->registration_list)); + kfree(dpll); + } + mutex_unlock(&dpll_lock); +} +EXPORT_SYMBOL_GPL(dpll_device_put); + +static struct dpll_device_registration * +dpll_device_registration_find(struct dpll_device *dpll, + const struct dpll_device_ops *ops, void *priv) +{ + struct dpll_device_registration *reg; + + list_for_each_entry(reg, &dpll->registration_list, list) { + if (reg->ops == ops && reg->priv == priv) + return reg; + } + return NULL; +} + +/** + * dpll_device_register - register the dpll device in the subsystem + * @dpll: pointer to a dpll + * @type: type of a dpll + * @ops: ops for a dpll device + * @priv: pointer to private information of owner + * + * Make dpll device available for user space. + * + * Context: Acquires a lock (dpll_lock) + * Return: + * * 0 on success + * * negative - error value + */ +int dpll_device_register(struct dpll_device *dpll, enum dpll_type type, + const struct dpll_device_ops *ops, void *priv) +{ + struct dpll_device_registration *reg; + bool first_registration = false; + + if (WARN_ON(!ops)) + return -EINVAL; + if (WARN_ON(!ops->mode_get)) + return -EINVAL; + if (WARN_ON(!ops->lock_status_get)) + return -EINVAL; + if (WARN_ON(type < DPLL_TYPE_PPS || type > DPLL_TYPE_MAX)) + return -EINVAL; + + mutex_lock(&dpll_lock); + reg = dpll_device_registration_find(dpll, ops, priv); + if (reg) { + mutex_unlock(&dpll_lock); + return -EEXIST; + } + + reg = kzalloc(sizeof(*reg), GFP_KERNEL); + if (!reg) { + mutex_unlock(&dpll_lock); + return -ENOMEM; + } + reg->ops = ops; + reg->priv = priv; + dpll->type = type; + first_registration = list_empty(&dpll->registration_list); + list_add_tail(®->list, &dpll->registration_list); + if (!first_registration) { + mutex_unlock(&dpll_lock); + return 0; + } + + xa_set_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED); + dpll_device_create_ntf(dpll); + mutex_unlock(&dpll_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(dpll_device_register); + +/** + * dpll_device_unregister - unregister dpll device + * @dpll: registered dpll pointer + * @ops: ops for a dpll device + * @priv: pointer to private information of owner + * + * Unregister device, make it unavailable for userspace. + * Note: It does not free the memory + * Context: Acquires a lock (dpll_lock) + */ +void dpll_device_unregister(struct dpll_device *dpll, + const struct dpll_device_ops *ops, void *priv) +{ + struct dpll_device_registration *reg; + + mutex_lock(&dpll_lock); + ASSERT_DPLL_REGISTERED(dpll); + dpll_device_delete_ntf(dpll); + reg = dpll_device_registration_find(dpll, ops, priv); + if (WARN_ON(!reg)) { + mutex_unlock(&dpll_lock); + return; + } + list_del(®->list); + kfree(reg); + + if (!list_empty(&dpll->registration_list)) { + mutex_unlock(&dpll_lock); + return; + } + xa_clear_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED); + mutex_unlock(&dpll_lock); +} +EXPORT_SYMBOL_GPL(dpll_device_unregister); + +static struct dpll_pin * +dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module, + const struct dpll_pin_properties *prop) +{ + struct dpll_pin *pin; + int ret; + + pin = kzalloc(sizeof(*pin), GFP_KERNEL); + if (!pin) + return ERR_PTR(-ENOMEM); + pin->pin_idx = pin_idx; + pin->clock_id = clock_id; + pin->module = module; + if (WARN_ON(prop->type < DPLL_PIN_TYPE_MUX || + prop->type > DPLL_PIN_TYPE_MAX)) { + ret = -EINVAL; + goto err; + } + pin->prop = prop; + refcount_set(&pin->refcount, 1); + xa_init_flags(&pin->dpll_refs, XA_FLAGS_ALLOC); + xa_init_flags(&pin->parent_refs, XA_FLAGS_ALLOC); + ret = xa_alloc(&dpll_pin_xa, &pin->id, pin, xa_limit_16b, GFP_KERNEL); + if (ret) + goto err; + return pin; +err: + xa_destroy(&pin->dpll_refs); + xa_destroy(&pin->parent_refs); + kfree(pin); + return ERR_PTR(ret); +} + +/** + * dpll_pin_get - find existing or create new dpll pin + * @clock_id: clock_id of creator + * @pin_idx: idx given by dev driver + * @module: reference to registering module + * @prop: dpll pin properties + * + * Get existing object of a pin (unique for given arguments) or create new + * if doesn't exist yet. + * + * Context: Acquires a lock (dpll_lock) + * Return: + * * valid allocated dpll_pin struct pointer if succeeded + * * ERR_PTR(X) - error + */ +struct dpll_pin * +dpll_pin_get(u64 clock_id, u32 pin_idx, struct module *module, + const struct dpll_pin_properties *prop) +{ + struct dpll_pin *pos, *ret = NULL; + unsigned long i; + + mutex_lock(&dpll_lock); + xa_for_each(&dpll_pin_xa, i, pos) { + if (pos->clock_id == clock_id && + pos->pin_idx == pin_idx && + pos->module == module) { + ret = pos; + refcount_inc(&ret->refcount); + break; + } + } + if (!ret) + ret = dpll_pin_alloc(clock_id, pin_idx, module, prop); + mutex_unlock(&dpll_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dpll_pin_get); + +/** + * dpll_pin_put - decrease the refcount and free memory if possible + * @pin: pointer to a pin to be put + * + * Drop reference for a pin, if all references are gone, delete pin object. + * + * Context: Acquires a lock (dpll_lock) + */ +void dpll_pin_put(struct dpll_pin *pin) +{ + mutex_lock(&dpll_lock); + if (refcount_dec_and_test(&pin->refcount)) { + xa_destroy(&pin->dpll_refs); + xa_destroy(&pin->parent_refs); + xa_erase(&dpll_pin_xa, pin->id); + kfree(pin); + } + mutex_unlock(&dpll_lock); +} +EXPORT_SYMBOL_GPL(dpll_pin_put); + +static int +__dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + int ret; + + ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv); + if (ret) + return ret; + ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv); + if (ret) + goto ref_pin_del; + xa_set_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED); + dpll_pin_create_ntf(pin); + + return ret; + +ref_pin_del: + dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv); + return ret; +} + +/** + * dpll_pin_register - register the dpll pin in the subsystem + * @dpll: pointer to a dpll + * @pin: pointer to a dpll pin + * @ops: ops for a dpll pin ops + * @priv: pointer to private information of owner + * + * Context: Acquires a lock (dpll_lock) + * Return: + * * 0 on success + * * negative - error value + */ +int +dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + int ret; + + if (WARN_ON(!ops) || + WARN_ON(!ops->state_on_dpll_get) || + WARN_ON(!ops->direction_get)) + return -EINVAL; + if (ASSERT_DPLL_REGISTERED(dpll)) + return -EINVAL; + + mutex_lock(&dpll_lock); + if (WARN_ON(!(dpll->module == pin->module && + dpll->clock_id == pin->clock_id))) + ret = -EINVAL; + else + ret = __dpll_pin_register(dpll, pin, ops, priv); + mutex_unlock(&dpll_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dpll_pin_register); + +static void +__dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv); + dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv); + if (xa_empty(&pin->dpll_refs)) + xa_clear_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED); +} + +/** + * dpll_pin_unregister - unregister dpll pin from dpll device + * @dpll: registered dpll pointer + * @pin: pointer to a pin + * @ops: ops for a dpll pin + * @priv: pointer to private information of owner + * + * Note: It does not free the memory + * Context: Acquires a lock (dpll_lock) + */ +void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + if (WARN_ON(xa_empty(&dpll->pin_refs))) + return; + if (WARN_ON(!xa_empty(&pin->parent_refs))) + return; + + mutex_lock(&dpll_lock); + dpll_pin_delete_ntf(pin); + __dpll_pin_unregister(dpll, pin, ops, priv); + mutex_unlock(&dpll_lock); +} +EXPORT_SYMBOL_GPL(dpll_pin_unregister); + +/** + * dpll_pin_on_pin_register - register a pin with a parent pin + * @parent: pointer to a parent pin + * @pin: pointer to a pin + * @ops: ops for a dpll pin + * @priv: pointer to private information of owner + * + * Register a pin with a parent pin, create references between them and + * between newly registered pin and dplls connected with a parent pin. + * + * Context: Acquires a lock (dpll_lock) + * Return: + * * 0 on success + * * negative - error value + */ +int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_ref *ref; + unsigned long i, stop; + int ret; + + if (WARN_ON(parent->prop->type != DPLL_PIN_TYPE_MUX)) + return -EINVAL; + + if (WARN_ON(!ops) || + WARN_ON(!ops->state_on_pin_get) || + WARN_ON(!ops->direction_get)) + return -EINVAL; + if (ASSERT_PIN_REGISTERED(parent)) + return -EINVAL; + + mutex_lock(&dpll_lock); + ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv); + if (ret) + goto unlock; + refcount_inc(&pin->refcount); + xa_for_each(&parent->dpll_refs, i, ref) { + ret = __dpll_pin_register(ref->dpll, pin, ops, priv); + if (ret) { + stop = i; + goto dpll_unregister; + } + dpll_pin_create_ntf(pin); + } + mutex_unlock(&dpll_lock); + + return ret; + +dpll_unregister: + xa_for_each(&parent->dpll_refs, i, ref) + if (i < stop) { + __dpll_pin_unregister(ref->dpll, pin, ops, priv); + dpll_pin_delete_ntf(pin); + } + refcount_dec(&pin->refcount); + dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv); +unlock: + mutex_unlock(&dpll_lock); + return ret; +} +EXPORT_SYMBOL_GPL(dpll_pin_on_pin_register); + +/** + * dpll_pin_on_pin_unregister - unregister dpll pin from a parent pin + * @parent: pointer to a parent pin + * @pin: pointer to a pin + * @ops: ops for a dpll pin + * @priv: pointer to private information of owner + * + * Context: Acquires a lock (dpll_lock) + * Note: It does not free the memory + */ +void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv) +{ + struct dpll_pin_ref *ref; + unsigned long i; + + mutex_lock(&dpll_lock); + dpll_pin_delete_ntf(pin); + dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv); + refcount_dec(&pin->refcount); + xa_for_each(&pin->dpll_refs, i, ref) + __dpll_pin_unregister(ref->dpll, pin, ops, priv); + mutex_unlock(&dpll_lock); +} +EXPORT_SYMBOL_GPL(dpll_pin_on_pin_unregister); + +static struct dpll_device_registration * +dpll_device_registration_first(struct dpll_device *dpll) +{ + struct dpll_device_registration *reg; + + reg = list_first_entry_or_null((struct list_head *)&dpll->registration_list, + struct dpll_device_registration, list); + WARN_ON(!reg); + return reg; +} + +void *dpll_priv(struct dpll_device *dpll) +{ + struct dpll_device_registration *reg; + + reg = dpll_device_registration_first(dpll); + return reg->priv; +} + +const struct dpll_device_ops *dpll_device_ops(struct dpll_device *dpll) +{ + struct dpll_device_registration *reg; + + reg = dpll_device_registration_first(dpll); + return reg->ops; +} + +static struct dpll_pin_registration * +dpll_pin_registration_first(struct dpll_pin_ref *ref) +{ + struct dpll_pin_registration *reg; + + reg = list_first_entry_or_null(&ref->registration_list, + struct dpll_pin_registration, list); + WARN_ON(!reg); + return reg; +} + +void *dpll_pin_on_dpll_priv(struct dpll_device *dpll, + struct dpll_pin *pin) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + + ref = xa_load(&dpll->pin_refs, pin->pin_idx); + if (!ref) + return NULL; + reg = dpll_pin_registration_first(ref); + return reg->priv; +} + +void *dpll_pin_on_pin_priv(struct dpll_pin *parent, + struct dpll_pin *pin) +{ + struct dpll_pin_registration *reg; + struct dpll_pin_ref *ref; + + ref = xa_load(&pin->parent_refs, parent->pin_idx); + if (!ref) + return NULL; + reg = dpll_pin_registration_first(ref); + return reg->priv; +} + +const struct dpll_pin_ops *dpll_pin_ops(struct dpll_pin_ref *ref) +{ + struct dpll_pin_registration *reg; + + reg = dpll_pin_registration_first(ref); + return reg->ops; +} + +static int __init dpll_init(void) +{ + int ret; + + ret = genl_register_family(&dpll_nl_family); + if (ret) + goto error; + + return 0; + +error: + mutex_destroy(&dpll_lock); + return ret; +} + +static void __exit dpll_exit(void) +{ + genl_unregister_family(&dpll_nl_family); + mutex_destroy(&dpll_lock); +} + +subsys_initcall(dpll_init); +module_exit(dpll_exit); diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h new file mode 100644 index 000000000000..5585873c5c1b --- /dev/null +++ b/drivers/dpll/dpll_core.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel and affiliates + */ + +#ifndef __DPLL_CORE_H__ +#define __DPLL_CORE_H__ + +#include <linux/dpll.h> +#include <linux/list.h> +#include <linux/refcount.h> +#include "dpll_nl.h" + +#define DPLL_REGISTERED XA_MARK_1 + +/** + * struct dpll_device - stores DPLL device internal data + * @id: unique id number for device given by dpll subsystem + * @device_idx: id given by dev driver + * @clock_id: unique identifier (clock_id) of a dpll + * @module: module of creator + * @type: type of a dpll + * @pin_refs: stores pins registered within a dpll + * @refcount: refcount + * @registration_list: list of registered ops and priv data of dpll owners + **/ +struct dpll_device { + u32 id; + u32 device_idx; + u64 clock_id; + struct module *module; + enum dpll_type type; + struct xarray pin_refs; + refcount_t refcount; + struct list_head registration_list; +}; + +/** + * struct dpll_pin - structure for a dpll pin + * @id: unique id number for pin given by dpll subsystem + * @pin_idx: index of a pin given by dev driver + * @clock_id: clock_id of creator + * @module: module of creator + * @dpll_refs: hold referencees to dplls pin was registered with + * @parent_refs: hold references to parent pins pin was registered with + * @prop: pointer to pin properties given by registerer + * @rclk_dev_name: holds name of device when pin can recover clock from it + * @refcount: refcount + **/ +struct dpll_pin { + u32 id; + u32 pin_idx; + u64 clock_id; + struct module *module; + struct xarray dpll_refs; + struct xarray parent_refs; + const struct dpll_pin_properties *prop; + refcount_t refcount; +}; + +/** + * struct dpll_pin_ref - structure for referencing either dpll or pins + * @dpll: pointer to a dpll + * @pin: pointer to a pin + * @registration_list: list of ops and priv data registered with the ref + * @refcount: refcount + **/ +struct dpll_pin_ref { + union { + struct dpll_device *dpll; + struct dpll_pin *pin; + }; + struct list_head registration_list; + refcount_t refcount; +}; + +void *dpll_priv(struct dpll_device *dpll); +void *dpll_pin_on_dpll_priv(struct dpll_device *dpll, struct dpll_pin *pin); +void *dpll_pin_on_pin_priv(struct dpll_pin *parent, struct dpll_pin *pin); + +const struct dpll_device_ops *dpll_device_ops(struct dpll_device *dpll); +struct dpll_device *dpll_device_get_by_id(int id); +const struct dpll_pin_ops *dpll_pin_ops(struct dpll_pin_ref *ref); +struct dpll_pin_ref *dpll_xa_ref_dpll_first(struct xarray *xa_refs); +extern struct xarray dpll_device_xa; +extern struct xarray dpll_pin_xa; +extern struct mutex dpll_lock; +#endif diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c new file mode 100644 index 000000000000..e20daba6896a --- /dev/null +++ b/drivers/dpll/dpll_netlink.c @@ -0,0 +1,1253 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic netlink for DPLL management framework + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel and affiliates + * + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <net/genetlink.h> +#include "dpll_core.h" +#include "dpll_netlink.h" +#include "dpll_nl.h" +#include <uapi/linux/dpll.h> + +#define ASSERT_NOT_NULL(ptr) (WARN_ON(!ptr)) + +#define xa_for_each_marked_start(xa, index, entry, filter, start) \ + for (index = start, entry = xa_find(xa, &index, ULONG_MAX, filter); \ + entry; entry = xa_find_after(xa, &index, ULONG_MAX, filter)) + +struct dpll_dump_ctx { + unsigned long idx; +}; + +static struct dpll_dump_ctx *dpll_dump_context(struct netlink_callback *cb) +{ + return (struct dpll_dump_ctx *)cb->ctx; +} + +static int +dpll_msg_add_dev_handle(struct sk_buff *msg, struct dpll_device *dpll) +{ + if (nla_put_u32(msg, DPLL_A_ID, dpll->id)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id) +{ + if (nla_put_u32(msg, DPLL_A_PIN_PARENT_ID, id)) + return -EMSGSIZE; + + return 0; +} + +/** + * dpll_msg_pin_handle_size - get size of pin handle attribute for given pin + * @pin: pin pointer + * + * Return: byte size of pin handle attribute for given pin. + */ +size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +{ + return pin ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ +} +EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); + +/** + * dpll_msg_add_pin_handle - attach pin handle attribute to a given message + * @msg: pointer to sk_buff message to attach a pin handle + * @pin: pin pointer + * + * Return: + * * 0 - success + * * -EMSGSIZE - no space in message to attach pin handle + */ +int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +{ + if (!pin) + return 0; + if (nla_put_u32(msg, DPLL_A_PIN_ID, pin->id)) + return -EMSGSIZE; + return 0; +} +EXPORT_SYMBOL_GPL(dpll_msg_add_pin_handle); + +static int +dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *ops = dpll_device_ops(dpll); + enum dpll_mode mode; + int ret; + + ret = ops->mode_get(dpll, dpll_priv(dpll), &mode, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_MODE, mode)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_mode_supported(struct sk_buff *msg, struct dpll_device *dpll, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *ops = dpll_device_ops(dpll); + enum dpll_mode mode; + + if (!ops->mode_supported) + return 0; + for (mode = DPLL_MODE_MANUAL; mode <= DPLL_MODE_MAX; mode++) + if (ops->mode_supported(dpll, dpll_priv(dpll), mode, extack)) + if (nla_put_u32(msg, DPLL_A_MODE_SUPPORTED, mode)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_lock_status(struct sk_buff *msg, struct dpll_device *dpll, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *ops = dpll_device_ops(dpll); + enum dpll_lock_status status; + int ret; + + ret = ops->lock_status_get(dpll, dpll_priv(dpll), &status, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_LOCK_STATUS, status)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_temp(struct sk_buff *msg, struct dpll_device *dpll, + struct netlink_ext_ack *extack) +{ + const struct dpll_device_ops *ops = dpll_device_ops(dpll); + s32 temp; + int ret; + + if (!ops->temp_get) + return 0; + ret = ops->temp_get(dpll, dpll_priv(dpll), &temp, extack); + if (ret) + return ret; + if (nla_put_s32(msg, DPLL_A_TEMP, temp)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_pin_prio(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + u32 prio; + int ret; + + if (!ops->prio_get) + return 0; + ret = ops->prio_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll, + dpll_priv(dpll), &prio, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_PIN_PRIO, prio)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_pin_on_dpll_state(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + enum dpll_pin_state state; + int ret; + + if (!ops->state_on_dpll_get) + return 0; + ret = ops->state_on_dpll_get(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), &state, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_PIN_STATE, state)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_pin_direction(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + enum dpll_pin_direction direction; + int ret; + + ret = ops->direction_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll, + dpll_priv(dpll), &direction, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_PIN_DIRECTION, direction)) + return -EMSGSIZE; + + return 0; +} + +static int +dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + struct nlattr *nest; + int fs, ret; + u64 freq; + + if (!ops->frequency_get) + return 0; + ret = ops->frequency_get(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll, + dpll_priv(dpll), &freq, extack); + if (ret) + return ret; + if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY, sizeof(freq), &freq, + DPLL_A_PIN_PAD)) + return -EMSGSIZE; + for (fs = 0; fs < pin->prop->freq_supported_num; fs++) { + nest = nla_nest_start(msg, DPLL_A_PIN_FREQUENCY_SUPPORTED); + if (!nest) + return -EMSGSIZE; + freq = pin->prop->freq_supported[fs].min; + if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MIN, sizeof(freq), + &freq, DPLL_A_PIN_PAD)) { + nla_nest_cancel(msg, nest); + return -EMSGSIZE; + } + freq = pin->prop->freq_supported[fs].max; + if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MAX, sizeof(freq), + &freq, DPLL_A_PIN_PAD)) { + nla_nest_cancel(msg, nest); + return -EMSGSIZE; + } + nla_nest_end(msg, nest); + } + + return 0; +} + +static bool dpll_pin_is_freq_supported(struct dpll_pin *pin, u32 freq) +{ + int fs; + + for (fs = 0; fs < pin->prop->freq_supported_num; fs++) + if (freq >= pin->prop->freq_supported[fs].min && + freq <= pin->prop->freq_supported[fs].max) + return true; + return false; +} + +static int +dpll_msg_add_pin_parents(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *dpll_ref, + struct netlink_ext_ack *extack) +{ + enum dpll_pin_state state; + struct dpll_pin_ref *ref; + struct dpll_pin *ppin; + struct nlattr *nest; + unsigned long index; + int ret; + + xa_for_each(&pin->parent_refs, index, ref) { + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + void *parent_priv; + + ppin = ref->pin; + parent_priv = dpll_pin_on_dpll_priv(dpll_ref->dpll, ppin); + ret = ops->state_on_pin_get(pin, + dpll_pin_on_pin_priv(ppin, pin), + ppin, parent_priv, &state, extack); + if (ret) + return ret; + nest = nla_nest_start(msg, DPLL_A_PIN_PARENT_PIN); + if (!nest) + return -EMSGSIZE; + ret = dpll_msg_add_dev_parent_handle(msg, ppin->id); + if (ret) + goto nest_cancel; + if (nla_put_u32(msg, DPLL_A_PIN_STATE, state)) { + ret = -EMSGSIZE; + goto nest_cancel; + } + nla_nest_end(msg, nest); + } + + return 0; + +nest_cancel: + nla_nest_cancel(msg, nest); + return ret; +} + +static int +dpll_msg_add_pin_dplls(struct sk_buff *msg, struct dpll_pin *pin, + struct netlink_ext_ack *extack) +{ + struct dpll_pin_ref *ref; + struct nlattr *attr; + unsigned long index; + int ret; + + xa_for_each(&pin->dpll_refs, index, ref) { + attr = nla_nest_start(msg, DPLL_A_PIN_PARENT_DEVICE); + if (!attr) + return -EMSGSIZE; + ret = dpll_msg_add_dev_parent_handle(msg, ref->dpll->id); + if (ret) + goto nest_cancel; + ret = dpll_msg_add_pin_on_dpll_state(msg, pin, ref, extack); + if (ret) + goto nest_cancel; + ret = dpll_msg_add_pin_prio(msg, pin, ref, extack); + if (ret) + goto nest_cancel; + ret = dpll_msg_add_pin_direction(msg, pin, ref, extack); + if (ret) + goto nest_cancel; + nla_nest_end(msg, attr); + } + + return 0; + +nest_cancel: + nla_nest_end(msg, attr); + return ret; +} + +static int +dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_properties *prop = pin->prop; + struct dpll_pin_ref *ref; + int ret; + + ref = dpll_xa_ref_dpll_first(&pin->dpll_refs); + ASSERT_NOT_NULL(ref); + + ret = dpll_msg_add_pin_handle(msg, pin); + if (ret) + return ret; + if (nla_put_string(msg, DPLL_A_PIN_MODULE_NAME, + module_name(pin->module))) + return -EMSGSIZE; + if (nla_put_64bit(msg, DPLL_A_PIN_CLOCK_ID, sizeof(pin->clock_id), + &pin->clock_id, DPLL_A_PIN_PAD)) + return -EMSGSIZE; + if (prop->board_label && + nla_put_string(msg, DPLL_A_PIN_BOARD_LABEL, prop->board_label)) + return -EMSGSIZE; + if (prop->panel_label && + nla_put_string(msg, DPLL_A_PIN_PANEL_LABEL, prop->panel_label)) + return -EMSGSIZE; + if (prop->package_label && + nla_put_string(msg, DPLL_A_PIN_PACKAGE_LABEL, + prop->package_label)) + return -EMSGSIZE; + if (nla_put_u32(msg, DPLL_A_PIN_TYPE, prop->type)) + return -EMSGSIZE; + if (nla_put_u32(msg, DPLL_A_PIN_CAPABILITIES, prop->capabilities)) + return -EMSGSIZE; + ret = dpll_msg_add_pin_freq(msg, pin, ref, extack); + if (ret) + return ret; + if (xa_empty(&pin->parent_refs)) + ret = dpll_msg_add_pin_dplls(msg, pin, extack); + else + ret = dpll_msg_add_pin_parents(msg, pin, ref, extack); + + return ret; +} + +static int +dpll_device_get_one(struct dpll_device *dpll, struct sk_buff *msg, + struct netlink_ext_ack *extack) +{ + int ret; + + ret = dpll_msg_add_dev_handle(msg, dpll); + if (ret) + return ret; + if (nla_put_string(msg, DPLL_A_MODULE_NAME, module_name(dpll->module))) + return -EMSGSIZE; + if (nla_put_64bit(msg, DPLL_A_CLOCK_ID, sizeof(dpll->clock_id), + &dpll->clock_id, DPLL_A_PAD)) + return -EMSGSIZE; + ret = dpll_msg_add_temp(msg, dpll, extack); + if (ret) + return ret; + ret = dpll_msg_add_lock_status(msg, dpll, extack); + if (ret) + return ret; + ret = dpll_msg_add_mode(msg, dpll, extack); + if (ret) + return ret; + ret = dpll_msg_add_mode_supported(msg, dpll, extack); + if (ret) + return ret; + if (nla_put_u32(msg, DPLL_A_TYPE, dpll->type)) + return -EMSGSIZE; + + return ret; +} + +static int +dpll_device_event_send(enum dpll_cmd event, struct dpll_device *dpll) +{ + struct sk_buff *msg; + int ret = -ENOMEM; + void *hdr; + + if (WARN_ON(!xa_get_mark(&dpll_device_xa, dpll->id, DPLL_REGISTERED))) + return -ENODEV; + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + hdr = genlmsg_put(msg, 0, 0, &dpll_nl_family, 0, event); + if (!hdr) + goto err_free_msg; + ret = dpll_device_get_one(dpll, msg, NULL); + if (ret) + goto err_cancel_msg; + genlmsg_end(msg, hdr); + genlmsg_multicast(&dpll_nl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +err_cancel_msg: + genlmsg_cancel(msg, hdr); +err_free_msg: + nlmsg_free(msg); + + return ret; +} + +int dpll_device_create_ntf(struct dpll_device *dpll) +{ + return dpll_device_event_send(DPLL_CMD_DEVICE_CREATE_NTF, dpll); +} + +int dpll_device_delete_ntf(struct dpll_device *dpll) +{ + return dpll_device_event_send(DPLL_CMD_DEVICE_DELETE_NTF, dpll); +} + +static int +__dpll_device_change_ntf(struct dpll_device *dpll) +{ + return dpll_device_event_send(DPLL_CMD_DEVICE_CHANGE_NTF, dpll); +} + +/** + * dpll_device_change_ntf - notify that the dpll device has been changed + * @dpll: registered dpll pointer + * + * Context: acquires and holds a dpll_lock. + * Return: 0 if succeeds, error code otherwise. + */ +int dpll_device_change_ntf(struct dpll_device *dpll) +{ + int ret; + + mutex_lock(&dpll_lock); + ret = __dpll_device_change_ntf(dpll); + mutex_unlock(&dpll_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dpll_device_change_ntf); + +static int +dpll_pin_event_send(enum dpll_cmd event, struct dpll_pin *pin) +{ + struct sk_buff *msg; + int ret = -ENOMEM; + void *hdr; + + if (WARN_ON(!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED))) + return -ENODEV; + + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &dpll_nl_family, 0, event); + if (!hdr) + goto err_free_msg; + ret = dpll_cmd_pin_get_one(msg, pin, NULL); + if (ret) + goto err_cancel_msg; + genlmsg_end(msg, hdr); + genlmsg_multicast(&dpll_nl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +err_cancel_msg: + genlmsg_cancel(msg, hdr); +err_free_msg: + nlmsg_free(msg); + + return ret; +} + +int dpll_pin_create_ntf(struct dpll_pin *pin) +{ + return dpll_pin_event_send(DPLL_CMD_PIN_CREATE_NTF, pin); +} + +int dpll_pin_delete_ntf(struct dpll_pin *pin) +{ + return dpll_pin_event_send(DPLL_CMD_PIN_DELETE_NTF, pin); +} + +static int __dpll_pin_change_ntf(struct dpll_pin *pin) +{ + return dpll_pin_event_send(DPLL_CMD_PIN_CHANGE_NTF, pin); +} + +/** + * dpll_pin_change_ntf - notify that the pin has been changed + * @pin: registered pin pointer + * + * Context: acquires and holds a dpll_lock. + * Return: 0 if succeeds, error code otherwise. + */ +int dpll_pin_change_ntf(struct dpll_pin *pin) +{ + int ret; + + mutex_lock(&dpll_lock); + ret = __dpll_pin_change_ntf(pin); + mutex_unlock(&dpll_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dpll_pin_change_ntf); + +static int +dpll_pin_freq_set(struct dpll_pin *pin, struct nlattr *a, + struct netlink_ext_ack *extack) +{ + u64 freq = nla_get_u64(a); + struct dpll_pin_ref *ref; + unsigned long i; + int ret; + + if (!dpll_pin_is_freq_supported(pin, freq)) { + NL_SET_ERR_MSG_ATTR(extack, a, "frequency is not supported by the device"); + return -EINVAL; + } + + xa_for_each(&pin->dpll_refs, i, ref) { + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + + if (!ops->frequency_set) + return -EOPNOTSUPP; + ret = ops->frequency_set(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), freq, extack); + if (ret) + return ret; + } + __dpll_pin_change_ntf(pin); + + return 0; +} + +static int +dpll_pin_on_pin_state_set(struct dpll_pin *pin, u32 parent_idx, + enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + struct dpll_pin_ref *parent_ref; + const struct dpll_pin_ops *ops; + struct dpll_pin_ref *dpll_ref; + void *pin_priv, *parent_priv; + struct dpll_pin *parent; + unsigned long i; + int ret; + + if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE & + pin->prop->capabilities)) { + NL_SET_ERR_MSG(extack, "state changing is not allowed"); + return -EOPNOTSUPP; + } + parent = xa_load(&dpll_pin_xa, parent_idx); + if (!parent) + return -EINVAL; + parent_ref = xa_load(&pin->parent_refs, parent->pin_idx); + if (!parent_ref) + return -EINVAL; + xa_for_each(&parent->dpll_refs, i, dpll_ref) { + ops = dpll_pin_ops(parent_ref); + if (!ops->state_on_pin_set) + return -EOPNOTSUPP; + pin_priv = dpll_pin_on_pin_priv(parent, pin); + parent_priv = dpll_pin_on_dpll_priv(dpll_ref->dpll, parent); + ret = ops->state_on_pin_set(pin, pin_priv, parent, parent_priv, + state, extack); + if (ret) + return ret; + } + __dpll_pin_change_ntf(pin); + + return 0; +} + +static int +dpll_pin_state_set(struct dpll_device *dpll, struct dpll_pin *pin, + enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops; + struct dpll_pin_ref *ref; + int ret; + + if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE & + pin->prop->capabilities)) { + NL_SET_ERR_MSG(extack, "state changing is not allowed"); + return -EOPNOTSUPP; + } + ref = xa_load(&pin->dpll_refs, dpll->id); + ASSERT_NOT_NULL(ref); + ops = dpll_pin_ops(ref); + if (!ops->state_on_dpll_set) + return -EOPNOTSUPP; + ret = ops->state_on_dpll_set(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), state, extack); + if (ret) + return ret; + __dpll_pin_change_ntf(pin); + + return 0; +} + +static int +dpll_pin_prio_set(struct dpll_device *dpll, struct dpll_pin *pin, + u32 prio, struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops; + struct dpll_pin_ref *ref; + int ret; + + if (!(DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE & + pin->prop->capabilities)) { + NL_SET_ERR_MSG(extack, "prio changing is not allowed"); + return -EOPNOTSUPP; + } + ref = xa_load(&pin->dpll_refs, dpll->id); + ASSERT_NOT_NULL(ref); + ops = dpll_pin_ops(ref); + if (!ops->prio_set) + return -EOPNOTSUPP; + ret = ops->prio_set(pin, dpll_pin_on_dpll_priv(dpll, pin), dpll, + dpll_priv(dpll), prio, extack); + if (ret) + return ret; + __dpll_pin_change_ntf(pin); + + return 0; +} + +static int +dpll_pin_direction_set(struct dpll_pin *pin, struct dpll_device *dpll, + enum dpll_pin_direction direction, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops; + struct dpll_pin_ref *ref; + int ret; + + if (!(DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE & + pin->prop->capabilities)) { + NL_SET_ERR_MSG(extack, "direction changing is not allowed"); + return -EOPNOTSUPP; + } + ref = xa_load(&pin->dpll_refs, dpll->id); + ASSERT_NOT_NULL(ref); + ops = dpll_pin_ops(ref); + if (!ops->direction_set) + return -EOPNOTSUPP; + ret = ops->direction_set(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), direction, extack); + if (ret) + return ret; + __dpll_pin_change_ntf(pin); + + return 0; +} + +static int +dpll_pin_parent_device_set(struct dpll_pin *pin, struct nlattr *parent_nest, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[DPLL_A_PIN_MAX + 1]; + enum dpll_pin_direction direction; + enum dpll_pin_state state; + struct dpll_pin_ref *ref; + struct dpll_device *dpll; + u32 pdpll_idx, prio; + int ret; + + nla_parse_nested(tb, DPLL_A_PIN_MAX, parent_nest, + dpll_pin_parent_device_nl_policy, extack); + if (!tb[DPLL_A_PIN_PARENT_ID]) { + NL_SET_ERR_MSG(extack, "device parent id expected"); + return -EINVAL; + } + pdpll_idx = nla_get_u32(tb[DPLL_A_PIN_PARENT_ID]); + dpll = xa_load(&dpll_device_xa, pdpll_idx); + if (!dpll) { + NL_SET_ERR_MSG(extack, "parent device not found"); + return -EINVAL; + } + ref = xa_load(&pin->dpll_refs, dpll->id); + if (!ref) { + NL_SET_ERR_MSG(extack, "pin not connected to given parent device"); + return -EINVAL; + } + if (tb[DPLL_A_PIN_STATE]) { + state = nla_get_u32(tb[DPLL_A_PIN_STATE]); + ret = dpll_pin_state_set(dpll, pin, state, extack); + if (ret) + return ret; + } + if (tb[DPLL_A_PIN_PRIO]) { + prio = nla_get_u32(tb[DPLL_A_PIN_PRIO]); + ret = dpll_pin_prio_set(dpll, pin, prio, extack); + if (ret) + return ret; + } + if (tb[DPLL_A_PIN_DIRECTION]) { + direction = nla_get_u32(tb[DPLL_A_PIN_DIRECTION]); + ret = dpll_pin_direction_set(pin, dpll, direction, extack); + if (ret) + return ret; + } + return 0; +} + +static int +dpll_pin_parent_pin_set(struct dpll_pin *pin, struct nlattr *parent_nest, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[DPLL_A_PIN_MAX + 1]; + enum dpll_pin_state state; + u32 ppin_idx; + int ret; + + nla_parse_nested(tb, DPLL_A_PIN_MAX, parent_nest, + dpll_pin_parent_pin_nl_policy, extack); + if (!tb[DPLL_A_PIN_PARENT_ID]) { + NL_SET_ERR_MSG(extack, "device parent id expected"); + return -EINVAL; + } + ppin_idx = nla_get_u32(tb[DPLL_A_PIN_PARENT_ID]); + state = nla_get_u32(tb[DPLL_A_PIN_STATE]); + ret = dpll_pin_on_pin_state_set(pin, ppin_idx, state, extack); + if (ret) + return ret; + + return 0; +} + +static int +dpll_pin_set_from_nlattr(struct dpll_pin *pin, struct genl_info *info) +{ + struct nlattr *a; + int rem, ret; + + nla_for_each_attr(a, genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + switch (nla_type(a)) { + case DPLL_A_PIN_FREQUENCY: + ret = dpll_pin_freq_set(pin, a, info->extack); + if (ret) + return ret; + break; + case DPLL_A_PIN_PARENT_DEVICE: + ret = dpll_pin_parent_device_set(pin, a, info->extack); + if (ret) + return ret; + break; + case DPLL_A_PIN_PARENT_PIN: + ret = dpll_pin_parent_pin_set(pin, a, info->extack); + if (ret) + return ret; + break; + } + } + + return 0; +} + +static struct dpll_pin * +dpll_pin_find(u64 clock_id, struct nlattr *mod_name_attr, + enum dpll_pin_type type, struct nlattr *board_label, + struct nlattr *panel_label, struct nlattr *package_label, + struct netlink_ext_ack *extack) +{ + bool board_match, panel_match, package_match; + struct dpll_pin *pin_match = NULL, *pin; + const struct dpll_pin_properties *prop; + bool cid_match, mod_match, type_match; + unsigned long i; + + xa_for_each_marked(&dpll_pin_xa, i, pin, DPLL_REGISTERED) { + prop = pin->prop; + cid_match = clock_id ? pin->clock_id == clock_id : true; + mod_match = mod_name_attr && module_name(pin->module) ? + !nla_strcmp(mod_name_attr, + module_name(pin->module)) : true; + type_match = type ? prop->type == type : true; + board_match = board_label ? (prop->board_label ? + !nla_strcmp(board_label, prop->board_label) : false) : + true; + panel_match = panel_label ? (prop->panel_label ? + !nla_strcmp(panel_label, prop->panel_label) : false) : + true; + package_match = package_label ? (prop->package_label ? + !nla_strcmp(package_label, prop->package_label) : + false) : true; + if (cid_match && mod_match && type_match && board_match && + panel_match && package_match) { + if (pin_match) { + NL_SET_ERR_MSG(extack, "multiple matches"); + return ERR_PTR(-EINVAL); + } + pin_match = pin; + } + } + if (!pin_match) { + NL_SET_ERR_MSG(extack, "not found"); + return ERR_PTR(-ENODEV); + } + return pin_match; +} + +static struct dpll_pin *dpll_pin_find_from_nlattr(struct genl_info *info) +{ + struct nlattr *attr, *mod_name_attr = NULL, *board_label_attr = NULL, + *panel_label_attr = NULL, *package_label_attr = NULL; + enum dpll_pin_type type = 0; + u64 clock_id = 0; + int rem = 0; + + nla_for_each_attr(attr, genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + switch (nla_type(attr)) { + case DPLL_A_PIN_CLOCK_ID: + if (clock_id) + goto duplicated_attr; + clock_id = nla_get_u64(attr); + break; + case DPLL_A_PIN_MODULE_NAME: + if (mod_name_attr) + goto duplicated_attr; + mod_name_attr = attr; + break; + case DPLL_A_PIN_TYPE: + if (type) + goto duplicated_attr; + type = nla_get_u32(attr); + break; + case DPLL_A_PIN_BOARD_LABEL: + if (board_label_attr) + goto duplicated_attr; + board_label_attr = attr; + break; + case DPLL_A_PIN_PANEL_LABEL: + if (panel_label_attr) + goto duplicated_attr; + panel_label_attr = attr; + break; + case DPLL_A_PIN_PACKAGE_LABEL: + if (package_label_attr) + goto duplicated_attr; + package_label_attr = attr; + break; + default: + break; + } + } + if (!(clock_id || mod_name_attr || board_label_attr || + panel_label_attr || package_label_attr)) { + NL_SET_ERR_MSG(info->extack, "missing attributes"); + return ERR_PTR(-EINVAL); + } + return dpll_pin_find(clock_id, mod_name_attr, type, board_label_attr, + panel_label_attr, package_label_attr, + info->extack); +duplicated_attr: + NL_SET_ERR_MSG(info->extack, "duplicated attribute"); + return ERR_PTR(-EINVAL); +} + +int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct dpll_pin *pin; + struct sk_buff *msg; + struct nlattr *hdr; + int ret; + + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, + DPLL_CMD_PIN_ID_GET); + if (!hdr) + return -EMSGSIZE; + + pin = dpll_pin_find_from_nlattr(info); + if (!IS_ERR(pin)) { + ret = dpll_msg_add_pin_handle(msg, pin); + if (ret) { + nlmsg_free(msg); + return ret; + } + } + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); +} + +int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct dpll_pin *pin = info->user_ptr[0]; + struct sk_buff *msg; + struct nlattr *hdr; + int ret; + + if (!pin) + return -ENODEV; + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, + DPLL_CMD_PIN_GET); + if (!hdr) + return -EMSGSIZE; + ret = dpll_cmd_pin_get_one(msg, pin, info->extack); + if (ret) { + nlmsg_free(msg); + return ret; + } + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); +} + +int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct dpll_dump_ctx *ctx = dpll_dump_context(cb); + struct dpll_pin *pin; + struct nlattr *hdr; + unsigned long i; + int ret = 0; + + xa_for_each_marked_start(&dpll_pin_xa, i, pin, DPLL_REGISTERED, + ctx->idx) { + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + &dpll_nl_family, NLM_F_MULTI, + DPLL_CMD_PIN_GET); + if (!hdr) { + ret = -EMSGSIZE; + break; + } + ret = dpll_cmd_pin_get_one(skb, pin, cb->extack); + if (ret) { + genlmsg_cancel(skb, hdr); + break; + } + genlmsg_end(skb, hdr); + } + if (ret == -EMSGSIZE) { + ctx->idx = i; + return skb->len; + } + return ret; +} + +int dpll_nl_pin_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct dpll_pin *pin = info->user_ptr[0]; + + return dpll_pin_set_from_nlattr(pin, info); +} + +static struct dpll_device * +dpll_device_find(u64 clock_id, struct nlattr *mod_name_attr, + enum dpll_type type, struct netlink_ext_ack *extack) +{ + struct dpll_device *dpll_match = NULL, *dpll; + bool cid_match, mod_match, type_match; + unsigned long i; + + xa_for_each_marked(&dpll_device_xa, i, dpll, DPLL_REGISTERED) { + cid_match = clock_id ? dpll->clock_id == clock_id : true; + mod_match = mod_name_attr ? (module_name(dpll->module) ? + !nla_strcmp(mod_name_attr, + module_name(dpll->module)) : false) : true; + type_match = type ? dpll->type == type : true; + if (cid_match && mod_match && type_match) { + if (dpll_match) { + NL_SET_ERR_MSG(extack, "multiple matches"); + return ERR_PTR(-EINVAL); + } + dpll_match = dpll; + } + } + if (!dpll_match) { + NL_SET_ERR_MSG(extack, "not found"); + return ERR_PTR(-ENODEV); + } + + return dpll_match; +} + +static struct dpll_device * +dpll_device_find_from_nlattr(struct genl_info *info) +{ + struct nlattr *attr, *mod_name_attr = NULL; + enum dpll_type type = 0; + u64 clock_id = 0; + int rem = 0; + + nla_for_each_attr(attr, genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + switch (nla_type(attr)) { + case DPLL_A_CLOCK_ID: + if (clock_id) + goto duplicated_attr; + clock_id = nla_get_u64(attr); + break; + case DPLL_A_MODULE_NAME: + if (mod_name_attr) + goto duplicated_attr; + mod_name_attr = attr; + break; + case DPLL_A_TYPE: + if (type) + goto duplicated_attr; + type = nla_get_u32(attr); + break; + default: + break; + } + } + if (!clock_id && !mod_name_attr && !type) { + NL_SET_ERR_MSG(info->extack, "missing attributes"); + return ERR_PTR(-EINVAL); + } + return dpll_device_find(clock_id, mod_name_attr, type, info->extack); +duplicated_attr: + NL_SET_ERR_MSG(info->extack, "duplicated attribute"); + return ERR_PTR(-EINVAL); +} + +int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct dpll_device *dpll; + struct sk_buff *msg; + struct nlattr *hdr; + int ret; + + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, + DPLL_CMD_DEVICE_ID_GET); + if (!hdr) + return -EMSGSIZE; + + dpll = dpll_device_find_from_nlattr(info); + if (!IS_ERR(dpll)) { + ret = dpll_msg_add_dev_handle(msg, dpll); + if (ret) { + nlmsg_free(msg); + return ret; + } + } + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); +} + +int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct dpll_device *dpll = info->user_ptr[0]; + struct sk_buff *msg; + struct nlattr *hdr; + int ret; + + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, + DPLL_CMD_DEVICE_GET); + if (!hdr) + return -EMSGSIZE; + + ret = dpll_device_get_one(dpll, msg, info->extack); + if (ret) { + nlmsg_free(msg); + return ret; + } + genlmsg_end(msg, hdr); + + return genlmsg_reply(msg, info); +} + +int dpll_nl_device_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + /* placeholder for set command */ + return 0; +} + +int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct dpll_dump_ctx *ctx = dpll_dump_context(cb); + struct dpll_device *dpll; + struct nlattr *hdr; + unsigned long i; + int ret = 0; + + xa_for_each_marked_start(&dpll_device_xa, i, dpll, DPLL_REGISTERED, + ctx->idx) { + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &dpll_nl_family, + NLM_F_MULTI, DPLL_CMD_DEVICE_GET); + if (!hdr) { + ret = -EMSGSIZE; + break; + } + ret = dpll_device_get_one(dpll, skb, cb->extack); + if (ret) { + genlmsg_cancel(skb, hdr); + break; + } + genlmsg_end(skb, hdr); + } + if (ret == -EMSGSIZE) { + ctx->idx = i; + return skb->len; + } + return ret; +} + +int dpll_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + u32 id; + + if (GENL_REQ_ATTR_CHECK(info, DPLL_A_ID)) + return -EINVAL; + + mutex_lock(&dpll_lock); + id = nla_get_u32(info->attrs[DPLL_A_ID]); + info->user_ptr[0] = dpll_device_get_by_id(id); + if (!info->user_ptr[0]) { + NL_SET_ERR_MSG(info->extack, "device not found"); + goto unlock; + } + return 0; +unlock: + mutex_unlock(&dpll_lock); + return -ENODEV; +} + +void dpll_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + mutex_unlock(&dpll_lock); +} + +int +dpll_lock_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + mutex_lock(&dpll_lock); + + return 0; +} + +void +dpll_unlock_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + mutex_unlock(&dpll_lock); +} + +int dpll_lock_dumpit(struct netlink_callback *cb) +{ + mutex_lock(&dpll_lock); + + return 0; +} + +int dpll_unlock_dumpit(struct netlink_callback *cb) +{ + mutex_unlock(&dpll_lock); + + return 0; +} + +int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + int ret; + + mutex_lock(&dpll_lock); + if (GENL_REQ_ATTR_CHECK(info, DPLL_A_PIN_ID)) { + ret = -EINVAL; + goto unlock_dev; + } + info->user_ptr[0] = xa_load(&dpll_pin_xa, + nla_get_u32(info->attrs[DPLL_A_PIN_ID])); + if (!info->user_ptr[0]) { + NL_SET_ERR_MSG(info->extack, "pin not found"); + ret = -ENODEV; + goto unlock_dev; + } + + return 0; + +unlock_dev: + mutex_unlock(&dpll_lock); + return ret; +} + +void dpll_pin_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + mutex_unlock(&dpll_lock); +} diff --git a/drivers/dpll/dpll_netlink.h b/drivers/dpll/dpll_netlink.h new file mode 100644 index 000000000000..a9cfd55f57fc --- /dev/null +++ b/drivers/dpll/dpll_netlink.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel and affiliates + */ + +int dpll_device_create_ntf(struct dpll_device *dpll); + +int dpll_device_delete_ntf(struct dpll_device *dpll); + +int dpll_pin_create_ntf(struct dpll_pin *pin); + +int dpll_pin_delete_ntf(struct dpll_pin *pin); diff --git a/drivers/dpll/dpll_nl.c b/drivers/dpll/dpll_nl.c new file mode 100644 index 000000000000..14064c8c783b --- /dev/null +++ b/drivers/dpll/dpll_nl.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/dpll.yaml */ +/* YNL-GEN kernel source */ + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include "dpll_nl.h" + +#include <uapi/linux/dpll.h> + +/* Common nested types */ +const struct nla_policy dpll_pin_parent_device_nl_policy[DPLL_A_PIN_STATE + 1] = { + [DPLL_A_PIN_PARENT_ID] = { .type = NLA_U32, }, + [DPLL_A_PIN_DIRECTION] = NLA_POLICY_RANGE(NLA_U32, 1, 2), + [DPLL_A_PIN_PRIO] = { .type = NLA_U32, }, + [DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3), +}; + +const struct nla_policy dpll_pin_parent_pin_nl_policy[DPLL_A_PIN_STATE + 1] = { + [DPLL_A_PIN_PARENT_ID] = { .type = NLA_U32, }, + [DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3), +}; + +/* DPLL_CMD_DEVICE_ID_GET - do */ +static const struct nla_policy dpll_device_id_get_nl_policy[DPLL_A_TYPE + 1] = { + [DPLL_A_MODULE_NAME] = { .type = NLA_NUL_STRING, }, + [DPLL_A_CLOCK_ID] = { .type = NLA_U64, }, + [DPLL_A_TYPE] = NLA_POLICY_RANGE(NLA_U32, 1, 2), +}; + +/* DPLL_CMD_DEVICE_GET - do */ +static const struct nla_policy dpll_device_get_nl_policy[DPLL_A_ID + 1] = { + [DPLL_A_ID] = { .type = NLA_U32, }, +}; + +/* DPLL_CMD_DEVICE_SET - do */ +static const struct nla_policy dpll_device_set_nl_policy[DPLL_A_ID + 1] = { + [DPLL_A_ID] = { .type = NLA_U32, }, +}; + +/* DPLL_CMD_PIN_ID_GET - do */ +static const struct nla_policy dpll_pin_id_get_nl_policy[DPLL_A_PIN_TYPE + 1] = { + [DPLL_A_PIN_MODULE_NAME] = { .type = NLA_NUL_STRING, }, + [DPLL_A_PIN_CLOCK_ID] = { .type = NLA_U64, }, + [DPLL_A_PIN_BOARD_LABEL] = { .type = NLA_NUL_STRING, }, + [DPLL_A_PIN_PANEL_LABEL] = { .type = NLA_NUL_STRING, }, + [DPLL_A_PIN_PACKAGE_LABEL] = { .type = NLA_NUL_STRING, }, + [DPLL_A_PIN_TYPE] = NLA_POLICY_RANGE(NLA_U32, 1, 5), +}; + +/* DPLL_CMD_PIN_GET - do */ +static const struct nla_policy dpll_pin_get_do_nl_policy[DPLL_A_PIN_ID + 1] = { + [DPLL_A_PIN_ID] = { .type = NLA_U32, }, +}; + +/* DPLL_CMD_PIN_GET - dump */ +static const struct nla_policy dpll_pin_get_dump_nl_policy[DPLL_A_PIN_ID + 1] = { + [DPLL_A_PIN_ID] = { .type = NLA_U32, }, +}; + +/* DPLL_CMD_PIN_SET - do */ +static const struct nla_policy dpll_pin_set_nl_policy[DPLL_A_PIN_PARENT_PIN + 1] = { + [DPLL_A_PIN_ID] = { .type = NLA_U32, }, + [DPLL_A_PIN_FREQUENCY] = { .type = NLA_U64, }, + [DPLL_A_PIN_DIRECTION] = NLA_POLICY_RANGE(NLA_U32, 1, 2), + [DPLL_A_PIN_PRIO] = { .type = NLA_U32, }, + [DPLL_A_PIN_STATE] = NLA_POLICY_RANGE(NLA_U32, 1, 3), + [DPLL_A_PIN_PARENT_DEVICE] = NLA_POLICY_NESTED(dpll_pin_parent_device_nl_policy), + [DPLL_A_PIN_PARENT_PIN] = NLA_POLICY_NESTED(dpll_pin_parent_pin_nl_policy), +}; + +/* Ops table for dpll */ +static const struct genl_split_ops dpll_nl_ops[] = { + { + .cmd = DPLL_CMD_DEVICE_ID_GET, + .pre_doit = dpll_lock_doit, + .doit = dpll_nl_device_id_get_doit, + .post_doit = dpll_unlock_doit, + .policy = dpll_device_id_get_nl_policy, + .maxattr = DPLL_A_TYPE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DPLL_CMD_DEVICE_GET, + .pre_doit = dpll_pre_doit, + .doit = dpll_nl_device_get_doit, + .post_doit = dpll_post_doit, + .policy = dpll_device_get_nl_policy, + .maxattr = DPLL_A_ID, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DPLL_CMD_DEVICE_GET, + .start = dpll_lock_dumpit, + .dumpit = dpll_nl_device_get_dumpit, + .done = dpll_unlock_dumpit, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, + }, + { + .cmd = DPLL_CMD_DEVICE_SET, + .pre_doit = dpll_pre_doit, + .doit = dpll_nl_device_set_doit, + .post_doit = dpll_post_doit, + .policy = dpll_device_set_nl_policy, + .maxattr = DPLL_A_ID, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DPLL_CMD_PIN_ID_GET, + .pre_doit = dpll_lock_doit, + .doit = dpll_nl_pin_id_get_doit, + .post_doit = dpll_unlock_doit, + .policy = dpll_pin_id_get_nl_policy, + .maxattr = DPLL_A_PIN_TYPE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DPLL_CMD_PIN_GET, + .pre_doit = dpll_pin_pre_doit, + .doit = dpll_nl_pin_get_doit, + .post_doit = dpll_pin_post_doit, + .policy = dpll_pin_get_do_nl_policy, + .maxattr = DPLL_A_PIN_ID, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DPLL_CMD_PIN_GET, + .start = dpll_lock_dumpit, + .dumpit = dpll_nl_pin_get_dumpit, + .done = dpll_unlock_dumpit, + .policy = dpll_pin_get_dump_nl_policy, + .maxattr = DPLL_A_PIN_ID, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, + }, + { + .cmd = DPLL_CMD_PIN_SET, + .pre_doit = dpll_pin_pre_doit, + .doit = dpll_nl_pin_set_doit, + .post_doit = dpll_pin_post_doit, + .policy = dpll_pin_set_nl_policy, + .maxattr = DPLL_A_PIN_PARENT_PIN, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, +}; + +static const struct genl_multicast_group dpll_nl_mcgrps[] = { + [DPLL_NLGRP_MONITOR] = { "monitor", }, +}; + +struct genl_family dpll_nl_family __ro_after_init = { + .name = DPLL_FAMILY_NAME, + .version = DPLL_FAMILY_VERSION, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .split_ops = dpll_nl_ops, + .n_split_ops = ARRAY_SIZE(dpll_nl_ops), + .mcgrps = dpll_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(dpll_nl_mcgrps), +}; diff --git a/drivers/dpll/dpll_nl.h b/drivers/dpll/dpll_nl.h new file mode 100644 index 000000000000..1f67aaed4742 --- /dev/null +++ b/drivers/dpll/dpll_nl.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/dpll.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_DPLL_GEN_H +#define _LINUX_DPLL_GEN_H + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include <uapi/linux/dpll.h> + +/* Common nested types */ +extern const struct nla_policy dpll_pin_parent_device_nl_policy[DPLL_A_PIN_STATE + 1]; +extern const struct nla_policy dpll_pin_parent_pin_nl_policy[DPLL_A_PIN_STATE + 1]; + +int dpll_lock_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); +int dpll_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); +int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); +void +dpll_unlock_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); +void +dpll_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); +void +dpll_pin_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, + struct genl_info *info); +int dpll_lock_dumpit(struct netlink_callback *cb); +int dpll_unlock_dumpit(struct netlink_callback *cb); + +int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info); +int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info); +int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int dpll_nl_device_set_doit(struct sk_buff *skb, struct genl_info *info); +int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info); +int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info); +int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int dpll_nl_pin_set_doit(struct sk_buff *skb, struct genl_info *info); + +enum { + DPLL_NLGRP_MONITOR, +}; + +extern struct genl_family dpll_nl_family; + +#endif /* _LINUX_DPLL_GEN_H */ diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index 5e39641ea887..3a89349dc918 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -324,14 +324,12 @@ static int b53_mmap_probe(struct platform_device *pdev) return b53_switch_register(dev); } -static int b53_mmap_remove(struct platform_device *pdev) +static void b53_mmap_remove(struct platform_device *pdev) { struct b53_device *dev = platform_get_drvdata(pdev); if (dev) b53_switch_remove(dev); - - return 0; } static void b53_mmap_shutdown(struct platform_device *pdev) @@ -372,7 +370,7 @@ MODULE_DEVICE_TABLE(of, b53_mmap_of_table); static struct platform_driver b53_mmap_driver = { .probe = b53_mmap_probe, - .remove = b53_mmap_remove, + .remove_new = b53_mmap_remove, .shutdown = b53_mmap_shutdown, .driver = { .name = "b53-switch", diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index bcb44034404d..f3f95332ff17 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -657,17 +657,15 @@ static int b53_srab_probe(struct platform_device *pdev) return b53_switch_register(dev); } -static int b53_srab_remove(struct platform_device *pdev) +static void b53_srab_remove(struct platform_device *pdev) { struct b53_device *dev = platform_get_drvdata(pdev); if (!dev) - return 0; + return; b53_srab_intr_set(dev->priv, false); b53_switch_remove(dev); - - return 0; } static void b53_srab_shutdown(struct platform_device *pdev) @@ -684,7 +682,7 @@ static void b53_srab_shutdown(struct platform_device *pdev) static struct platform_driver b53_srab_driver = { .probe = b53_srab_probe, - .remove = b53_srab_remove, + .remove_new = b53_srab_remove, .shutdown = b53_srab_shutdown, .driver = { .name = "b53-srab-switch", diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index cd1f240c90f3..326937e91f52 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1543,12 +1543,12 @@ out_clk: return ret; } -static int bcm_sf2_sw_remove(struct platform_device *pdev) +static void bcm_sf2_sw_remove(struct platform_device *pdev) { struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); if (!priv) - return 0; + return; priv->wol_ports_mask = 0; /* Disable interrupts */ @@ -1560,8 +1560,6 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) clk_disable_unprepare(priv->clk); if (priv->type == BCM7278_DEVICE_ID) reset_control_assert(priv->rcdev); - - return 0; } static void bcm_sf2_sw_shutdown(struct platform_device *pdev) @@ -1607,7 +1605,7 @@ static SIMPLE_DEV_PM_OPS(bcm_sf2_pm_ops, static struct platform_driver bcm_sf2_driver = { .probe = bcm_sf2_sw_probe, - .remove = bcm_sf2_sw_remove, + .remove_new = bcm_sf2_sw_remove, .shutdown = bcm_sf2_sw_shutdown, .driver = { .name = "brcm-sf2", diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 11ef1d7ea229..beda1e9d350f 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -2060,18 +2060,16 @@ err_ptp_setup: return ret; } -static int hellcreek_remove(struct platform_device *pdev) +static void hellcreek_remove(struct platform_device *pdev) { struct hellcreek *hellcreek = platform_get_drvdata(pdev); if (!hellcreek) - return 0; + return; hellcreek_hwtstamp_free(hellcreek); hellcreek_ptp_free(hellcreek); dsa_unregister_switch(hellcreek->ds); - - return 0; } static void hellcreek_shutdown(struct platform_device *pdev) @@ -2107,7 +2105,7 @@ MODULE_DEVICE_TABLE(of, hellcreek_of_match); static struct platform_driver hellcreek_driver = { .probe = hellcreek_probe, - .remove = hellcreek_remove, + .remove_new = hellcreek_remove, .shutdown = hellcreek_shutdown, .driver = { .name = "hellcreek", diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 3c76a1a14aee..25abf2caf5fb 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -2207,13 +2207,13 @@ put_mdio_node: return err; } -static int gswip_remove(struct platform_device *pdev) +static void gswip_remove(struct platform_device *pdev) { struct gswip_priv *priv = platform_get_drvdata(pdev); int i; if (!priv) - return 0; + return; /* disable the switch */ gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); @@ -2228,8 +2228,6 @@ static int gswip_remove(struct platform_device *pdev) for (i = 0; i < priv->num_gphy_fw; i++) gswip_gphy_fw_remove(priv, &priv->gphy_fw[i]); - - return 0; } static void gswip_shutdown(struct platform_device *pdev) @@ -2266,7 +2264,7 @@ MODULE_DEVICE_TABLE(of, gswip_of_match); static struct platform_driver gswip_driver = { .probe = gswip_probe, - .remove = gswip_remove, + .remove_new = gswip_remove, .shutdown = gswip_shutdown, .driver = { .name = "gswip", diff --git a/drivers/net/dsa/microchip/Makefile b/drivers/net/dsa/microchip/Makefile index 48360cc9fc68..49459a50dbc8 100644 --- a/drivers/net/dsa/microchip/Makefile +++ b/drivers/net/dsa/microchip/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ_COMMON) += ksz_switch.o ksz_switch-objs := ksz_common.o -ksz_switch-objs += ksz9477.o +ksz_switch-objs += ksz9477.o ksz9477_acl.o ksz9477_tc_flower.o ksz_switch-objs += ksz8795.o ksz_switch-objs += lan937x_main.o diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h index 7a57c6088f80..d33db4f86c64 100644 --- a/drivers/net/dsa/microchip/ksz8795_reg.h +++ b/drivers/net/dsa/microchip/ksz8795_reg.h @@ -442,20 +442,6 @@ #define TOS_PRIO_M KS_PRIO_M #define TOS_PRIO_S KS_PRIO_S -#define REG_SW_CTRL_20 0xA3 - -#define SW_GMII_DRIVE_STRENGTH_S 4 -#define SW_DRIVE_STRENGTH_M 0x7 -#define SW_DRIVE_STRENGTH_2MA 0 -#define SW_DRIVE_STRENGTH_4MA 1 -#define SW_DRIVE_STRENGTH_8MA 2 -#define SW_DRIVE_STRENGTH_12MA 3 -#define SW_DRIVE_STRENGTH_16MA 4 -#define SW_DRIVE_STRENGTH_20MA 5 -#define SW_DRIVE_STRENGTH_24MA 6 -#define SW_DRIVE_STRENGTH_28MA 7 -#define SW_MII_DRIVE_STRENGTH_S 0 - #define REG_SW_CTRL_21 0xA4 #define SW_IPV6_MLD_OPTION BIT(3) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 83b7f2d5c1ea..9e63ed820c92 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1004,6 +1004,8 @@ void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) /* clear pending interrupts */ if (dev->info->internal_phy[port]) ksz_pread16(dev, port, REG_PORT_PHY_INT_ENABLE, &data16); + + ksz9477_port_acl_init(dev, port); } void ksz9477_config_cpu_port(struct dsa_switch *ds) diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h index a6f425866a29..d7bbd9bd8893 100644 --- a/drivers/net/dsa/microchip/ksz9477.h +++ b/drivers/net/dsa/microchip/ksz9477.h @@ -57,4 +57,40 @@ int ksz9477_switch_init(struct ksz_device *dev); void ksz9477_switch_exit(struct ksz_device *dev); void ksz9477_port_queue_split(struct ksz_device *dev, int port); +int ksz9477_port_acl_init(struct ksz_device *dev, int port); +void ksz9477_port_acl_free(struct ksz_device *dev, int port); +int ksz9477_cls_flower_add(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress); +int ksz9477_cls_flower_del(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress); + +#define KSZ9477_ACL_ENTRY_SIZE 18 +#define KSZ9477_ACL_MAX_ENTRIES 16 + +struct ksz9477_acl_entry { + u8 entry[KSZ9477_ACL_ENTRY_SIZE]; + unsigned long cookie; + u32 prio; +}; + +struct ksz9477_acl_entries { + struct ksz9477_acl_entry entries[KSZ9477_ACL_MAX_ENTRIES]; + int entries_count; +}; + +struct ksz9477_acl_priv { + struct ksz9477_acl_entries acles; +}; + +void ksz9477_acl_remove_entries(struct ksz_device *dev, int port, + struct ksz9477_acl_entries *acles, + unsigned long cookie); +int ksz9477_acl_write_list(struct ksz_device *dev, int port); +int ksz9477_sort_acl_entries(struct ksz_device *dev, int port); +void ksz9477_acl_action_rule_cfg(u8 *entry, bool force_prio, u8 prio_val); +void ksz9477_acl_processing_rule_set_action(u8 *entry, u8 action_idx); +void ksz9477_acl_match_process_l2(struct ksz_device *dev, int port, + u16 ethtype, u8 *src_mac, u8 *dst_mac, + unsigned long cookie, u32 prio); + #endif diff --git a/drivers/net/dsa/microchip/ksz9477_acl.c b/drivers/net/dsa/microchip/ksz9477_acl.c new file mode 100644 index 000000000000..06d74c19eb94 --- /dev/null +++ b/drivers/net/dsa/microchip/ksz9477_acl.c @@ -0,0 +1,1436 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2023 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> + +/* Access Control List (ACL) structure: + * + * There are multiple groups of registers involved in ACL configuration: + * + * - Matching Rules: These registers define the criteria for matching incoming + * packets based on their header information (Layer 2 MAC, Layer 3 IP, or + * Layer 4 TCP/UDP). Different register settings are used depending on the + * matching rule mode (MD) and the Enable (ENB) settings. + * + * - Action Rules: These registers define how the ACL should modify the packet's + * priority, VLAN tag priority, and forwarding map once a matching rule has + * been triggered. The settings vary depending on whether the matching rule is + * in Count Mode (MD = 01 and ENB = 00) or not. + * + * - Processing Rules: These registers control the overall behavior of the ACL, + * such as selecting which matching rule to apply first, enabling/disabling + * specific rules, or specifying actions for matched packets. + * + * ACL Structure: + * +----------------------+ + * +----------------------+ | (optional) | + * | Matching Rules | | Matching Rules | + * | (Layer 2, 3, 4) | | (Layer 2, 3, 4) | + * +----------------------+ +----------------------+ + * | | + * \___________________________/ + * v + * +----------------------+ + * | Processing Rules | + * | (action idx, | + * | matching rule set) | + * +----------------------+ + * | + * v + * +----------------------+ + * | Action Rules | + * | (Modify Priority, | + * | Forwarding Map, | + * | VLAN tag, etc) | + * +----------------------+ + */ + +#include <linux/bitops.h> + +#include "ksz9477.h" +#include "ksz9477_reg.h" +#include "ksz_common.h" + +#define KSZ9477_PORT_ACL_0 0x600 + +enum ksz9477_acl_port_access { + KSZ9477_ACL_PORT_ACCESS_0 = 0x00, + KSZ9477_ACL_PORT_ACCESS_1 = 0x01, + KSZ9477_ACL_PORT_ACCESS_2 = 0x02, + KSZ9477_ACL_PORT_ACCESS_3 = 0x03, + KSZ9477_ACL_PORT_ACCESS_4 = 0x04, + KSZ9477_ACL_PORT_ACCESS_5 = 0x05, + KSZ9477_ACL_PORT_ACCESS_6 = 0x06, + KSZ9477_ACL_PORT_ACCESS_7 = 0x07, + KSZ9477_ACL_PORT_ACCESS_8 = 0x08, + KSZ9477_ACL_PORT_ACCESS_9 = 0x09, + KSZ9477_ACL_PORT_ACCESS_A = 0x0A, + KSZ9477_ACL_PORT_ACCESS_B = 0x0B, + KSZ9477_ACL_PORT_ACCESS_C = 0x0C, + KSZ9477_ACL_PORT_ACCESS_D = 0x0D, + KSZ9477_ACL_PORT_ACCESS_E = 0x0E, + KSZ9477_ACL_PORT_ACCESS_F = 0x0F, + KSZ9477_ACL_PORT_ACCESS_10 = 0x10, + KSZ9477_ACL_PORT_ACCESS_11 = 0x11 +}; + +#define KSZ9477_ACL_MD_MASK GENMASK(5, 4) +#define KSZ9477_ACL_MD_DISABLE 0 +#define KSZ9477_ACL_MD_L2_MAC 1 +#define KSZ9477_ACL_MD_L3_IP 2 +#define KSZ9477_ACL_MD_L4_TCP_UDP 3 + +#define KSZ9477_ACL_ENB_MASK GENMASK(3, 2) +#define KSZ9477_ACL_ENB_L2_COUNTER 0 +#define KSZ9477_ACL_ENB_L2_TYPE 1 +#define KSZ9477_ACL_ENB_L2_MAC 2 +#define KSZ9477_ACL_ENB_L2_MAC_TYPE 3 + +/* only IPv4 src or dst can be used with mask */ +#define KSZ9477_ACL_ENB_L3_IPV4_ADDR_MASK 1 +/* only IPv4 src and dst can be used without mask */ +#define KSZ9477_ACL_ENB_L3_IPV4_ADDR_SRC_DST 2 + +#define KSZ9477_ACL_ENB_L4_IP_PROTO 0 +#define KSZ9477_ACL_ENB_L4_TCP_SRC_DST_PORT 1 +#define KSZ9477_ACL_ENB_L4_UDP_SRC_DST_PORT 2 +#define KSZ9477_ACL_ENB_L4_TCP_SEQ_NUMBER 3 + +#define KSZ9477_ACL_SD_SRC BIT(1) +#define KSZ9477_ACL_SD_DST 0 +#define KSZ9477_ACL_EQ_EQUAL BIT(0) +#define KSZ9477_ACL_EQ_NOT_EQUAL 0 + +#define KSZ9477_ACL_PM_M GENMASK(7, 6) +#define KSZ9477_ACL_PM_DISABLE 0 +#define KSZ9477_ACL_PM_HIGHER 1 +#define KSZ9477_ACL_PM_LOWER 2 +#define KSZ9477_ACL_PM_REPLACE 3 +#define KSZ9477_ACL_P_M GENMASK(5, 3) + +#define KSZ9477_PORT_ACL_CTRL_0 0x0612 + +#define KSZ9477_ACL_WRITE_DONE BIT(6) +#define KSZ9477_ACL_READ_DONE BIT(5) +#define KSZ9477_ACL_WRITE BIT(4) +#define KSZ9477_ACL_INDEX_M GENMASK(3, 0) + +/** + * ksz9477_dump_acl_index - Print the ACL entry at the specified index + * + * @dev: Pointer to the ksz9477 device structure. + * @acle: Pointer to the ACL entry array. + * @index: The index of the ACL entry to print. + * + * This function prints the details of an ACL entry, located at a particular + * index within the ksz9477 device's ACL table. It omits printing entries that + * are empty. + * + * Return: 1 if the entry is non-empty and printed, 0 otherwise. + */ +static int ksz9477_dump_acl_index(struct ksz_device *dev, + struct ksz9477_acl_entry *acle, int index) +{ + bool empty = true; + char buf[64]; + u8 *entry; + int i; + + entry = &acle[index].entry[0]; + for (i = 0; i <= KSZ9477_ACL_PORT_ACCESS_11; i++) { + if (entry[i]) + empty = false; + + sprintf(buf + (i * 3), "%02x ", entry[i]); + } + + /* no need to print empty entries */ + if (empty) + return 0; + + dev_err(dev->dev, " Entry %02d, prio: %02d : %s", index, + acle[index].prio, buf); + + return 1; +} + +/** + * ksz9477_dump_acl - Print ACL entries + * + * @dev: Pointer to the device structure. + * @acle: Pointer to the ACL entry array. + */ +static void ksz9477_dump_acl(struct ksz_device *dev, + struct ksz9477_acl_entry *acle) +{ + int count = 0; + int i; + + for (i = 0; i < KSZ9477_ACL_MAX_ENTRIES; i++) + count += ksz9477_dump_acl_index(dev, acle, i); + + if (count != KSZ9477_ACL_MAX_ENTRIES - 1) + dev_err(dev->dev, " Empty ACL entries were skipped\n"); +} + +/** + * ksz9477_acl_is_valid_matching_rule - Check if an ACL entry contains a valid + * matching rule. + * + * @entry: Pointer to ACL entry buffer + * + * This function checks if the given ACL entry buffer contains a valid + * matching rule by inspecting the Mode (MD) and Enable (ENB) fields. + * + * Returns: True if it's a valid matching rule, false otherwise. + */ +static bool ksz9477_acl_is_valid_matching_rule(u8 *entry) +{ + u8 val1, md, enb; + + val1 = entry[KSZ9477_ACL_PORT_ACCESS_1]; + + md = FIELD_GET(KSZ9477_ACL_MD_MASK, val1); + if (md == KSZ9477_ACL_MD_DISABLE) + return false; + + if (md == KSZ9477_ACL_MD_L2_MAC) { + /* L2 counter is not support, so it is not valid rule for now */ + enb = FIELD_GET(KSZ9477_ACL_ENB_MASK, val1); + if (enb == KSZ9477_ACL_ENB_L2_COUNTER) + return false; + } + + return true; +} + +/** + * ksz9477_acl_get_cont_entr - Get count of contiguous ACL entries and validate + * the matching rules. + * @dev: Pointer to the KSZ9477 device structure. + * @port: Port number. + * @index: Index of the starting ACL entry. + * + * Based on the KSZ9477 switch's Access Control List (ACL) system, the RuleSet + * in an ACL entry indicates which entries contain Matching rules linked to it. + * This RuleSet is represented by two registers: KSZ9477_ACL_PORT_ACCESS_E and + * KSZ9477_ACL_PORT_ACCESS_F. Each bit set in these registers corresponds to + * an entry containing a Matching rule for this RuleSet. + * + * For a single Matching rule linked, only one bit is set. However, when an + * entry links multiple Matching rules, forming what's termed a 'complex rule', + * multiple bits are set in these registers. + * + * This function checks that, for complex rules, the entries containing the + * linked Matching rules are contiguous in terms of their indices. It calculates + * and returns the number of these contiguous entries. + * + * Returns: + * - 0 if the entry is empty and can be safely overwritten + * - 1 if the entry represents a simple rule + * - The number of contiguous entries if it is the root entry of a complex + * rule + * - -ENOTEMPTY if the entry is part of a complex rule but not the root + * entry + * - -EINVAL if the validation fails + */ +static int ksz9477_acl_get_cont_entr(struct ksz_device *dev, int port, + int index) +{ + struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv; + struct ksz9477_acl_entries *acles = &acl->acles; + int start_idx, end_idx, contiguous_count; + unsigned long val; + u8 vale, valf; + u8 *entry; + int i; + + entry = &acles->entries[index].entry[0]; + vale = entry[KSZ9477_ACL_PORT_ACCESS_E]; + valf = entry[KSZ9477_ACL_PORT_ACCESS_F]; + + val = (vale << 8) | valf; + + /* If no bits are set, return an appropriate value or error */ + if (!val) { + if (ksz9477_acl_is_valid_matching_rule(entry)) { + /* Looks like we are about to corrupt some complex rule. + * Do not print an error here, as this is a normal case + * when we are trying to find a free or starting entry. + */ + dev_dbg(dev->dev, "ACL: entry %d starting with a valid matching rule, but no bits set in RuleSet\n", + index); + return -ENOTEMPTY; + } + + /* This entry does not contain a valid matching rule */ + return 0; + } + + start_idx = find_first_bit((unsigned long *)&val, 16); + end_idx = find_last_bit((unsigned long *)&val, 16); + + /* Calculate the contiguous count */ + contiguous_count = end_idx - start_idx + 1; + + /* Check if the number of bits set in val matches our calculated count */ + if (contiguous_count != hweight16(val)) { + /* Probably we have a fragmented complex rule, which is not + * supported by this driver. + */ + dev_err(dev->dev, "ACL: number of bits set in RuleSet does not match calculated count\n"); + return -EINVAL; + } + + /* loop over the contiguous entries and check for valid matching rules */ + for (i = start_idx; i <= end_idx; i++) { + u8 *current_entry = &acles->entries[i].entry[0]; + + if (!ksz9477_acl_is_valid_matching_rule(current_entry)) { + /* we have something linked without a valid matching + * rule. ACL table? + */ + dev_err(dev->dev, "ACL: entry %d does not contain a valid matching rule\n", + i); + return -EINVAL; + } + + if (i > start_idx) { + vale = current_entry[KSZ9477_ACL_PORT_ACCESS_E]; + valf = current_entry[KSZ9477_ACL_PORT_ACCESS_F]; + /* Following entry should have empty linkage list */ + if (vale || valf) { + dev_err(dev->dev, "ACL: entry %d has non-empty RuleSet linkage\n", + i); + return -EINVAL; + } + } + } + + return contiguous_count; +} + +/** + * ksz9477_acl_update_linkage - Update the RuleSet linkage for an ACL entry + * after a move operation. + * + * @dev: Pointer to the ksz_device. + * @entry: Pointer to the ACL entry array. + * @old_idx: The original index of the ACL entry before moving. + * @new_idx: The new index of the ACL entry after moving. + * + * This function updates the RuleSet linkage bits for an ACL entry when + * it's moved from one position to another in the ACL table. The RuleSet + * linkage is represented by two 8-bit registers, which are combined + * into a 16-bit value for easier manipulation. The linkage bits are shifted + * based on the difference between the old and new index. If any bits are lost + * during the shift operation, an error is returned. + * + * Note: Fragmentation within a RuleSet is not supported. Hence, entries must + * be moved as complete blocks, maintaining the integrity of the RuleSet. + * + * Returns: 0 on success, or -EINVAL if any RuleSet linkage bits are lost + * during the move. + */ +static int ksz9477_acl_update_linkage(struct ksz_device *dev, u8 *entry, + u16 old_idx, u16 new_idx) +{ + unsigned int original_bit_count; + unsigned long rule_linkage; + u8 vale, valf, val0; + int shift; + + val0 = entry[KSZ9477_ACL_PORT_ACCESS_0]; + vale = entry[KSZ9477_ACL_PORT_ACCESS_E]; + valf = entry[KSZ9477_ACL_PORT_ACCESS_F]; + + /* Combine the two u8 values into one u16 for easier manipulation */ + rule_linkage = (vale << 8) | valf; + original_bit_count = hweight16(rule_linkage); + + /* Even if HW is able to handle fragmented RuleSet, we don't support it. + * RuleSet is filled only for the first entry of the set. + */ + if (!rule_linkage) + return 0; + + if (val0 != old_idx) { + dev_err(dev->dev, "ACL: entry %d has unexpected ActionRule linkage: %d\n", + old_idx, val0); + return -EINVAL; + } + + val0 = new_idx; + + /* Calculate the number of positions to shift */ + shift = new_idx - old_idx; + + /* Shift the RuleSet */ + if (shift > 0) + rule_linkage <<= shift; + else + rule_linkage >>= -shift; + + /* Check that no bits were lost in the process */ + if (original_bit_count != hweight16(rule_linkage)) { + dev_err(dev->dev, "ACL RuleSet linkage bits lost during move\n"); + return -EINVAL; + } + + entry[KSZ9477_ACL_PORT_ACCESS_0] = val0; + + /* Update the RuleSet bitfields in the entry */ + entry[KSZ9477_ACL_PORT_ACCESS_E] = (rule_linkage >> 8) & 0xFF; + entry[KSZ9477_ACL_PORT_ACCESS_F] = rule_linkage & 0xFF; + + return 0; +} + +/** + * ksz9477_validate_and_get_src_count - Validate source and destination indices + * and determine the source entry count. + * @dev: Pointer to the KSZ device structure. + * @port: Port number on the KSZ device where the ACL entries reside. + * @src_idx: Index of the starting ACL entry that needs to be validated. + * @dst_idx: Index of the destination where the source entries are intended to + * be moved. + * @src_count: Pointer to the variable that will hold the number of contiguous + * source entries if the validation passes. + * @dst_count: Pointer to the variable that will hold the number of contiguous + * destination entries if the validation passes. + * + * This function performs validation on the source and destination indices + * provided for ACL entries. It checks if the indices are within the valid + * range, and if the source entries are contiguous. Additionally, the function + * ensures that there's adequate space at the destination for the source entries + * and that the destination index isn't in the middle of a RuleSet. If all + * validations pass, the function returns the number of contiguous source and + * destination entries. + * + * Return: 0 on success, otherwise returns a negative error code if any + * validation check fails. + */ +static int ksz9477_validate_and_get_src_count(struct ksz_device *dev, int port, + int src_idx, int dst_idx, + int *src_count, int *dst_count) +{ + int ret; + + if (src_idx >= KSZ9477_ACL_MAX_ENTRIES || + dst_idx >= KSZ9477_ACL_MAX_ENTRIES) { + dev_err(dev->dev, "ACL: invalid entry index\n"); + return -EINVAL; + } + + /* Nothing to do */ + if (src_idx == dst_idx) + return 0; + + /* Validate if the source entries are contiguous */ + ret = ksz9477_acl_get_cont_entr(dev, port, src_idx); + if (ret < 0) + return ret; + *src_count = ret; + + if (!*src_count) { + dev_err(dev->dev, "ACL: source entry is empty\n"); + return -EINVAL; + } + + if (dst_idx + *src_count >= KSZ9477_ACL_MAX_ENTRIES) { + dev_err(dev->dev, "ACL: Not enough space at the destination. Move operation will fail.\n"); + return -EINVAL; + } + + /* Validate if the destination entry is empty or not in the middle of + * a RuleSet. + */ + ret = ksz9477_acl_get_cont_entr(dev, port, dst_idx); + if (ret < 0) + return ret; + *dst_count = ret; + + return 0; +} + +/** + * ksz9477_move_entries_downwards - Move a range of ACL entries downwards in + * the list. + * @dev: Pointer to the KSZ device structure. + * @acles: Pointer to the structure encapsulating all the ACL entries. + * @start_idx: Starting index of the entries to be relocated. + * @num_entries_to_move: Number of consecutive entries to be relocated. + * @end_idx: Destination index where the first entry should be situated post + * relocation. + * + * This function is responsible for rearranging a specific block of ACL entries + * by shifting them downwards in the list based on the supplied source and + * destination indices. It ensures that the linkage between the ACL entries is + * maintained accurately after the relocation. + * + * Return: 0 on successful relocation of entries, otherwise returns a negative + * error code. + */ +static int ksz9477_move_entries_downwards(struct ksz_device *dev, + struct ksz9477_acl_entries *acles, + u16 start_idx, + u16 num_entries_to_move, + u16 end_idx) +{ + struct ksz9477_acl_entry *e; + int ret, i; + + for (i = start_idx; i < end_idx; i++) { + e = &acles->entries[i]; + *e = acles->entries[i + num_entries_to_move]; + + ret = ksz9477_acl_update_linkage(dev, &e->entry[0], + i + num_entries_to_move, i); + if (ret < 0) + return ret; + } + + return 0; +} + +/** + * ksz9477_move_entries_upwards - Move a range of ACL entries upwards in the + * list. + * @dev: Pointer to the KSZ device structure. + * @acles: Pointer to the structure holding all the ACL entries. + * @start_idx: The starting index of the entries to be moved. + * @num_entries_to_move: Number of contiguous entries to be moved. + * @target_idx: The destination index where the first entry should be placed + * after moving. + * + * This function rearranges a chunk of ACL entries by moving them upwards + * in the list based on the given source and destination indices. The reordering + * process preserves the linkage between entries by updating it accordingly. + * + * Return: 0 if the entries were successfully moved, otherwise a negative error + * code. + */ +static int ksz9477_move_entries_upwards(struct ksz_device *dev, + struct ksz9477_acl_entries *acles, + u16 start_idx, u16 num_entries_to_move, + u16 target_idx) +{ + struct ksz9477_acl_entry *e; + int ret, i, b; + + for (i = start_idx; i > target_idx; i--) { + b = i + num_entries_to_move - 1; + + e = &acles->entries[b]; + *e = acles->entries[i - 1]; + + ret = ksz9477_acl_update_linkage(dev, &e->entry[0], i - 1, b); + if (ret < 0) + return ret; + } + + return 0; +} + +/** + * ksz9477_acl_move_entries - Move a block of contiguous ACL entries from a + * source to a destination index. + * @dev: Pointer to the KSZ9477 device structure. + * @port: Port number. + * @src_idx: Index of the starting source ACL entry. + * @dst_idx: Index of the starting destination ACL entry. + * + * This function aims to move a block of contiguous ACL entries from the source + * index to the destination index while ensuring the integrity and validity of + * the ACL table. + * + * In case of any errors during the adjustments or copying, the function will + * restore the ACL entries to their original state from the backup. + * + * Return: 0 if the move operation is successful. Returns -EINVAL for validation + * errors or other error codes based on specific failure conditions. + */ +static int ksz9477_acl_move_entries(struct ksz_device *dev, int port, + u16 src_idx, u16 dst_idx) +{ + struct ksz9477_acl_entry buffer[KSZ9477_ACL_MAX_ENTRIES]; + struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv; + struct ksz9477_acl_entries *acles = &acl->acles; + int src_count, ret, dst_count; + + ret = ksz9477_validate_and_get_src_count(dev, port, src_idx, dst_idx, + &src_count, &dst_count); + if (ret) + return ret; + + /* In case dst_index is greater than src_index, we need to adjust the + * destination index to account for the entries that will be moved + * downwards and the size of the entry located at dst_idx. + */ + if (dst_idx > src_idx) + dst_idx = dst_idx + dst_count - src_count; + + /* Copy source block to buffer and update its linkage */ + for (int i = 0; i < src_count; i++) { + buffer[i] = acles->entries[src_idx + i]; + ret = ksz9477_acl_update_linkage(dev, &buffer[i].entry[0], + src_idx + i, dst_idx + i); + if (ret < 0) + return ret; + } + + /* Adjust other entries and their linkage based on destination */ + if (dst_idx > src_idx) { + ret = ksz9477_move_entries_downwards(dev, acles, src_idx, + src_count, dst_idx); + } else { + ret = ksz9477_move_entries_upwards(dev, acles, src_idx, + src_count, dst_idx); + } + if (ret < 0) + return ret; + + /* Copy buffer to destination block */ + for (int i = 0; i < src_count; i++) + acles->entries[dst_idx + i] = buffer[i]; + + return 0; +} + +/** + * ksz9477_get_next_block_start - Identify the starting index of the next ACL + * block. + * @dev: Pointer to the device structure. + * @port: The port number on which the ACL entries are being checked. + * @start: The starting index from which the search begins. + * + * This function looks for the next valid ACL block starting from the provided + * 'start' index and returns the beginning index of that block. If the block is + * invalid or if it reaches the end of the ACL entries without finding another + * block, it returns the maximum ACL entries count. + * + * Returns: + * - The starting index of the next valid ACL block. + * - KSZ9477_ACL_MAX_ENTRIES if no other valid blocks are found after 'start'. + * - A negative error code if an error occurs while checking. + */ +static int ksz9477_get_next_block_start(struct ksz_device *dev, int port, + int start) +{ + int block_size; + + for (int i = start; i < KSZ9477_ACL_MAX_ENTRIES;) { + block_size = ksz9477_acl_get_cont_entr(dev, port, i); + if (block_size < 0 && block_size != -ENOTEMPTY) + return block_size; + + if (block_size > 0) + return i; + + i++; + } + return KSZ9477_ACL_MAX_ENTRIES; +} + +/** + * ksz9477_swap_acl_blocks - Swap two ACL blocks + * @dev: Pointer to the device structure. + * @port: The port number on which the ACL blocks are to be swapped. + * @i: The starting index of the first ACL block. + * @j: The starting index of the second ACL block. + * + * This function is used to swap two ACL blocks present at given indices. The + * main purpose is to aid in the sorting and reordering of ACL blocks based on + * certain criteria, e.g., priority. It checks the validity of the block at + * index 'i', ensuring it's not an empty block, and then proceeds to swap it + * with the block at index 'j'. + * + * Returns: + * - 0 on successful swapping of blocks. + * - -EINVAL if the block at index 'i' is empty. + * - A negative error code if any other error occurs during the swap. + */ +static int ksz9477_swap_acl_blocks(struct ksz_device *dev, int port, int i, + int j) +{ + int ret, current_block_size; + + current_block_size = ksz9477_acl_get_cont_entr(dev, port, i); + if (current_block_size < 0) + return current_block_size; + + if (!current_block_size) { + dev_err(dev->dev, "ACL: swapping empty entry %d\n", i); + return -EINVAL; + } + + ret = ksz9477_acl_move_entries(dev, port, i, j); + if (ret) + return ret; + + ret = ksz9477_acl_move_entries(dev, port, j - current_block_size, i); + if (ret) + return ret; + + return 0; +} + +/** + * ksz9477_sort_acl_entr_no_back - Sort ACL entries for a given port based on + * priority without backing up entries. + * @dev: Pointer to the device structure. + * @port: The port number whose ACL entries need to be sorted. + * + * This function sorts ACL entries of the specified port using a variant of the + * bubble sort algorithm. It operates on blocks of ACL entries rather than + * individual entries. Each block's starting point is identified and then + * compared with subsequent blocks based on their priority. If the current + * block has a lower priority than the subsequent block, the two blocks are + * swapped. + * + * This is done in order to maintain an organized order of ACL entries based on + * priority, ensuring efficient and predictable ACL rule application. + * + * Returns: + * - 0 on successful sorting of entries. + * - A negative error code if any issue arises during sorting, e.g., + * if the function is unable to get the next block start. + */ +static int ksz9477_sort_acl_entr_no_back(struct ksz_device *dev, int port) +{ + struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv; + struct ksz9477_acl_entries *acles = &acl->acles; + struct ksz9477_acl_entry *curr, *next; + int i, j, ret; + + /* Bubble sort */ + for (i = 0; i < KSZ9477_ACL_MAX_ENTRIES;) { + curr = &acles->entries[i]; + + j = ksz9477_get_next_block_start(dev, port, i + 1); + if (j < 0) + return j; + + while (j < KSZ9477_ACL_MAX_ENTRIES) { + next = &acles->entries[j]; + + if (curr->prio > next->prio) { + ret = ksz9477_swap_acl_blocks(dev, port, i, j); + if (ret) + return ret; + } + + j = ksz9477_get_next_block_start(dev, port, j + 1); + if (j < 0) + return j; + } + + i = ksz9477_get_next_block_start(dev, port, i + 1); + if (i < 0) + return i; + } + + return 0; +} + +/** + * ksz9477_sort_acl_entries - Sort the ACL entries for a given port. + * @dev: Pointer to the KSZ device. + * @port: Port number. + * + * This function sorts the Access Control List (ACL) entries for a specified + * port. Before sorting, a backup of the original entries is created. If the + * sorting process fails, the function will log error messages displaying both + * the original and attempted sorted entries, and then restore the original + * entries from the backup. + * + * Return: 0 if the sorting succeeds, otherwise a negative error code. + */ +int ksz9477_sort_acl_entries(struct ksz_device *dev, int port) +{ + struct ksz9477_acl_entry backup[KSZ9477_ACL_MAX_ENTRIES]; + struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv; + struct ksz9477_acl_entries *acles = &acl->acles; + int ret; + + /* create a backup of the ACL entries, if something goes wrong + * we can restore the ACL entries. + */ + memcpy(backup, acles->entries, sizeof(backup)); + + ret = ksz9477_sort_acl_entr_no_back(dev, port); + if (ret) { + dev_err(dev->dev, "ACL: failed to sort entries for port %d\n", + port); + dev_err(dev->dev, "ACL dump before sorting:\n"); + ksz9477_dump_acl(dev, backup); + dev_err(dev->dev, "ACL dump after sorting:\n"); + ksz9477_dump_acl(dev, acles->entries); + /* Restore the original entries */ + memcpy(acles->entries, backup, sizeof(backup)); + } + + return ret; +} + +/** + * ksz9477_acl_wait_ready - Waits for the ACL operation to complete on a given + * port. + * @dev: The ksz_device instance. + * @port: The port number to wait for. + * + * This function checks if the ACL write or read operation is completed by + * polling the specified register. + * + * Returns: 0 if the operation is successful, or a negative error code if an + * error occurs. + */ +static int ksz9477_acl_wait_ready(struct ksz_device *dev, int port) +{ + unsigned int wr_mask = KSZ9477_ACL_WRITE_DONE | KSZ9477_ACL_READ_DONE; + unsigned int val, reg; + int ret; + + reg = dev->dev_ops->get_port_addr(port, KSZ9477_PORT_ACL_CTRL_0); + + ret = regmap_read_poll_timeout(dev->regmap[0], reg, val, + (val & wr_mask) == wr_mask, 1000, 10000); + if (ret) + dev_err(dev->dev, "Failed to read/write ACL table\n"); + + return ret; +} + +/** + * ksz9477_acl_entry_write - Writes an ACL entry to a given port at the + * specified index. + * @dev: The ksz_device instance. + * @port: The port number to write the ACL entry to. + * @entry: A pointer to the ACL entry data. + * @idx: The index at which to write the ACL entry. + * + * This function writes the provided ACL entry to the specified port at the + * given index. + * + * Returns: 0 if the operation is successful, or a negative error code if an + * error occurs. + */ +static int ksz9477_acl_entry_write(struct ksz_device *dev, int port, u8 *entry, + int idx) +{ + int ret, i; + u8 val; + + for (i = 0; i < KSZ9477_ACL_ENTRY_SIZE; i++) { + ret = ksz_pwrite8(dev, port, KSZ9477_PORT_ACL_0 + i, entry[i]); + if (ret) { + dev_err(dev->dev, "Failed to write ACL entry %d\n", i); + return ret; + } + } + + /* write everything down */ + val = FIELD_PREP(KSZ9477_ACL_INDEX_M, idx) | KSZ9477_ACL_WRITE; + ret = ksz_pwrite8(dev, port, KSZ9477_PORT_ACL_CTRL_0, val); + if (ret) + return ret; + + /* wait until everything is written */ + return ksz9477_acl_wait_ready(dev, port); +} + +/** + * ksz9477_acl_port_enable - Enables ACL functionality on a given port. + * @dev: The ksz_device instance. + * @port: The port number on which to enable ACL functionality. + * + * This function enables ACL functionality on the specified port by configuring + * the appropriate control registers. It returns 0 if the operation is + * successful, or a negative error code if an error occurs. + * + * 0xn801 - KSZ9477S 5.2.8.2 Port Priority Control Register + * Bit 7 - Highest Priority + * Bit 6 - OR'ed Priority + * Bit 4 - MAC Address Priority Classification + * Bit 3 - VLAN Priority Classification + * Bit 2 - 802.1p Priority Classification + * Bit 1 - Diffserv Priority Classification + * Bit 0 - ACL Priority Classification + * + * Current driver implementation sets 802.1p priority classification by default. + * In this function we add ACL priority classification with OR'ed priority. + * According to testing, priority set by ACL will supersede the 802.1p priority. + * + * 0xn803 - KSZ9477S 5.2.8.4 Port Authentication Control Register + * Bit 2 - Access Control List (ACL) Enable + * Bits 1:0 - Authentication Mode + * 00 = Reserved + * 01 = Block Mode. Authentication is enabled. When ACL is + * enabled, all traffic that misses the ACL rules is + * blocked; otherwise ACL actions apply. + * 10 = Pass Mode. Authentication is disabled. When ACL is + * enabled, all traffic that misses the ACL rules is + * forwarded; otherwise ACL actions apply. + * 11 = Trap Mode. Authentication is enabled. All traffic is + * forwarded to the host port. When ACL is enabled, all + * traffic that misses the ACL rules is blocked; otherwise + * ACL actions apply. + * + * We are using Pass Mode int this function. + * + * Returns: 0 if the operation is successful, or a negative error code if an + * error occurs. + */ +static int ksz9477_acl_port_enable(struct ksz_device *dev, int port) +{ + int ret; + + ret = ksz_prmw8(dev, port, P_PRIO_CTRL, 0, PORT_ACL_PRIO_ENABLE | + PORT_OR_PRIO); + if (ret) + return ret; + + return ksz_pwrite8(dev, port, REG_PORT_MRI_AUTHEN_CTRL, + PORT_ACL_ENABLE | + FIELD_PREP(PORT_AUTHEN_MODE, PORT_AUTHEN_PASS)); +} + +/** + * ksz9477_acl_port_disable - Disables ACL functionality on a given port. + * @dev: The ksz_device instance. + * @port: The port number on which to disable ACL functionality. + * + * This function disables ACL functionality on the specified port by writing a + * value of 0 to the REG_PORT_MRI_AUTHEN_CTRL control register and remove + * PORT_ACL_PRIO_ENABLE bit from P_PRIO_CTRL register. + * + * Returns: 0 if the operation is successful, or a negative error code if an + * error occurs. + */ +static int ksz9477_acl_port_disable(struct ksz_device *dev, int port) +{ + int ret; + + ret = ksz_prmw8(dev, port, P_PRIO_CTRL, PORT_ACL_PRIO_ENABLE, 0); + if (ret) + return ret; + + return ksz_pwrite8(dev, port, REG_PORT_MRI_AUTHEN_CTRL, 0); +} + +/** + * ksz9477_acl_write_list - Write a list of ACL entries to a given port. + * @dev: The ksz_device instance. + * @port: The port number on which to write ACL entries. + * + * This function enables ACL functionality on the specified port, writes a list + * of ACL entries to the port, and disables ACL functionality if there are no + * entries. + * + * Returns: 0 if the operation is successful, or a negative error code if an + * error occurs. + */ +int ksz9477_acl_write_list(struct ksz_device *dev, int port) +{ + struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv; + struct ksz9477_acl_entries *acles = &acl->acles; + int ret, i; + + /* ACL should be enabled before writing entries */ + ret = ksz9477_acl_port_enable(dev, port); + if (ret) + return ret; + + /* write all entries */ + for (i = 0; i < ARRAY_SIZE(acles->entries); i++) { + u8 *entry = acles->entries[i].entry; + + /* Check if entry was removed and should be zeroed. + * If last fields of the entry are not zero, it means + * it is removed locally but currently not synced with the HW. + * So, we will write it down to the HW to remove it. + */ + if (i >= acles->entries_count && + entry[KSZ9477_ACL_PORT_ACCESS_10] == 0 && + entry[KSZ9477_ACL_PORT_ACCESS_11] == 0) + continue; + + ret = ksz9477_acl_entry_write(dev, port, entry, i); + if (ret) + return ret; + + /* now removed entry is clean on HW side, so it can + * in the cache too + */ + if (i >= acles->entries_count && + entry[KSZ9477_ACL_PORT_ACCESS_10] != 0 && + entry[KSZ9477_ACL_PORT_ACCESS_11] != 0) { + entry[KSZ9477_ACL_PORT_ACCESS_10] = 0; + entry[KSZ9477_ACL_PORT_ACCESS_11] = 0; + } + } + + if (!acles->entries_count) + return ksz9477_acl_port_disable(dev, port); + + return 0; +} + +/** + * ksz9477_acl_remove_entries - Remove ACL entries with a given cookie from a + * specified ksz9477_acl_entries structure. + * @dev: The ksz_device instance. + * @port: The port number on which to remove ACL entries. + * @acles: The ksz9477_acl_entries instance. + * @cookie: The cookie value to match for entry removal. + * + * This function iterates through the entries array, removing any entries with + * a matching cookie value. The remaining entries are then shifted down to fill + * the gap. + */ +void ksz9477_acl_remove_entries(struct ksz_device *dev, int port, + struct ksz9477_acl_entries *acles, + unsigned long cookie) +{ + int entries_count = acles->entries_count; + int ret, i, src_count; + int src_idx = -1; + + if (!entries_count) + return; + + /* Search for the first position with the cookie */ + for (i = 0; i < entries_count; i++) { + if (acles->entries[i].cookie == cookie) { + src_idx = i; + break; + } + } + + /* No entries with the matching cookie found */ + if (src_idx == -1) + return; + + /* Get the size of the cookie entry. We may have complex entries. */ + src_count = ksz9477_acl_get_cont_entr(dev, port, src_idx); + if (src_count <= 0) + return; + + /* Move all entries down to overwrite removed entry with the cookie */ + ret = ksz9477_move_entries_downwards(dev, acles, src_idx, + src_count, + entries_count - src_count); + if (ret) { + dev_err(dev->dev, "Failed to move ACL entries down\n"); + return; + } + + /* Overwrite new empty places at the end of the list with zeros to make + * sure not unexpected things will happen or no unexplored quirks will + * come out. + */ + for (i = entries_count - src_count; i < entries_count; i++) { + struct ksz9477_acl_entry *entry = &acles->entries[i]; + + memset(entry, 0, sizeof(*entry)); + + /* Set all access bits to be able to write zeroed entry to HW */ + entry->entry[KSZ9477_ACL_PORT_ACCESS_10] = 0xff; + entry->entry[KSZ9477_ACL_PORT_ACCESS_11] = 0xff; + } + + /* Adjust the total entries count */ + acles->entries_count -= src_count; +} + +/** + * ksz9477_port_acl_init - Initialize the ACL for a specified port on a ksz + * device. + * @dev: The ksz_device instance. + * @port: The port number to initialize the ACL for. + * + * This function allocates memory for an acl structure, associates it with the + * specified port, and initializes the ACL entries to a default state. The + * entries are then written using the ksz9477_acl_write_list function, ensuring + * the ACL has a predictable initial hardware state. + * + * Returns: 0 on success, or an error code on failure. + */ +int ksz9477_port_acl_init(struct ksz_device *dev, int port) +{ + struct ksz9477_acl_entries *acles; + struct ksz9477_acl_priv *acl; + int ret, i; + + acl = kzalloc(sizeof(*acl), GFP_KERNEL); + if (!acl) + return -ENOMEM; + + dev->ports[port].acl_priv = acl; + + acles = &acl->acles; + /* write all entries */ + for (i = 0; i < ARRAY_SIZE(acles->entries); i++) { + u8 *entry = acles->entries[i].entry; + + /* Set all access bits to be able to write zeroed + * entry + */ + entry[KSZ9477_ACL_PORT_ACCESS_10] = 0xff; + entry[KSZ9477_ACL_PORT_ACCESS_11] = 0xff; + } + + ret = ksz9477_acl_write_list(dev, port); + if (ret) + goto free_acl; + + return 0; + +free_acl: + kfree(dev->ports[port].acl_priv); + dev->ports[port].acl_priv = NULL; + + return ret; +} + +/** + * ksz9477_port_acl_free - Free the ACL resources for a specified port on a ksz + * device. + * @dev: The ksz_device instance. + * @port: The port number to initialize the ACL for. + * + * This disables the ACL for the specified port and frees the associated memory, + */ +void ksz9477_port_acl_free(struct ksz_device *dev, int port) +{ + if (!dev->ports[port].acl_priv) + return; + + ksz9477_acl_port_disable(dev, port); + + kfree(dev->ports[port].acl_priv); + dev->ports[port].acl_priv = NULL; +} + +/** + * ksz9477_acl_set_reg - Set entry[16] and entry[17] depending on the updated + * entry[] + * @entry: An array containing the entries + * @reg: The register of the entry that needs to be updated + * @value: The value to be assigned to the updated entry + * + * This function updates the entry[] array based on the provided register and + * value. It also sets entry[0x10] and entry[0x11] according to the ACL byte + * enable rules. + * + * 0x10 - Byte Enable [15:8] + * + * Each bit enables accessing one of the ACL bytes when a read or write is + * initiated by writing to the Port ACL Byte Enable LSB Register. + * Bit 0 applies to the Port ACL Access 7 Register + * Bit 1 applies to the Port ACL Access 6 Register, etc. + * Bit 7 applies to the Port ACL Access 0 Register + * 1 = Byte is selected for read/write + * 0 = Byte is not selected + * + * 0x11 - Byte Enable [7:0] + * + * Each bit enables accessing one of the ACL bytes when a read or write is + * initiated by writing to the Port ACL Byte Enable LSB Register. + * Bit 0 applies to the Port ACL Access F Register + * Bit 1 applies to the Port ACL Access E Register, etc. + * Bit 7 applies to the Port ACL Access 8 Register + * 1 = Byte is selected for read/write + * 0 = Byte is not selected + */ +static void ksz9477_acl_set_reg(u8 *entry, enum ksz9477_acl_port_access reg, + u8 value) +{ + if (reg >= KSZ9477_ACL_PORT_ACCESS_0 && + reg <= KSZ9477_ACL_PORT_ACCESS_7) { + entry[KSZ9477_ACL_PORT_ACCESS_10] |= + BIT(KSZ9477_ACL_PORT_ACCESS_7 - reg); + } else if (reg >= KSZ9477_ACL_PORT_ACCESS_8 && + reg <= KSZ9477_ACL_PORT_ACCESS_F) { + entry[KSZ9477_ACL_PORT_ACCESS_11] |= + BIT(KSZ9477_ACL_PORT_ACCESS_F - reg); + } else { + WARN_ON(1); + return; + } + + entry[reg] = value; +} + +/** + * ksz9477_acl_matching_rule_cfg_l2 - Configure an ACL filtering entry to match + * L2 types of Ethernet frames + * @entry: Pointer to ACL entry buffer + * @ethertype: Ethertype value + * @eth_addr: Pointer to Ethernet address + * @is_src: If true, match the source MAC address; if false, match the + * destination MAC address + * + * This function configures an Access Control List (ACL) filtering + * entry to match Layer 2 types of Ethernet frames based on the provided + * ethertype and Ethernet address. Additionally, it can match either the source + * or destination MAC address depending on the value of the is_src parameter. + * + * Register Descriptions for MD = 01 and ENB != 00 (Layer 2 MAC header + * filtering) + * + * 0x01 - Mode and Enable + * Bits 5:4 - MD (Mode) + * 01 = Layer 2 MAC header or counter filtering + * Bits 3:2 - ENB (Enable) + * 01 = Comparison is performed only on the TYPE value + * 10 = Comparison is performed only on the MAC Address value + * 11 = Both the MAC Address and TYPE are tested + * Bit 1 - S/D (Source / Destination) + * 0 = Destination address + * 1 = Source address + * Bit 0 - EQ (Equal / Not Equal) + * 0 = Not Equal produces true result + * 1 = Equal produces true result + * + * 0x02-0x07 - MAC Address + * 0x02 - MAC Address [47:40] + * 0x03 - MAC Address [39:32] + * 0x04 - MAC Address [31:24] + * 0x05 - MAC Address [23:16] + * 0x06 - MAC Address [15:8] + * 0x07 - MAC Address [7:0] + * + * 0x08-0x09 - EtherType + * 0x08 - EtherType [15:8] + * 0x09 - EtherType [7:0] + */ +static void ksz9477_acl_matching_rule_cfg_l2(u8 *entry, u16 ethertype, + u8 *eth_addr, bool is_src) +{ + u8 enb = 0; + u8 val; + + if (ethertype) + enb |= KSZ9477_ACL_ENB_L2_TYPE; + if (eth_addr) + enb |= KSZ9477_ACL_ENB_L2_MAC; + + val = FIELD_PREP(KSZ9477_ACL_MD_MASK, KSZ9477_ACL_MD_L2_MAC) | + FIELD_PREP(KSZ9477_ACL_ENB_MASK, enb) | + FIELD_PREP(KSZ9477_ACL_SD_SRC, is_src) | KSZ9477_ACL_EQ_EQUAL; + ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_1, val); + + if (eth_addr) { + int i; + + for (i = 0; i < ETH_ALEN; i++) { + ksz9477_acl_set_reg(entry, + KSZ9477_ACL_PORT_ACCESS_2 + i, + eth_addr[i]); + } + } + + ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_8, ethertype >> 8); + ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_9, ethertype & 0xff); +} + +/** + * ksz9477_acl_action_rule_cfg - Set action for an ACL entry + * @entry: Pointer to the ACL entry + * @force_prio: If true, force the priority value + * @prio_val: Priority value + * + * This function sets the action for the specified ACL entry. It prepares + * the priority mode and traffic class values and updates the entry's + * action registers accordingly. Currently, there is no port or VLAN PCP + * remapping. + * + * ACL Action Rule Parameters for Non-Count Modes (MD ≠01 or ENB ≠00) + * + * 0x0A - PM, P, RPE, RP[2:1] + * Bits 7:6 - PM[1:0] - Priority Mode + * 00 = ACL does not specify the packet priority. Priority is + * determined by standard QoS functions. + * 01 = Change packet priority to P[2:0] if it is greater than QoS + * result. + * 10 = Change packet priority to P[2:0] if it is smaller than the + * QoS result. + * 11 = Always change packet priority to P[2:0]. + * Bits 5:3 - P[2:0] - Priority value + * Bit 2 - RPE - Remark Priority Enable + * Bits 1:0 - RP[2:1] - Remarked Priority value (bits 2:1) + * 0 = Disable priority remarking + * 1 = Enable priority remarking. VLAN tag priority (PCP) bits are + * replaced by RP[2:0]. + * + * 0x0B - RP[0], MM + * Bit 7 - RP[0] - Remarked Priority value (bit 0) + * Bits 6:5 - MM[1:0] - Map Mode + * 00 = No forwarding remapping + * 01 = The forwarding map in FORWARD is OR'ed with the forwarding + * map from the Address Lookup Table. + * 10 = The forwarding map in FORWARD is AND'ed with the forwarding + * map from the Address Lookup Table. + * 11 = The forwarding map in FORWARD replaces the forwarding map + * from the Address Lookup Table. + * 0x0D - FORWARD[n:0] + * Bits 7:0 - FORWARD[n:0] - Forwarding map. Bit 0 = port 1, + * bit 1 = port 2, etc. + * 1 = enable forwarding to this port + * 0 = do not forward to this port + */ +void ksz9477_acl_action_rule_cfg(u8 *entry, bool force_prio, u8 prio_val) +{ + u8 prio_mode, val; + + if (force_prio) + prio_mode = KSZ9477_ACL_PM_REPLACE; + else + prio_mode = KSZ9477_ACL_PM_DISABLE; + + val = FIELD_PREP(KSZ9477_ACL_PM_M, prio_mode) | + FIELD_PREP(KSZ9477_ACL_P_M, prio_val); + ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_A, val); + + /* no port or VLAN PCP remapping for now */ + ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_B, 0); + ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_D, 0); +} + +/** + * ksz9477_acl_processing_rule_set_action - Set the action for the processing + * rule set. + * @entry: Pointer to the ACL entry + * @action_idx: Index of the action to be applied + * + * This function sets the action for the processing rule set by updating the + * appropriate register in the entry. There can be only one action per + * processing rule. + * + * Access Control List (ACL) Processing Rule Registers: + * + * 0x00 - First Rule Number (FRN) + * Bits 3:0 - First Rule Number. Pointer to an Action rule entry. + */ +void ksz9477_acl_processing_rule_set_action(u8 *entry, u8 action_idx) +{ + ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_0, action_idx); +} + +/** + * ksz9477_acl_processing_rule_add_match - Add a matching rule to the rule set + * @entry: Pointer to the ACL entry + * @match_idx: Index of the matching rule to be added + * + * This function adds a matching rule to the rule set by updating the + * appropriate bits in the entry's rule set registers. + * + * Access Control List (ACL) Processing Rule Registers: + * + * 0x0E - RuleSet [15:8] + * Bits 7:0 - RuleSet [15:8] Specifies a set of one or more Matching rule + * entries. RuleSet has one bit for each of the 16 Matching rule entries. + * If multiple Matching rules are selected, then all conditions will be + * AND'ed to produce a final match result. + * 0 = Matching rule not selected + * 1 = Matching rule selected + * + * 0x0F - RuleSet [7:0] + * Bits 7:0 - RuleSet [7:0] + */ +static void ksz9477_acl_processing_rule_add_match(u8 *entry, u8 match_idx) +{ + u8 vale = entry[KSZ9477_ACL_PORT_ACCESS_E]; + u8 valf = entry[KSZ9477_ACL_PORT_ACCESS_F]; + + if (match_idx < 8) + valf |= BIT(match_idx); + else + vale |= BIT(match_idx - 8); + + ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_E, vale); + ksz9477_acl_set_reg(entry, KSZ9477_ACL_PORT_ACCESS_F, valf); +} + +/** + * ksz9477_acl_get_init_entry - Get a new uninitialized entry for a specified + * port on a ksz_device. + * @dev: The ksz_device instance. + * @port: The port number to get the uninitialized entry for. + * @cookie: The cookie to associate with the entry. + * @prio: The priority to associate with the entry. + * + * This function retrieves the next available ACL entry for the specified port, + * clears all access flags, and associates it with the current cookie. + * + * Returns: A pointer to the new uninitialized ACL entry. + */ +static struct ksz9477_acl_entry * +ksz9477_acl_get_init_entry(struct ksz_device *dev, int port, + unsigned long cookie, u32 prio) +{ + struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv; + struct ksz9477_acl_entries *acles = &acl->acles; + struct ksz9477_acl_entry *entry; + + entry = &acles->entries[acles->entries_count]; + entry->cookie = cookie; + entry->prio = prio; + + /* clear all access flags */ + entry->entry[KSZ9477_ACL_PORT_ACCESS_10] = 0; + entry->entry[KSZ9477_ACL_PORT_ACCESS_11] = 0; + + return entry; +} + +/** + * ksz9477_acl_match_process_l2 - Configure Layer 2 ACL matching rules and + * processing rules. + * @dev: Pointer to the ksz_device. + * @port: Port number. + * @ethtype: Ethernet type. + * @src_mac: Source MAC address. + * @dst_mac: Destination MAC address. + * @cookie: The cookie to associate with the entry. + * @prio: The priority of the entry. + * + * This function sets up matching and processing rules for Layer 2 ACLs. + * It takes into account that only one MAC per entry is supported. + */ +void ksz9477_acl_match_process_l2(struct ksz_device *dev, int port, + u16 ethtype, u8 *src_mac, u8 *dst_mac, + unsigned long cookie, u32 prio) +{ + struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv; + struct ksz9477_acl_entries *acles = &acl->acles; + struct ksz9477_acl_entry *entry; + + entry = ksz9477_acl_get_init_entry(dev, port, cookie, prio); + + /* ACL supports only one MAC per entry */ + if (src_mac && dst_mac) { + ksz9477_acl_matching_rule_cfg_l2(entry->entry, ethtype, src_mac, + true); + + /* Add both match entries to first processing rule */ + ksz9477_acl_processing_rule_add_match(entry->entry, + acles->entries_count); + acles->entries_count++; + ksz9477_acl_processing_rule_add_match(entry->entry, + acles->entries_count); + + entry = ksz9477_acl_get_init_entry(dev, port, cookie, prio); + ksz9477_acl_matching_rule_cfg_l2(entry->entry, 0, dst_mac, + false); + acles->entries_count++; + } else { + u8 *mac = src_mac ? src_mac : dst_mac; + bool is_src = src_mac ? true : false; + + ksz9477_acl_matching_rule_cfg_l2(entry->entry, ethtype, mac, + is_src); + ksz9477_acl_processing_rule_add_match(entry->entry, + acles->entries_count); + acles->entries_count++; + } +} diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index cba3dba58bc3..504e085aab52 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -112,19 +112,6 @@ #define REG_SW_IBA_SYNC__1 0x010C -#define REG_SW_IO_STRENGTH__1 0x010D -#define SW_DRIVE_STRENGTH_M 0x7 -#define SW_DRIVE_STRENGTH_2MA 0 -#define SW_DRIVE_STRENGTH_4MA 1 -#define SW_DRIVE_STRENGTH_8MA 2 -#define SW_DRIVE_STRENGTH_12MA 3 -#define SW_DRIVE_STRENGTH_16MA 4 -#define SW_DRIVE_STRENGTH_20MA 5 -#define SW_DRIVE_STRENGTH_24MA 6 -#define SW_DRIVE_STRENGTH_28MA 7 -#define SW_HI_SPEED_DRIVE_STRENGTH_S 4 -#define SW_LO_SPEED_DRIVE_STRENGTH_S 0 - #define REG_SW_IBA_STATUS__4 0x0110 #define SW_IBA_REQ BIT(31) diff --git a/drivers/net/dsa/microchip/ksz9477_tc_flower.c b/drivers/net/dsa/microchip/ksz9477_tc_flower.c new file mode 100644 index 000000000000..8b2f5be667e0 --- /dev/null +++ b/drivers/net/dsa/microchip/ksz9477_tc_flower.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2023 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> + +#include "ksz9477.h" +#include "ksz9477_reg.h" +#include "ksz_common.h" + +#define ETHER_TYPE_FULL_MASK cpu_to_be16(~0) +#define KSZ9477_MAX_TC 7 + +/** + * ksz9477_flower_parse_key_l2 - Parse Layer 2 key from flow rule and configure + * ACL entries accordingly. + * @dev: Pointer to the ksz_device. + * @port: Port number. + * @extack: Pointer to the netlink_ext_ack. + * @rule: Pointer to the flow_rule. + * @cookie: The cookie to associate with the entry. + * @prio: The priority of the entry. + * + * This function parses the Layer 2 key from the flow rule and configures + * the corresponding ACL entries. It checks for unsupported offloads and + * available entries before proceeding with the configuration. + * + * Returns: 0 on success or a negative error code on failure. + */ +static int ksz9477_flower_parse_key_l2(struct ksz_device *dev, int port, + struct netlink_ext_ack *extack, + struct flow_rule *rule, + unsigned long cookie, u32 prio) +{ + struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv; + struct flow_match_eth_addrs ematch; + struct ksz9477_acl_entries *acles; + int required_entries; + u8 *src_mac = NULL; + u8 *dst_mac = NULL; + u16 ethtype = 0; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + + if (match.key->n_proto) { + if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) { + NL_SET_ERR_MSG_MOD(extack, + "ethernet type mask must be a full mask"); + return -EINVAL; + } + + ethtype = be16_to_cpu(match.key->n_proto); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + flow_rule_match_eth_addrs(rule, &ematch); + + if (!is_zero_ether_addr(ematch.key->src)) { + if (!is_broadcast_ether_addr(ematch.mask->src)) + goto not_full_mask_err; + + src_mac = ematch.key->src; + } + + if (!is_zero_ether_addr(ematch.key->dst)) { + if (!is_broadcast_ether_addr(ematch.mask->dst)) + goto not_full_mask_err; + + dst_mac = ematch.key->dst; + } + } + + acles = &acl->acles; + /* ACL supports only one MAC per entry */ + required_entries = src_mac && dst_mac ? 2 : 1; + + /* Check if there are enough available entries */ + if (acles->entries_count + required_entries > KSZ9477_ACL_MAX_ENTRIES) { + NL_SET_ERR_MSG_MOD(extack, "ACL entry limit reached"); + return -EOPNOTSUPP; + } + + ksz9477_acl_match_process_l2(dev, port, ethtype, src_mac, dst_mac, + cookie, prio); + + return 0; + +not_full_mask_err: + NL_SET_ERR_MSG_MOD(extack, "MAC address mask must be a full mask"); + return -EOPNOTSUPP; +} + +/** + * ksz9477_flower_parse_key - Parse flow rule keys for a specified port on a + * ksz_device. + * @dev: The ksz_device instance. + * @port: The port number to parse the flow rule keys for. + * @extack: The netlink extended ACK for reporting errors. + * @rule: The flow_rule to parse. + * @cookie: The cookie to associate with the entry. + * @prio: The priority of the entry. + * + * This function checks if the used keys in the flow rule are supported by + * the device and parses the L2 keys if they match. If unsupported keys are + * used, an error message is set in the extended ACK. + * + * Returns: 0 on success or a negative error code on failure. + */ +static int ksz9477_flower_parse_key(struct ksz_device *dev, int port, + struct netlink_ext_ack *extack, + struct flow_rule *rule, + unsigned long cookie, u32 prio) +{ + struct flow_dissector *dissector = rule->match.dissector; + int ret; + + if (dissector->used_keys & + ~(BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) | + BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL))) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported keys used"); + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + ret = ksz9477_flower_parse_key_l2(dev, port, extack, rule, + cookie, prio); + if (ret) + return ret; + } + + return 0; +} + +/** + * ksz9477_flower_parse_action - Parse flow rule actions for a specified port + * on a ksz_device. + * @dev: The ksz_device instance. + * @port: The port number to parse the flow rule actions for. + * @extack: The netlink extended ACK for reporting errors. + * @cls: The flow_cls_offload instance containing the flow rule. + * @entry_idx: The index of the ACL entry to store the action. + * + * This function checks if the actions in the flow rule are supported by + * the device. Currently, only actions that change priorities are supported. + * If unsupported actions are encountered, an error message is set in the + * extended ACK. + * + * Returns: 0 on success or a negative error code on failure. + */ +static int ksz9477_flower_parse_action(struct ksz_device *dev, int port, + struct netlink_ext_ack *extack, + struct flow_cls_offload *cls, + int entry_idx) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct ksz9477_acl_priv *acl = dev->ports[port].acl_priv; + const struct flow_action_entry *act; + struct ksz9477_acl_entry *entry; + bool prio_force = false; + u8 prio_val = 0; + int i; + + if (TC_H_MIN(cls->classid)) { + NL_SET_ERR_MSG_MOD(extack, "hw_tc is not supported. Use: action skbedit prio"); + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_PRIORITY: + if (act->priority > KSZ9477_MAX_TC) { + NL_SET_ERR_MSG_MOD(extack, "Priority value is too high"); + return -EOPNOTSUPP; + } + prio_force = true; + prio_val = act->priority; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "action not supported"); + return -EOPNOTSUPP; + } + } + + /* pick entry to store action */ + entry = &acl->acles.entries[entry_idx]; + + ksz9477_acl_action_rule_cfg(entry->entry, prio_force, prio_val); + ksz9477_acl_processing_rule_set_action(entry->entry, entry_idx); + + return 0; +} + +/** + * ksz9477_cls_flower_add - Add a flow classification rule for a specified port + * on a ksz_device. + * @ds: The DSA switch instance. + * @port: The port number to add the flow classification rule to. + * @cls: The flow_cls_offload instance containing the flow rule. + * @ingress: A flag indicating if the rule is applied on the ingress path. + * + * This function adds a flow classification rule for a specified port on a + * ksz_device. It checks if the ACL offloading is supported and parses the flow + * keys and actions. If the ACL is not supported, it returns an error. If there + * are unprocessed entries, it parses the action for the rule. + * + * Returns: 0 on success or a negative error code on failure. + */ +int ksz9477_cls_flower_add(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct netlink_ext_ack *extack = cls->common.extack; + struct ksz_device *dev = ds->priv; + struct ksz9477_acl_priv *acl; + int action_entry_idx; + int ret; + + acl = dev->ports[port].acl_priv; + + if (!acl) { + NL_SET_ERR_MSG_MOD(extack, "ACL offloading is not supported"); + return -EOPNOTSUPP; + } + + /* A complex rule set can take multiple entries. Use first entry + * to store the action. + */ + action_entry_idx = acl->acles.entries_count; + + ret = ksz9477_flower_parse_key(dev, port, extack, rule, cls->cookie, + cls->common.prio); + if (ret) + return ret; + + ret = ksz9477_flower_parse_action(dev, port, extack, cls, + action_entry_idx); + if (ret) + return ret; + + ret = ksz9477_sort_acl_entries(dev, port); + if (ret) + return ret; + + return ksz9477_acl_write_list(dev, port); +} + +/** + * ksz9477_cls_flower_del - Remove a flow classification rule for a specified + * port on a ksz_device. + * @ds: The DSA switch instance. + * @port: The port number to remove the flow classification rule from. + * @cls: The flow_cls_offload instance containing the flow rule. + * @ingress: A flag indicating if the rule is applied on the ingress path. + * + * This function removes a flow classification rule for a specified port on a + * ksz_device. It checks if the ACL is initialized, and if not, returns an + * error. If the ACL is initialized, it removes entries with the specified + * cookie and rewrites the ACL list. + * + * Returns: 0 on success or a negative error code on failure. + */ +int ksz9477_cls_flower_del(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress) +{ + unsigned long cookie = cls->cookie; + struct ksz_device *dev = ds->priv; + struct ksz9477_acl_priv *acl; + + acl = dev->ports[port].acl_priv; + + if (!acl) + return -EOPNOTSUPP; + + ksz9477_acl_remove_entries(dev, port, &acl->acles, cookie); + + return ksz9477_acl_write_list(dev, port); +} diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 42db7679c360..173ad8f04671 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -186,6 +186,72 @@ static const struct ksz_mib_names ksz9477_mib_names[] = { { 0x83, "tx_discards" }, }; +struct ksz_driver_strength_prop { + const char *name; + int offset; + int value; +}; + +enum ksz_driver_strength_type { + KSZ_DRIVER_STRENGTH_HI, + KSZ_DRIVER_STRENGTH_LO, + KSZ_DRIVER_STRENGTH_IO, +}; + +/** + * struct ksz_drive_strength - drive strength mapping + * @reg_val: register value + * @microamp: microamp value + */ +struct ksz_drive_strength { + u32 reg_val; + u32 microamp; +}; + +/* ksz9477_drive_strengths - Drive strength mapping for KSZ9477 variants + * + * This values are not documented in KSZ9477 variants but confirmed by + * Microchip that KSZ9477, KSZ9567, KSZ8567, KSZ9897, KSZ9896, KSZ9563, KSZ9893 + * and KSZ8563 are using same register (drive strength) settings like KSZ8795. + * + * Documentation in KSZ8795CLX provides more information with some + * recommendations: + * - for high speed signals + * 1. 4 mA or 8 mA is often used for MII, RMII, and SPI interface with using + * 2.5V or 3.3V VDDIO. + * 2. 12 mA or 16 mA is often used for MII, RMII, and SPI interface with + * using 1.8V VDDIO. + * 3. 20 mA or 24 mA is often used for GMII/RGMII interface with using 2.5V + * or 3.3V VDDIO. + * 4. 28 mA is often used for GMII/RGMII interface with using 1.8V VDDIO. + * 5. In same interface, the heavy loading should use higher one of the + * drive current strength. + * - for low speed signals + * 1. 3.3V VDDIO, use either 4 mA or 8 mA. + * 2. 2.5V VDDIO, use either 8 mA or 12 mA. + * 3. 1.8V VDDIO, use either 12 mA or 16 mA. + * 4. If it is heavy loading, can use higher drive current strength. + */ +static const struct ksz_drive_strength ksz9477_drive_strengths[] = { + { SW_DRIVE_STRENGTH_2MA, 2000 }, + { SW_DRIVE_STRENGTH_4MA, 4000 }, + { SW_DRIVE_STRENGTH_8MA, 8000 }, + { SW_DRIVE_STRENGTH_12MA, 12000 }, + { SW_DRIVE_STRENGTH_16MA, 16000 }, + { SW_DRIVE_STRENGTH_20MA, 20000 }, + { SW_DRIVE_STRENGTH_24MA, 24000 }, + { SW_DRIVE_STRENGTH_28MA, 28000 }, +}; + +/* ksz8830_drive_strengths - Drive strength mapping for KSZ8830, KSZ8873, .. + * variants. + * This values are documented in KSZ8873 and KSZ8863 datasheets. + */ +static const struct ksz_drive_strength ksz8830_drive_strengths[] = { + { 0, 8000 }, + { KSZ8873_DRIVE_STRENGTH_16MA, 16000 }, +}; + static const struct ksz_dev_ops ksz8_dev_ops = { .setup = ksz8_setup, .get_port_addr = ksz8_get_port_addr, @@ -2498,8 +2564,7 @@ static int ksz_port_mdb_del(struct dsa_switch *ds, int port, return dev->dev_ops->mdb_del(dev, port, mdb, db); } -static int ksz_enable_port(struct dsa_switch *ds, int port, - struct phy_device *phy) +static int ksz_port_setup(struct dsa_switch *ds, int port) { struct ksz_device *dev = ds->priv; @@ -2562,6 +2627,23 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) ksz_update_port_member(dev, port); } +static void ksz_port_teardown(struct dsa_switch *ds, int port) +{ + struct ksz_device *dev = ds->priv; + + switch (dev->chip_id) { + case KSZ8563_CHIP_ID: + case KSZ9477_CHIP_ID: + case KSZ9563_CHIP_ID: + case KSZ9567_CHIP_ID: + case KSZ9893_CHIP_ID: + case KSZ9896_CHIP_ID: + case KSZ9897_CHIP_ID: + if (dsa_is_user_port(ds, port)) + ksz9477_port_acl_free(dev, port); + } +} + static int ksz_port_pre_bridge_flags(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack) @@ -3107,6 +3189,44 @@ static int ksz_switch_detect(struct ksz_device *dev) return 0; } +static int ksz_cls_flower_add(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress) +{ + struct ksz_device *dev = ds->priv; + + switch (dev->chip_id) { + case KSZ8563_CHIP_ID: + case KSZ9477_CHIP_ID: + case KSZ9563_CHIP_ID: + case KSZ9567_CHIP_ID: + case KSZ9893_CHIP_ID: + case KSZ9896_CHIP_ID: + case KSZ9897_CHIP_ID: + return ksz9477_cls_flower_add(ds, port, cls, ingress); + } + + return -EOPNOTSUPP; +} + +static int ksz_cls_flower_del(struct dsa_switch *ds, int port, + struct flow_cls_offload *cls, bool ingress) +{ + struct ksz_device *dev = ds->priv; + + switch (dev->chip_id) { + case KSZ8563_CHIP_ID: + case KSZ9477_CHIP_ID: + case KSZ9563_CHIP_ID: + case KSZ9567_CHIP_ID: + case KSZ9893_CHIP_ID: + case KSZ9896_CHIP_ID: + case KSZ9897_CHIP_ID: + return ksz9477_cls_flower_del(ds, port, cls, ingress); + } + + return -EOPNOTSUPP; +} + /* Bandwidth is calculated by idle slope/transmission speed. Then the Bandwidth * is converted to Hex-decimal using the successive multiplication method. On * every step, integer part is taken and decimal part is carry forwarded. @@ -3431,7 +3551,7 @@ static const struct dsa_switch_ops ksz_switch_ops = { .phylink_mac_config = ksz_phylink_mac_config, .phylink_mac_link_up = ksz_phylink_mac_link_up, .phylink_mac_link_down = ksz_mac_link_down, - .port_enable = ksz_enable_port, + .port_setup = ksz_port_setup, .set_ageing_time = ksz_set_ageing_time, .get_strings = ksz_get_strings, .get_ethtool_stats = ksz_get_ethtool_stats, @@ -3439,6 +3559,7 @@ static const struct dsa_switch_ops ksz_switch_ops = { .port_bridge_join = ksz_port_bridge_join, .port_bridge_leave = ksz_port_bridge_leave, .port_stp_state_set = ksz_port_stp_state_set, + .port_teardown = ksz_port_teardown, .port_pre_bridge_flags = ksz_port_pre_bridge_flags, .port_bridge_flags = ksz_port_bridge_flags, .port_fast_age = ksz_port_fast_age, @@ -3461,6 +3582,8 @@ static const struct dsa_switch_ops ksz_switch_ops = { .port_hwtstamp_set = ksz_hwtstamp_set, .port_txtstamp = ksz_port_txtstamp, .port_rxtstamp = ksz_port_rxtstamp, + .cls_flower_add = ksz_cls_flower_add, + .cls_flower_del = ksz_cls_flower_del, .port_setup_tc = ksz_setup_tc, .get_mac_eee = ksz_get_mac_eee, .set_mac_eee = ksz_set_mac_eee, @@ -3530,6 +3653,245 @@ static void ksz_parse_rgmii_delay(struct ksz_device *dev, int port_num, dev->ports[port_num].rgmii_tx_val = tx_delay; } +/** + * ksz_drive_strength_to_reg() - Convert drive strength value to corresponding + * register value. + * @array: The array of drive strength values to search. + * @array_size: The size of the array. + * @microamp: The drive strength value in microamp to be converted. + * + * This function searches the array of drive strength values for the given + * microamp value and returns the corresponding register value for that drive. + * + * Returns: If found, the corresponding register value for that drive strength + * is returned. Otherwise, -EINVAL is returned indicating an invalid value. + */ +static int ksz_drive_strength_to_reg(const struct ksz_drive_strength *array, + size_t array_size, int microamp) +{ + int i; + + for (i = 0; i < array_size; i++) { + if (array[i].microamp == microamp) + return array[i].reg_val; + } + + return -EINVAL; +} + +/** + * ksz_drive_strength_error() - Report invalid drive strength value + * @dev: ksz device + * @array: The array of drive strength values to search. + * @array_size: The size of the array. + * @microamp: Invalid drive strength value in microamp + * + * This function logs an error message when an unsupported drive strength value + * is detected. It lists out all the supported drive strength values for + * reference in the error message. + */ +static void ksz_drive_strength_error(struct ksz_device *dev, + const struct ksz_drive_strength *array, + size_t array_size, int microamp) +{ + char supported_values[100]; + size_t remaining_size; + int added_len; + char *ptr; + int i; + + remaining_size = sizeof(supported_values); + ptr = supported_values; + + for (i = 0; i < array_size; i++) { + added_len = snprintf(ptr, remaining_size, + i == 0 ? "%d" : ", %d", array[i].microamp); + + if (added_len >= remaining_size) + break; + + ptr += added_len; + remaining_size -= added_len; + } + + dev_err(dev->dev, "Invalid drive strength %d, supported values are %s\n", + microamp, supported_values); +} + +/** + * ksz9477_drive_strength_write() - Set the drive strength for specific KSZ9477 + * chip variants. + * @dev: ksz device + * @props: Array of drive strength properties to be applied + * @num_props: Number of properties in the array + * + * This function configures the drive strength for various KSZ9477 chip variants + * based on the provided properties. It handles chip-specific nuances and + * ensures only valid drive strengths are written to the respective chip. + * + * Return: 0 on successful configuration, a negative error code on failure. + */ +static int ksz9477_drive_strength_write(struct ksz_device *dev, + struct ksz_driver_strength_prop *props, + int num_props) +{ + size_t array_size = ARRAY_SIZE(ksz9477_drive_strengths); + int i, ret, reg; + u8 mask = 0; + u8 val = 0; + + if (props[KSZ_DRIVER_STRENGTH_IO].value != -1) + dev_warn(dev->dev, "%s is not supported by this chip variant\n", + props[KSZ_DRIVER_STRENGTH_IO].name); + + if (dev->chip_id == KSZ8795_CHIP_ID || + dev->chip_id == KSZ8794_CHIP_ID || + dev->chip_id == KSZ8765_CHIP_ID) + reg = KSZ8795_REG_SW_CTRL_20; + else + reg = KSZ9477_REG_SW_IO_STRENGTH; + + for (i = 0; i < num_props; i++) { + if (props[i].value == -1) + continue; + + ret = ksz_drive_strength_to_reg(ksz9477_drive_strengths, + array_size, props[i].value); + if (ret < 0) { + ksz_drive_strength_error(dev, ksz9477_drive_strengths, + array_size, props[i].value); + return ret; + } + + mask |= SW_DRIVE_STRENGTH_M << props[i].offset; + val |= ret << props[i].offset; + } + + return ksz_rmw8(dev, reg, mask, val); +} + +/** + * ksz8830_drive_strength_write() - Set the drive strength configuration for + * KSZ8830 compatible chip variants. + * @dev: ksz device + * @props: Array of drive strength properties to be set + * @num_props: Number of properties in the array + * + * This function applies the specified drive strength settings to KSZ8830 chip + * variants (KSZ8873, KSZ8863). + * It ensures the configurations align with what the chip variant supports and + * warns or errors out on unsupported settings. + * + * Return: 0 on success, error code otherwise + */ +static int ksz8830_drive_strength_write(struct ksz_device *dev, + struct ksz_driver_strength_prop *props, + int num_props) +{ + size_t array_size = ARRAY_SIZE(ksz8830_drive_strengths); + int microamp; + int i, ret; + + for (i = 0; i < num_props; i++) { + if (props[i].value == -1 || i == KSZ_DRIVER_STRENGTH_IO) + continue; + + dev_warn(dev->dev, "%s is not supported by this chip variant\n", + props[i].name); + } + + microamp = props[KSZ_DRIVER_STRENGTH_IO].value; + ret = ksz_drive_strength_to_reg(ksz8830_drive_strengths, array_size, + microamp); + if (ret < 0) { + ksz_drive_strength_error(dev, ksz8830_drive_strengths, + array_size, microamp); + return ret; + } + + return ksz_rmw8(dev, KSZ8873_REG_GLOBAL_CTRL_12, + KSZ8873_DRIVE_STRENGTH_16MA, ret); +} + +/** + * ksz_parse_drive_strength() - Extract and apply drive strength configurations + * from device tree properties. + * @dev: ksz device + * + * This function reads the specified drive strength properties from the + * device tree, validates against the supported chip variants, and sets + * them accordingly. An error should be critical here, as the drive strength + * settings are crucial for EMI compliance. + * + * Return: 0 on success, error code otherwise + */ +static int ksz_parse_drive_strength(struct ksz_device *dev) +{ + struct ksz_driver_strength_prop of_props[] = { + [KSZ_DRIVER_STRENGTH_HI] = { + .name = "microchip,hi-drive-strength-microamp", + .offset = SW_HI_SPEED_DRIVE_STRENGTH_S, + .value = -1, + }, + [KSZ_DRIVER_STRENGTH_LO] = { + .name = "microchip,lo-drive-strength-microamp", + .offset = SW_LO_SPEED_DRIVE_STRENGTH_S, + .value = -1, + }, + [KSZ_DRIVER_STRENGTH_IO] = { + .name = "microchip,io-drive-strength-microamp", + .offset = 0, /* don't care */ + .value = -1, + }, + }; + struct device_node *np = dev->dev->of_node; + bool have_any_prop = false; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(of_props); i++) { + ret = of_property_read_u32(np, of_props[i].name, + &of_props[i].value); + if (ret && ret != -EINVAL) + dev_warn(dev->dev, "Failed to read %s\n", + of_props[i].name); + if (ret) + continue; + + have_any_prop = true; + } + + if (!have_any_prop) + return 0; + + switch (dev->chip_id) { + case KSZ8830_CHIP_ID: + return ksz8830_drive_strength_write(dev, of_props, + ARRAY_SIZE(of_props)); + case KSZ8795_CHIP_ID: + case KSZ8794_CHIP_ID: + case KSZ8765_CHIP_ID: + case KSZ8563_CHIP_ID: + case KSZ9477_CHIP_ID: + case KSZ9563_CHIP_ID: + case KSZ9567_CHIP_ID: + case KSZ9893_CHIP_ID: + case KSZ9896_CHIP_ID: + case KSZ9897_CHIP_ID: + return ksz9477_drive_strength_write(dev, of_props, + ARRAY_SIZE(of_props)); + default: + for (i = 0; i < ARRAY_SIZE(of_props); i++) { + if (of_props[i].value == -1) + continue; + + dev_warn(dev->dev, "%s is not supported by this chip variant\n", + of_props[i].name); + } + } + + return 0; +} + int ksz_switch_register(struct ksz_device *dev) { const struct ksz_chip_data *info; @@ -3612,6 +3974,10 @@ int ksz_switch_register(struct ksz_device *dev) for (port_num = 0; port_num < dev->info->port_cnt; ++port_num) dev->ports[port_num].interface = PHY_INTERFACE_MODE_NA; if (dev->dev->of_node) { + ret = ksz_parse_drive_strength(dev); + if (ret) + return ret; + ret = of_get_phy_mode(dev->dev->of_node, &interface); if (ret == 0) dev->compat_interface = interface; diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index a4de58847dea..d180c8a34e27 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -117,6 +117,7 @@ struct ksz_port { u32 rgmii_tx_val; u32 rgmii_rx_val; struct ksz_device *ksz_dev; + void *acl_priv; struct ksz_irq pirq; u8 num; #if IS_ENABLED(CONFIG_NET_DSA_MICROCHIP_KSZ_PTP) @@ -689,6 +690,26 @@ static inline int is_lan937x(struct ksz_device *dev) #define KSZ8_LEGAL_PACKET_SIZE 1518 #define KSZ9477_MAX_FRAME_SIZE 9000 +#define KSZ8873_REG_GLOBAL_CTRL_12 0x0e +/* Drive Strength of I/O Pad + * 0: 8mA, 1: 16mA + */ +#define KSZ8873_DRIVE_STRENGTH_16MA BIT(6) + +#define KSZ8795_REG_SW_CTRL_20 0xa3 +#define KSZ9477_REG_SW_IO_STRENGTH 0x010d +#define SW_DRIVE_STRENGTH_M 0x7 +#define SW_DRIVE_STRENGTH_2MA 0 +#define SW_DRIVE_STRENGTH_4MA 1 +#define SW_DRIVE_STRENGTH_8MA 2 +#define SW_DRIVE_STRENGTH_12MA 3 +#define SW_DRIVE_STRENGTH_16MA 4 +#define SW_DRIVE_STRENGTH_20MA 5 +#define SW_DRIVE_STRENGTH_24MA 6 +#define SW_DRIVE_STRENGTH_28MA 7 +#define SW_HI_SPEED_DRIVE_STRENGTH_S 4 +#define SW_LO_SPEED_DRIVE_STRENGTH_S 0 + #define KSZ9477_REG_PORT_OUT_RATE_0 0x0420 #define KSZ9477_OUT_RATE_NO_LIMIT 0 diff --git a/drivers/net/dsa/mt7530-mmio.c b/drivers/net/dsa/mt7530-mmio.c index 0a6a2fe34e64..b74a230a3f13 100644 --- a/drivers/net/dsa/mt7530-mmio.c +++ b/drivers/net/dsa/mt7530-mmio.c @@ -63,15 +63,12 @@ mt7988_probe(struct platform_device *pdev) return dsa_register_switch(priv->ds); } -static int -mt7988_remove(struct platform_device *pdev) +static void mt7988_remove(struct platform_device *pdev) { struct mt7530_priv *priv = platform_get_drvdata(pdev); if (priv) mt7530_remove_common(priv); - - return 0; } static void mt7988_shutdown(struct platform_device *pdev) @@ -88,7 +85,7 @@ static void mt7988_shutdown(struct platform_device *pdev) static struct platform_driver mt7988_platform_driver = { .probe = mt7988_probe, - .remove = mt7988_remove, + .remove_new = mt7988_remove, .shutdown = mt7988_shutdown, .driver = { .name = "mt7530-mmio", diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c index ba373656bfe1..9a8429f5d09c 100644 --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c @@ -208,7 +208,7 @@ static void mv88e639x_sgmii_pcs_pre_config(struct phylink_pcs *pcs, static int mv88e6390_erratum_3_14(struct mv88e639x_pcs *mpcs) { - const int lanes[] = { MV88E6390_PORT9_LANE0, MV88E6390_PORT9_LANE1, + static const int lanes[] = { MV88E6390_PORT9_LANE0, MV88E6390_PORT9_LANE1, MV88E6390_PORT9_LANE2, MV88E6390_PORT9_LANE3, MV88E6390_PORT10_LANE0, MV88E6390_PORT10_LANE1, MV88E6390_PORT10_LANE2, MV88E6390_PORT10_LANE3 }; diff --git a/drivers/net/dsa/ocelot/ocelot_ext.c b/drivers/net/dsa/ocelot/ocelot_ext.c index c29bee5a5c48..22187d831c4b 100644 --- a/drivers/net/dsa/ocelot/ocelot_ext.c +++ b/drivers/net/dsa/ocelot/ocelot_ext.c @@ -115,19 +115,17 @@ err_free_felix: return err; } -static int ocelot_ext_remove(struct platform_device *pdev) +static void ocelot_ext_remove(struct platform_device *pdev) { struct felix *felix = dev_get_drvdata(&pdev->dev); if (!felix) - return 0; + return; dsa_unregister_switch(felix->ds); kfree(felix->ds); kfree(felix); - - return 0; } static void ocelot_ext_shutdown(struct platform_device *pdev) @@ -154,7 +152,7 @@ static struct platform_driver ocelot_ext_switch_driver = { .of_match_table = ocelot_ext_switch_of_match, }, .probe = ocelot_ext_probe, - .remove = ocelot_ext_remove, + .remove_new = ocelot_ext_remove, .shutdown = ocelot_ext_shutdown, }; module_platform_driver(ocelot_ext_switch_driver); diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 8f912bda120b..049930da0521 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1029,19 +1029,17 @@ err_alloc_felix: return err; } -static int seville_remove(struct platform_device *pdev) +static void seville_remove(struct platform_device *pdev) { struct felix *felix = platform_get_drvdata(pdev); if (!felix) - return 0; + return; dsa_unregister_switch(felix->ds); kfree(felix->ds); kfree(felix); - - return 0; } static void seville_shutdown(struct platform_device *pdev) @@ -1064,7 +1062,7 @@ MODULE_DEVICE_TABLE(of, seville_of_match); static struct platform_driver seville_vsc9953_driver = { .probe = seville_probe, - .remove = seville_remove, + .remove_new = seville_remove, .shutdown = seville_shutdown, .driver = { .name = "mscc_seville", diff --git a/drivers/net/dsa/realtek/realtek-smi.c b/drivers/net/dsa/realtek/realtek-smi.c index ff13563059c5..bfd11591faf4 100644 --- a/drivers/net/dsa/realtek/realtek-smi.c +++ b/drivers/net/dsa/realtek/realtek-smi.c @@ -506,12 +506,12 @@ static int realtek_smi_probe(struct platform_device *pdev) return 0; } -static int realtek_smi_remove(struct platform_device *pdev) +static void realtek_smi_remove(struct platform_device *pdev) { struct realtek_priv *priv = platform_get_drvdata(pdev); if (!priv) - return 0; + return; dsa_unregister_switch(priv->ds); if (priv->slave_mii_bus) @@ -520,8 +520,6 @@ static int realtek_smi_remove(struct platform_device *pdev) /* leave the device reset asserted */ if (priv->reset) gpiod_set_value(priv->reset, 1); - - return 0; } static void realtek_smi_shutdown(struct platform_device *pdev) @@ -559,7 +557,7 @@ static struct platform_driver realtek_smi_driver = { .of_match_table = realtek_smi_of_match, }, .probe = realtek_smi_probe, - .remove = realtek_smi_remove, + .remove_new = realtek_smi_remove, .shutdown = realtek_smi_shutdown, }; module_platform_driver(realtek_smi_driver); diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c index 7868ef237f6c..b39b719a5b8f 100644 --- a/drivers/net/dsa/realtek/rtl8366rb.c +++ b/drivers/net/dsa/realtek/rtl8366rb.c @@ -95,12 +95,6 @@ #define RTL8366RB_PAACR_RX_PAUSE BIT(6) #define RTL8366RB_PAACR_AN BIT(7) -#define RTL8366RB_PAACR_CPU_PORT (RTL8366RB_PAACR_SPEED_1000M | \ - RTL8366RB_PAACR_FULL_DUPLEX | \ - RTL8366RB_PAACR_LINK_UP | \ - RTL8366RB_PAACR_TX_PAUSE | \ - RTL8366RB_PAACR_RX_PAUSE) - /* bits 0..7 = port 0, bits 8..15 = port 1 */ #define RTL8366RB_PSTAT0 0x0014 /* bits 0..7 = port 2, bits 8..15 = port 3 */ @@ -1081,29 +1075,61 @@ rtl8366rb_mac_link_up(struct dsa_switch *ds, int port, unsigned int mode, int speed, int duplex, bool tx_pause, bool rx_pause) { struct realtek_priv *priv = ds->priv; + unsigned int val; int ret; + /* Allow forcing the mode on the fixed CPU port, no autonegotiation. + * We assume autonegotiation works on the PHY-facing ports. + */ if (port != priv->cpu_port) return; dev_dbg(priv->dev, "MAC link up on CPU port (%d)\n", port); - /* Force the fixed CPU port into 1Gbit mode, no autonegotiation */ ret = regmap_update_bits(priv->map, RTL8366RB_MAC_FORCE_CTRL_REG, BIT(port), BIT(port)); if (ret) { - dev_err(priv->dev, "failed to force 1Gbit on CPU port\n"); + dev_err(priv->dev, "failed to force CPU port\n"); return; } + /* Conjure port config */ + switch (speed) { + case SPEED_10: + val = RTL8366RB_PAACR_SPEED_10M; + break; + case SPEED_100: + val = RTL8366RB_PAACR_SPEED_100M; + break; + case SPEED_1000: + val = RTL8366RB_PAACR_SPEED_1000M; + break; + default: + val = RTL8366RB_PAACR_SPEED_1000M; + break; + } + + if (duplex == DUPLEX_FULL) + val |= RTL8366RB_PAACR_FULL_DUPLEX; + + if (tx_pause) + val |= RTL8366RB_PAACR_TX_PAUSE; + + if (rx_pause) + val |= RTL8366RB_PAACR_RX_PAUSE; + + val |= RTL8366RB_PAACR_LINK_UP; + ret = regmap_update_bits(priv->map, RTL8366RB_PAACR2, 0xFF00U, - RTL8366RB_PAACR_CPU_PORT << 8); + val << 8); if (ret) { dev_err(priv->dev, "failed to set PAACR on CPU port\n"); return; } + dev_dbg(priv->dev, "set PAACR to %04x\n", val); + /* Enable the CPU port */ ret = regmap_update_bits(priv->map, RTL8366RB_PECR, BIT(port), 0); diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c index 2eda10b33f2e..10092ea85e46 100644 --- a/drivers/net/dsa/rzn1_a5psw.c +++ b/drivers/net/dsa/rzn1_a5psw.c @@ -1272,19 +1272,17 @@ free_pcs: return ret; } -static int a5psw_remove(struct platform_device *pdev) +static void a5psw_remove(struct platform_device *pdev) { struct a5psw *a5psw = platform_get_drvdata(pdev); if (!a5psw) - return 0; + return; dsa_unregister_switch(&a5psw->ds); a5psw_pcs_free(a5psw); clk_disable_unprepare(a5psw->hclk); clk_disable_unprepare(a5psw->clk); - - return 0; } static void a5psw_shutdown(struct platform_device *pdev) @@ -1311,7 +1309,7 @@ static struct platform_driver a5psw_driver = { .of_match_table = a5psw_of_mtable, }, .probe = a5psw_probe, - .remove = a5psw_remove, + .remove_new = a5psw_remove, .shutdown = a5psw_shutdown, }; module_platform_driver(a5psw_driver); diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c index e3699f76f6d7..08a3e7b96254 100644 --- a/drivers/net/dsa/sja1105/sja1105_clocking.c +++ b/drivers/net/dsa/sja1105/sja1105_clocking.c @@ -153,14 +153,14 @@ static int sja1105_cgu_mii_tx_clk_config(struct sja1105_private *priv, { const struct sja1105_regs *regs = priv->info->regs; struct sja1105_cgu_mii_ctrl mii_tx_clk; - const int mac_clk_sources[] = { + static const int mac_clk_sources[] = { CLKSRC_MII0_TX_CLK, CLKSRC_MII1_TX_CLK, CLKSRC_MII2_TX_CLK, CLKSRC_MII3_TX_CLK, CLKSRC_MII4_TX_CLK, }; - const int phy_clk_sources[] = { + static const int phy_clk_sources[] = { CLKSRC_IDIV0, CLKSRC_IDIV1, CLKSRC_IDIV2, @@ -194,7 +194,7 @@ sja1105_cgu_mii_rx_clk_config(struct sja1105_private *priv, int port) const struct sja1105_regs *regs = priv->info->regs; struct sja1105_cgu_mii_ctrl mii_rx_clk; u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0}; - const int clk_sources[] = { + static const int clk_sources[] = { CLKSRC_MII0_RX_CLK, CLKSRC_MII1_RX_CLK, CLKSRC_MII2_RX_CLK, @@ -221,7 +221,7 @@ sja1105_cgu_mii_ext_tx_clk_config(struct sja1105_private *priv, int port) const struct sja1105_regs *regs = priv->info->regs; struct sja1105_cgu_mii_ctrl mii_ext_tx_clk; u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0}; - const int clk_sources[] = { + static const int clk_sources[] = { CLKSRC_IDIV0, CLKSRC_IDIV1, CLKSRC_IDIV2, @@ -248,7 +248,7 @@ sja1105_cgu_mii_ext_rx_clk_config(struct sja1105_private *priv, int port) const struct sja1105_regs *regs = priv->info->regs; struct sja1105_cgu_mii_ctrl mii_ext_rx_clk; u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0}; - const int clk_sources[] = { + static const int clk_sources[] = { CLKSRC_IDIV0, CLKSRC_IDIV1, CLKSRC_IDIV2, @@ -349,8 +349,13 @@ static int sja1105_cgu_rgmii_tx_clk_config(struct sja1105_private *priv, if (speed == priv->info->port_speed[SJA1105_SPEED_1000MBPS]) { clksrc = CLKSRC_PLL0; } else { - int clk_sources[] = {CLKSRC_IDIV0, CLKSRC_IDIV1, CLKSRC_IDIV2, - CLKSRC_IDIV3, CLKSRC_IDIV4}; + static const int clk_sources[] = { + CLKSRC_IDIV0, + CLKSRC_IDIV1, + CLKSRC_IDIV2, + CLKSRC_IDIV3, + CLKSRC_IDIV4, + }; clksrc = clk_sources[port]; } @@ -638,7 +643,7 @@ static int sja1105_cgu_rmii_ref_clk_config(struct sja1105_private *priv, const struct sja1105_regs *regs = priv->info->regs; struct sja1105_cgu_mii_ctrl ref_clk; u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0}; - const int clk_sources[] = { + static const int clk_sources[] = { CLKSRC_MII0_TX_CLK, CLKSRC_MII1_TX_CLK, CLKSRC_MII2_TX_CLK, diff --git a/drivers/net/dsa/vitesse-vsc73xx-platform.c b/drivers/net/dsa/vitesse-vsc73xx-platform.c index bd4206e8f9af..755b7895a15a 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-platform.c +++ b/drivers/net/dsa/vitesse-vsc73xx-platform.c @@ -112,16 +112,14 @@ static int vsc73xx_platform_probe(struct platform_device *pdev) return vsc73xx_probe(&vsc_platform->vsc); } -static int vsc73xx_platform_remove(struct platform_device *pdev) +static void vsc73xx_platform_remove(struct platform_device *pdev) { struct vsc73xx_platform *vsc_platform = platform_get_drvdata(pdev); if (!vsc_platform) - return 0; + return; vsc73xx_remove(&vsc_platform->vsc); - - return 0; } static void vsc73xx_platform_shutdown(struct platform_device *pdev) @@ -160,7 +158,7 @@ MODULE_DEVICE_TABLE(of, vsc73xx_of_match); static struct platform_driver vsc73xx_platform_driver = { .probe = vsc73xx_platform_probe, - .remove = vsc73xx_platform_remove, + .remove_new = vsc73xx_platform_remove, .shutdown = vsc73xx_platform_shutdown, .driver = { .name = "vsc73xx-platform", diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index af603256b724..2874680ef24d 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -811,7 +811,7 @@ static int ax_init_dev(struct net_device *dev) return ret; } -static int ax_remove(struct platform_device *pdev) +static void ax_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct ei_device *ei_local = netdev_priv(dev); @@ -832,8 +832,6 @@ static int ax_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); free_netdev(dev); - - return 0; } /* @@ -1011,7 +1009,7 @@ static struct platform_driver axdrv = { .name = "ax88796", }, .probe = ax_probe, - .remove = ax_remove, + .remove_new = ax_remove, .suspend = ax_suspend, .resume = ax_resume, }; diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c index 217838b28220..5a0fa995e643 100644 --- a/drivers/net/ethernet/8390/mcf8390.c +++ b/drivers/net/ethernet/8390/mcf8390.c @@ -441,7 +441,7 @@ static int mcf8390_probe(struct platform_device *pdev) return 0; } -static int mcf8390_remove(struct platform_device *pdev) +static void mcf8390_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct resource *mem; @@ -450,7 +450,6 @@ static int mcf8390_remove(struct platform_device *pdev) mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(mem->start, resource_size(mem)); free_netdev(dev); - return 0; } static struct platform_driver mcf8390_drv = { @@ -458,7 +457,7 @@ static struct platform_driver mcf8390_drv = { .name = "mcf8390", }, .probe = mcf8390_probe, - .remove = mcf8390_remove, + .remove_new = mcf8390_remove, }; module_platform_driver(mcf8390_drv); diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index 7d89ec1cf273..350683a09d2e 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -823,7 +823,7 @@ static int __init ne_drv_probe(struct platform_device *pdev) return 0; } -static int ne_drv_remove(struct platform_device *pdev) +static void ne_drv_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); @@ -842,7 +842,6 @@ static int ne_drv_remove(struct platform_device *pdev) release_region(dev->base_addr, NE_IO_EXTENT); free_netdev(dev); } - return 0; } /* Remove unused devices or all if true. */ @@ -895,7 +894,7 @@ static int ne_drv_resume(struct platform_device *pdev) #endif static struct platform_driver ne_driver = { - .remove = ne_drv_remove, + .remove_new = ne_drv_remove, .suspend = ne_drv_suspend, .resume = ne_drv_resume, .driver = { diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c index c6f8f852bff1..e03193da5874 100644 --- a/drivers/net/ethernet/actions/owl-emac.c +++ b/drivers/net/ethernet/actions/owl-emac.c @@ -1582,15 +1582,13 @@ static int owl_emac_probe(struct platform_device *pdev) return 0; } -static int owl_emac_remove(struct platform_device *pdev) +static void owl_emac_remove(struct platform_device *pdev) { struct owl_emac_priv *priv = platform_get_drvdata(pdev); netif_napi_del(&priv->napi); phy_disconnect(priv->netdev->phydev); cancel_work_sync(&priv->mac_reset_task); - - return 0; } static const struct of_device_id owl_emac_of_match[] = { @@ -1609,7 +1607,7 @@ static struct platform_driver owl_emac_driver = { .pm = &owl_emac_pm_ops, }, .probe = owl_emac_probe, - .remove = owl_emac_remove, + .remove_new = owl_emac_remove, }; module_platform_driver(owl_emac_driver); diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 597a02c75d52..27af7746d645 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1525,7 +1525,7 @@ error1: return err; } -static int greth_of_remove(struct platform_device *of_dev) +static void greth_of_remove(struct platform_device *of_dev) { struct net_device *ndev = platform_get_drvdata(of_dev); struct greth_private *greth = netdev_priv(ndev); @@ -1544,8 +1544,6 @@ static int greth_of_remove(struct platform_device *of_dev) of_iounmap(&of_dev->resource[0], greth->regs, resource_size(&of_dev->resource[0])); free_netdev(ndev); - - return 0; } static const struct of_device_id greth_of_match[] = { @@ -1566,7 +1564,7 @@ static struct platform_driver greth_of_driver = { .of_match_table = greth_of_match, }, .probe = greth_of_probe, - .remove = greth_of_remove, + .remove_new = greth_of_remove, }; module_platform_driver(greth_of_driver); diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index a94c62956eed..d761c08fe5c1 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -1083,7 +1083,7 @@ out: return ret; } -static int emac_remove(struct platform_device *pdev) +static void emac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct emac_board_info *db = netdev_priv(ndev); @@ -1101,7 +1101,6 @@ static int emac_remove(struct platform_device *pdev) free_netdev(ndev); dev_dbg(&pdev->dev, "released and freed device\n"); - return 0; } static int emac_suspend(struct platform_device *dev, pm_message_t state) @@ -1143,7 +1142,7 @@ static struct platform_driver emac_driver = { .of_match_table = emac_of_match, }, .probe = emac_probe, - .remove = emac_remove, + .remove_new = emac_remove, .suspend = emac_suspend, .resume = emac_resume, }; diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 2e15800e5310..1b1799985d1d 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1464,7 +1464,7 @@ err_free_netdev: /* Remove Altera TSE MAC device */ -static int altera_tse_remove(struct platform_device *pdev) +static void altera_tse_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct altera_tse_private *priv = netdev_priv(ndev); @@ -1476,8 +1476,6 @@ static int altera_tse_remove(struct platform_device *pdev) lynx_pcs_destroy(priv->pcs); free_netdev(ndev); - - return 0; } static const struct altera_dmaops altera_dtype_sgdma = { @@ -1528,7 +1526,7 @@ MODULE_DEVICE_TABLE(of, altera_tse_ids); static struct platform_driver altera_tse_driver = { .probe = altera_tse_probe, - .remove = altera_tse_remove, + .remove_new = altera_tse_remove, .suspend = NULL, .resume = NULL, .driver = { diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index c5cec4e79489..85c978149bf6 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1323,7 +1323,7 @@ out: return err; } -static int au1000_remove(struct platform_device *pdev) +static void au1000_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct au1000_private *aup = netdev_priv(dev); @@ -1359,13 +1359,11 @@ static int au1000_remove(struct platform_device *pdev) release_mem_region(macen->start, resource_size(macen)); free_netdev(dev); - - return 0; } static struct platform_driver au1000_eth_driver = { .probe = au1000_probe, - .remove = au1000_remove, + .remove_new = au1000_remove, .driver = { .name = "au1000-eth", }, diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 36f9b932b9e2..2a8643e167e1 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -445,12 +445,13 @@ int pdsc_setup(struct pdsc *pdsc, bool init) goto err_out_teardown; /* Set up the VIFs */ - err = pdsc_viftypes_init(pdsc); - if (err) - goto err_out_teardown; + if (init) { + err = pdsc_viftypes_init(pdsc); + if (err) + goto err_out_teardown; - if (init) pdsc_debugfs_add_viftype(pdsc); + } clear_bit(PDSC_S_FW_DEAD, &pdsc->state); return 0; @@ -469,8 +470,10 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) pdsc_qcq_free(pdsc, &pdsc->notifyqcq); pdsc_qcq_free(pdsc, &pdsc->adminqcq); - kfree(pdsc->viftype_status); - pdsc->viftype_status = NULL; + if (removing) { + kfree(pdsc->viftype_status); + pdsc->viftype_status = NULL; + } if (pdsc->intr_info) { for (i = 0; i < pdsc->nintrs; i++) @@ -512,7 +515,7 @@ void pdsc_stop(struct pdsc *pdsc) PDS_CORE_INTR_MASK_SET); } -static void pdsc_fw_down(struct pdsc *pdsc) +void pdsc_fw_down(struct pdsc *pdsc) { union pds_core_notifyq_comp reset_event = { .reset.ecode = cpu_to_le16(PDS_EVENT_RESET), @@ -520,10 +523,13 @@ static void pdsc_fw_down(struct pdsc *pdsc) }; if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) { - dev_err(pdsc->dev, "%s: already happening\n", __func__); + dev_warn(pdsc->dev, "%s: already happening\n", __func__); return; } + if (pdsc->pdev->is_virtfn) + return; + /* Notify clients of fw_down */ if (pdsc->fw_reporter) devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc); @@ -533,7 +539,7 @@ static void pdsc_fw_down(struct pdsc *pdsc) pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY); } -static void pdsc_fw_up(struct pdsc *pdsc) +void pdsc_fw_up(struct pdsc *pdsc) { union pds_core_notifyq_comp reset_event = { .reset.ecode = cpu_to_le16(PDS_EVENT_RESET), @@ -546,6 +552,11 @@ static void pdsc_fw_up(struct pdsc *pdsc) return; } + if (pdsc->pdev->is_virtfn) { + clear_bit(PDSC_S_FW_DEAD, &pdsc->state); + return; + } + err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY); if (err) goto err_out; @@ -567,6 +578,18 @@ err_out: pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY); } +static void pdsc_check_pci_health(struct pdsc *pdsc) +{ + u8 fw_status = ioread8(&pdsc->info_regs->fw_status); + + /* is PCI broken? */ + if (fw_status != PDS_RC_BAD_PCI) + return; + + pdsc_reset_prepare(pdsc->pdev); + pdsc_reset_done(pdsc->pdev); +} + void pdsc_health_thread(struct work_struct *work) { struct pdsc *pdsc = container_of(work, struct pdsc, health_work); @@ -593,6 +616,8 @@ void pdsc_health_thread(struct work_struct *work) pdsc_fw_down(pdsc); } + pdsc_check_pci_health(pdsc); + pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION; out_unlock: diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index e545fafc4819..f3a7deda9972 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -283,6 +283,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc); int pdsc_dev_reinit(struct pdsc *pdsc); int pdsc_dev_init(struct pdsc *pdsc); +void pdsc_reset_prepare(struct pci_dev *pdev); +void pdsc_reset_done(struct pci_dev *pdev); + int pdsc_intr_alloc(struct pdsc *pdsc, char *name, irq_handler_t handler, void *data); void pdsc_intr_free(struct pdsc *pdsc, int index); @@ -309,4 +312,8 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data); int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw, struct netlink_ext_ack *extack); + +void pdsc_fw_down(struct pdsc *pdsc); +void pdsc_fw_up(struct pdsc *pdsc); + #endif /* _PDSC_H_ */ diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index f77cd9f5a2fd..7c1b965d61a9 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -42,6 +42,8 @@ int pdsc_err_to_errno(enum pds_core_status_code code) return -ERANGE; case PDS_RC_BAD_ADDR: return -EFAULT; + case PDS_RC_BAD_PCI: + return -ENXIO; case PDS_RC_EOPCODE: case PDS_RC_EINTR: case PDS_RC_DEV_CMD: @@ -62,7 +64,7 @@ bool pdsc_is_fw_running(struct pdsc *pdsc) /* Firmware is useful only if the running bit is set and * fw_status != 0xff (bad PCI read) */ - return (pdsc->fw_status != 0xff) && + return (pdsc->fw_status != PDS_RC_BAD_PCI) && (pdsc->fw_status & PDS_CORE_FW_STS_F_RUNNING); } @@ -128,6 +130,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) unsigned long max_wait; unsigned long duration; int timeout = 0; + bool running; int done = 0; int err = 0; int status; @@ -136,6 +139,10 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) max_wait = start_time + (max_seconds * HZ); while (!done && !timeout) { + running = pdsc_is_fw_running(pdsc); + if (!running) + break; + done = pdsc_devcmd_done(pdsc); if (done) break; @@ -152,7 +159,7 @@ static int pdsc_devcmd_wait(struct pdsc *pdsc, u8 opcode, int max_seconds) dev_dbg(dev, "DEVCMD %d %s after %ld secs\n", opcode, pdsc_devcmd_str(opcode), duration / HZ); - if (!done || timeout) { + if ((!done || timeout) && running) { dev_err(dev, "DEVCMD %d %s timeout, done %d timeout %d max_seconds=%d\n", opcode, pdsc_devcmd_str(opcode), done, timeout, max_seconds); diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 3a45bf474a19..3080898d7b95 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -445,12 +445,62 @@ static void pdsc_remove(struct pci_dev *pdev) devlink_free(dl); } +void pdsc_reset_prepare(struct pci_dev *pdev) +{ + struct pdsc *pdsc = pci_get_drvdata(pdev); + + pdsc_fw_down(pdsc); + + pci_free_irq_vectors(pdev); + pdsc_unmap_bars(pdsc); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +void pdsc_reset_done(struct pci_dev *pdev) +{ + struct pdsc *pdsc = pci_get_drvdata(pdev); + struct device *dev = pdsc->dev; + int err; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Cannot enable PCI device: %pe\n", ERR_PTR(err)); + return; + } + pci_set_master(pdev); + + if (!pdev->is_virtfn) { + pcie_print_link_status(pdsc->pdev); + + err = pci_request_regions(pdsc->pdev, PDS_CORE_DRV_NAME); + if (err) { + dev_err(pdsc->dev, "Cannot request PCI regions: %pe\n", + ERR_PTR(err)); + return; + } + + err = pdsc_map_bars(pdsc); + if (err) + return; + } + + pdsc_fw_up(pdsc); +} + +static const struct pci_error_handlers pdsc_err_handler = { + /* FLR handling */ + .reset_prepare = pdsc_reset_prepare, + .reset_done = pdsc_reset_done, +}; + static struct pci_driver pdsc_driver = { .name = PDS_CORE_DRV_NAME, .id_table = pdsc_id_table, .probe = pdsc_probe, .remove = pdsc_remove, .sriov_configure = pdsc_sriov_configure, + .err_handler = &pdsc_err_handler, }; void *pdsc_get_pf_struct(struct pci_dev *vf_pdev) diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 33bb539ad70a..c78706d21a6a 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1487,7 +1487,7 @@ static int sunlance_sbus_probe(struct platform_device *op) return err; } -static int sunlance_sbus_remove(struct platform_device *op) +static void sunlance_sbus_remove(struct platform_device *op) { struct lance_private *lp = platform_get_drvdata(op); struct net_device *net_dev = lp->dev; @@ -1497,8 +1497,6 @@ static int sunlance_sbus_remove(struct platform_device *op) lance_free_hwresources(lp); free_netdev(net_dev); - - return 0; } static const struct of_device_id sunlance_sbus_match[] = { @@ -1516,7 +1514,7 @@ static struct platform_driver sunlance_sbus_driver = { .of_match_table = sunlance_sbus_match, }, .probe = sunlance_sbus_probe, - .remove = sunlance_sbus_remove, + .remove_new = sunlance_sbus_remove, }; module_platform_driver(sunlance_sbus_driver); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c index 4d790a89fe77..91842a5e161b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c @@ -512,7 +512,7 @@ err_alloc: return ret; } -static int xgbe_platform_remove(struct platform_device *pdev) +static void xgbe_platform_remove(struct platform_device *pdev) { struct xgbe_prv_data *pdata = platform_get_drvdata(pdev); @@ -521,8 +521,6 @@ static int xgbe_platform_remove(struct platform_device *pdev) platform_device_put(pdata->phy_platdev); xgbe_free_pdata(pdata); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -615,7 +613,7 @@ static struct platform_driver xgbe_driver = { .pm = &xgbe_platform_pm_ops, }, .probe = xgbe_platform_probe, - .remove = xgbe_platform_remove, + .remove_new = xgbe_platform_remove, }; int xgbe_platform_init(void) diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c index 379d19d18dbe..9e90c2381491 100644 --- a/drivers/net/ethernet/apm/xgene-v2/main.c +++ b/drivers/net/ethernet/apm/xgene-v2/main.c @@ -690,7 +690,7 @@ err: return ret; } -static int xge_remove(struct platform_device *pdev) +static void xge_remove(struct platform_device *pdev) { struct xge_pdata *pdata; struct net_device *ndev; @@ -706,8 +706,6 @@ static int xge_remove(struct platform_device *pdev) xge_mdio_remove(ndev); unregister_netdev(ndev); free_netdev(ndev); - - return 0; } static void xge_shutdown(struct platform_device *pdev) @@ -736,7 +734,7 @@ static struct platform_driver xge_driver = { .acpi_match_table = ACPI_PTR(xge_acpi_match), }, .probe = xge_probe, - .remove = xge_remove, + .remove_new = xge_remove, .shutdown = xge_shutdown, }; module_platform_driver(xge_driver); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index f3305c434c95..91b4ab31f233 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -2127,7 +2127,7 @@ err: return ret; } -static int xgene_enet_remove(struct platform_device *pdev) +static void xgene_enet_remove(struct platform_device *pdev) { struct xgene_enet_pdata *pdata; struct net_device *ndev; @@ -2149,8 +2149,6 @@ static int xgene_enet_remove(struct platform_device *pdev) xgene_enet_delete_desc_rings(pdata); pdata->port_ops->shutdown(pdata); free_netdev(ndev); - - return 0; } static void xgene_enet_shutdown(struct platform_device *pdev) @@ -2174,7 +2172,7 @@ static struct platform_driver xgene_enet_driver = { .acpi_match_table = ACPI_PTR(xgene_enet_acpi_match), }, .probe = xgene_enet_probe, - .remove = xgene_enet_remove, + .remove_new = xgene_enet_remove, .shutdown = xgene_enet_shutdown, }; diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index 8775c3234e91..766ab78256fe 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -739,7 +739,7 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Macintosh MACE ethernet driver"); MODULE_ALIAS("platform:macmace"); -static int mac_mace_device_remove(struct platform_device *pdev) +static void mac_mace_device_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct mace_data *mp = netdev_priv(dev); @@ -755,13 +755,11 @@ static int mac_mace_device_remove(struct platform_device *pdev) mp->tx_ring, mp->tx_ring_phys); free_netdev(dev); - - return 0; } static struct platform_driver mac_mace_driver = { .probe = mace_probe, - .remove = mac_mace_device_remove, + .remove_new = mac_mace_device_remove, .driver = { .name = mac_mace_string, }, diff --git a/drivers/net/ethernet/arc/emac_arc.c b/drivers/net/ethernet/arc/emac_arc.c index ce3147e886a1..a3afddb23ee8 100644 --- a/drivers/net/ethernet/arc/emac_arc.c +++ b/drivers/net/ethernet/arc/emac_arc.c @@ -58,14 +58,12 @@ out_netdev: return err; } -static int emac_arc_remove(struct platform_device *pdev) +static void emac_arc_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); arc_emac_remove(ndev); free_netdev(ndev); - - return 0; } static const struct of_device_id emac_arc_dt_ids[] = { @@ -76,7 +74,7 @@ MODULE_DEVICE_TABLE(of, emac_arc_dt_ids); static struct platform_driver emac_arc_driver = { .probe = emac_arc_probe, - .remove = emac_arc_remove, + .remove_new = emac_arc_remove, .driver = { .name = DRV_NAME, .of_match_table = emac_arc_dt_ids, diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c index 509101112279..493d6356c8ca 100644 --- a/drivers/net/ethernet/arc/emac_rockchip.c +++ b/drivers/net/ethernet/arc/emac_rockchip.c @@ -244,7 +244,7 @@ out_netdev: return err; } -static int emac_rockchip_remove(struct platform_device *pdev) +static void emac_rockchip_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct rockchip_priv_data *priv = netdev_priv(ndev); @@ -260,12 +260,11 @@ static int emac_rockchip_remove(struct platform_device *pdev) clk_disable_unprepare(priv->macclk); free_netdev(ndev); - return 0; } static struct platform_driver emac_rockchip_driver = { .probe = emac_rockchip_probe, - .remove = emac_rockchip_remove, + .remove_new = emac_rockchip_remove, .driver = { .name = DRV_NAME, .of_match_table = emac_rockchip_dt_ids, diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 009e0b3066fa..0f2f400b5bc4 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1968,21 +1968,19 @@ err_put_clk: return err; } -static int ag71xx_remove(struct platform_device *pdev) +static void ag71xx_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct ag71xx *ag; if (!ndev) - return 0; + return; ag = netdev_priv(ndev); unregister_netdev(ndev); ag71xx_mdio_remove(ag); clk_disable_unprepare(ag->clk_eth); platform_set_drvdata(pdev, NULL); - - return 0; } static const u32 ar71xx_fifo_ar7100[] = { @@ -2069,7 +2067,7 @@ static const struct of_device_id ag71xx_match[] = { static struct platform_driver ag71xx_driver = { .probe = ag71xx_probe, - .remove = ag71xx_remove, + .remove_new = ag71xx_remove, .driver = { .name = "ag71xx", .of_match_table = ag71xx_match, diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h index 43d821fe7a54..63ba64dbb731 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c.h +++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h @@ -504,15 +504,12 @@ struct atl1c_rrd_ring { u16 next_to_use; u16 next_to_clean; struct napi_struct napi; - struct page *rx_page; - unsigned int rx_page_offset; }; /* board specific private data structure */ struct atl1c_adapter { struct net_device *netdev; struct pci_dev *pdev; - unsigned int rx_frag_size; struct atl1c_hw hw; struct atl1c_hw_stats hw_stats; struct mii_if_info mii; /* MII interface info */ diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 940c5d1ff9cf..74b78164cf74 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -483,15 +483,10 @@ static int atl1c_set_mac_addr(struct net_device *netdev, void *p) static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter, struct net_device *dev) { - unsigned int head_size; int mtu = dev->mtu; adapter->rx_buffer_len = mtu > AT_RX_BUF_SIZE ? roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE; - - head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD + NET_IP_ALIGN) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - adapter->rx_frag_size = roundup_pow_of_two(head_size); } static netdev_features_t atl1c_fix_features(struct net_device *netdev, @@ -964,7 +959,6 @@ static void atl1c_init_ring_ptrs(struct atl1c_adapter *adapter) static void atl1c_free_ring_resources(struct atl1c_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; - int i; dma_free_coherent(&pdev->dev, adapter->ring_header.size, adapter->ring_header.desc, adapter->ring_header.dma); @@ -977,12 +971,6 @@ static void atl1c_free_ring_resources(struct atl1c_adapter *adapter) kfree(adapter->tpd_ring[0].buffer_info); adapter->tpd_ring[0].buffer_info = NULL; } - for (i = 0; i < adapter->rx_queue_count; ++i) { - if (adapter->rrd_ring[i].rx_page) { - put_page(adapter->rrd_ring[i].rx_page); - adapter->rrd_ring[i].rx_page = NULL; - } - } } /** @@ -1754,48 +1742,11 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter, skb_checksum_none_assert(skb); } -static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter, - u32 queue, bool napi_mode) -{ - struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue]; - struct sk_buff *skb; - struct page *page; - - if (adapter->rx_frag_size > PAGE_SIZE) { - if (likely(napi_mode)) - return napi_alloc_skb(&rrd_ring->napi, - adapter->rx_buffer_len); - else - return netdev_alloc_skb_ip_align(adapter->netdev, - adapter->rx_buffer_len); - } - - page = rrd_ring->rx_page; - if (!page) { - page = alloc_page(GFP_ATOMIC); - if (unlikely(!page)) - return NULL; - rrd_ring->rx_page = page; - rrd_ring->rx_page_offset = 0; - } - - skb = build_skb(page_address(page) + rrd_ring->rx_page_offset, - adapter->rx_frag_size); - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - rrd_ring->rx_page_offset += adapter->rx_frag_size; - if (rrd_ring->rx_page_offset >= PAGE_SIZE) - rrd_ring->rx_page = NULL; - else - get_page(page); - } - return skb; -} - static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue, bool napi_mode) { struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring[queue]; + struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue]; struct pci_dev *pdev = adapter->pdev; struct atl1c_buffer *buffer_info, *next_info; struct sk_buff *skb; @@ -1814,13 +1765,27 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue, while (next_info->flags & ATL1C_BUFFER_FREE) { rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use); - skb = atl1c_alloc_skb(adapter, queue, napi_mode); + /* When DMA RX address is set to something like + * 0x....fc0, it will be very likely to cause DMA + * RFD overflow issue. + * + * To work around it, we apply rx skb with 64 bytes + * longer space, and offset the address whenever + * 0x....fc0 is detected. + */ + if (likely(napi_mode)) + skb = napi_alloc_skb(&rrd_ring->napi, adapter->rx_buffer_len + 64); + else + skb = netdev_alloc_skb(adapter->netdev, adapter->rx_buffer_len + 64); if (unlikely(!skb)) { if (netif_msg_rx_err(adapter)) dev_warn(&pdev->dev, "alloc rx buffer failed\n"); break; } + if (((unsigned long)skb->data & 0xfff) == 0xfc0) + skb_reserve(skb, 64); + /* * Make buffer alignment 2 beyond a 16 byte boundary * this will result in a 16 byte aligned IP header after diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index 41a6098eb0c2..29b04a274d07 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -1344,17 +1344,15 @@ of_put_exit: return ret; } -static int bcmasp_remove(struct platform_device *pdev) +static void bcmasp_remove(struct platform_device *pdev) { struct bcmasp_priv *priv = dev_get_drvdata(&pdev->dev); if (!priv) - return 0; + return; priv->destroy_wol(priv); bcmasp_remove_intfs(priv); - - return 0; } static void bcmasp_shutdown(struct platform_device *pdev) @@ -1428,7 +1426,7 @@ static SIMPLE_DEV_PM_OPS(bcmasp_pm_ops, static struct platform_driver bcmasp_driver = { .probe = bcmasp_probe, - .remove = bcmasp_remove, + .remove_new = bcmasp_remove, .shutdown = bcmasp_shutdown, .driver = { .name = "brcm,asp-v2", diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c index 33d86683af50..3e7c8671cd11 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -768,7 +768,7 @@ err_dma_free: return err; } -static int bcm4908_enet_remove(struct platform_device *pdev) +static void bcm4908_enet_remove(struct platform_device *pdev) { struct bcm4908_enet *enet = platform_get_drvdata(pdev); @@ -776,8 +776,6 @@ static int bcm4908_enet_remove(struct platform_device *pdev) netif_napi_del(&enet->rx_ring.napi); netif_napi_del(&enet->tx_ring.napi); bcm4908_enet_dma_free(enet); - - return 0; } static const struct of_device_id bcm4908_enet_of_match[] = { @@ -791,7 +789,7 @@ static struct platform_driver bcm4908_enet_driver = { .of_match_table = bcm4908_enet_of_match, }, .probe = bcm4908_enet_probe, - .remove = bcm4908_enet_remove, + .remove_new = bcm4908_enet_remove, }; module_platform_driver(bcm4908_enet_driver); diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index a741070f1f9a..105afde5dbe1 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1902,7 +1902,7 @@ out: /* * exit func, stops hardware and unregisters netdevice */ -static int bcm_enet_remove(struct platform_device *pdev) +static void bcm_enet_remove(struct platform_device *pdev) { struct bcm_enet_priv *priv; struct net_device *dev; @@ -1932,12 +1932,11 @@ static int bcm_enet_remove(struct platform_device *pdev) clk_disable_unprepare(priv->mac_clk); free_netdev(dev); - return 0; } static struct platform_driver bcm63xx_enet_driver = { .probe = bcm_enet_probe, - .remove = bcm_enet_remove, + .remove_new = bcm_enet_remove, .driver = { .name = "bcm63xx_enet", }, @@ -2739,7 +2738,7 @@ out: /* exit func, stops hardware and unregisters netdevice */ -static int bcm_enetsw_remove(struct platform_device *pdev) +static void bcm_enetsw_remove(struct platform_device *pdev) { struct bcm_enet_priv *priv; struct net_device *dev; @@ -2752,12 +2751,11 @@ static int bcm_enetsw_remove(struct platform_device *pdev) clk_disable_unprepare(priv->mac_clk); free_netdev(dev); - return 0; } static struct platform_driver bcm63xx_enetsw_driver = { .probe = bcm_enetsw_probe, - .remove = bcm_enetsw_remove, + .remove_new = bcm_enetsw_remove, .driver = { .name = "bcm63xx_enetsw", }, diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index bf1611cce974..ab096795e805 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2648,7 +2648,7 @@ err_free_netdev: return ret; } -static int bcm_sysport_remove(struct platform_device *pdev) +static void bcm_sysport_remove(struct platform_device *pdev) { struct net_device *dev = dev_get_drvdata(&pdev->dev); struct bcm_sysport_priv *priv = netdev_priv(dev); @@ -2663,8 +2663,6 @@ static int bcm_sysport_remove(struct platform_device *pdev) of_phy_deregister_fixed_link(dn); free_netdev(dev); dev_set_drvdata(&pdev->dev, NULL); - - return 0; } static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) @@ -2901,7 +2899,7 @@ static SIMPLE_DEV_PM_OPS(bcm_sysport_pm_ops, static struct platform_driver bcm_sysport_driver = { .probe = bcm_sysport_probe, - .remove = bcm_sysport_remove, + .remove_new = bcm_sysport_remove, .driver = { .name = "brcm-systemport", .of_match_table = bcm_sysport_of_match, diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index b4381cd41979..0b21fd5bd457 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -246,13 +246,11 @@ static int bgmac_probe(struct platform_device *pdev) return bgmac_enet_probe(bgmac); } -static int bgmac_remove(struct platform_device *pdev) +static void bgmac_remove(struct platform_device *pdev) { struct bgmac *bgmac = platform_get_drvdata(pdev); bgmac_enet_remove(bgmac); - - return 0; } #ifdef CONFIG_PM @@ -296,7 +294,7 @@ static struct platform_driver bgmac_enet_driver = { .pm = BGMAC_PM_OPS }, .probe = bgmac_probe, - .remove = bgmac_remove, + .remove_new = bgmac_remove, }; module_platform_driver(bgmac_enet_driver); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 24bade875ca6..91f3a7e78d65 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -4164,7 +4164,7 @@ err: return err; } -static int bcmgenet_remove(struct platform_device *pdev) +static void bcmgenet_remove(struct platform_device *pdev) { struct bcmgenet_priv *priv = dev_to_priv(&pdev->dev); @@ -4172,8 +4172,6 @@ static int bcmgenet_remove(struct platform_device *pdev) unregister_netdev(priv->dev); bcmgenet_mii_exit(priv->dev); free_netdev(priv->dev); - - return 0; } static void bcmgenet_shutdown(struct platform_device *pdev) @@ -4352,7 +4350,7 @@ MODULE_DEVICE_TABLE(acpi, genet_acpi_match); static struct platform_driver bcmgenet_driver = { .probe = bcmgenet_probe, - .remove = bcmgenet_remove, + .remove_new = bcmgenet_remove, .shutdown = bcmgenet_shutdown, .driver = { .name = "bcmgenet", diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 3a6763c5e8b3..fcf8485f3446 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2593,7 +2593,7 @@ out_out: return err; } -static int sbmac_remove(struct platform_device *pldev) +static void sbmac_remove(struct platform_device *pldev) { struct net_device *dev = platform_get_drvdata(pldev); struct sbmac_softc *sc = netdev_priv(dev); @@ -2604,13 +2604,11 @@ static int sbmac_remove(struct platform_device *pldev) mdiobus_free(sc->mii_bus); iounmap(sc->sbm_base); free_netdev(dev); - - return 0; } static struct platform_driver sbmac_driver = { .probe = sbmac_probe, - .remove = sbmac_remove, + .remove_new = sbmac_remove, .driver = { .name = sbmac_string, }, diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index b940dcd3ace6..cebae0f418f2 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5156,7 +5156,7 @@ err_disable_clocks: return err; } -static int macb_remove(struct platform_device *pdev) +static void macb_remove(struct platform_device *pdev) { struct net_device *dev; struct macb *bp; @@ -5181,8 +5181,6 @@ static int macb_remove(struct platform_device *pdev) phylink_destroy(bp->phylink); free_netdev(dev); } - - return 0; } static int __maybe_unused macb_suspend(struct device *dev) @@ -5398,7 +5396,7 @@ static const struct dev_pm_ops macb_pm_ops = { static struct platform_driver macb_driver = { .probe = macb_probe, - .remove = macb_remove, + .remove_new = macb_remove, .driver = { .name = "macb", .of_match_table = of_match_ptr(macb_dt_ids), diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index f4f87dfa9687..5e97f1e4e38e 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1820,7 +1820,7 @@ err_alloc: * changes the link status, releases the DMA descriptor rings, * unregisters the MDIO bus and unmaps the allocated memory. */ -static int xgmac_remove(struct platform_device *pdev) +static void xgmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct xgmac_priv *priv = netdev_priv(ndev); @@ -1840,8 +1840,6 @@ static int xgmac_remove(struct platform_device *pdev) release_mem_region(res->start, resource_size(res)); free_netdev(ndev); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -1921,7 +1919,7 @@ static struct platform_driver xgmac_driver = { .pm = &xgmac_pm_ops, }, .probe = xgmac_probe, - .remove = xgmac_remove, + .remove_new = xgmac_remove, }; module_platform_driver(xgmac_driver); diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index edde0b8fa49c..007d4b06819e 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1521,7 +1521,7 @@ err: return result; } -static int octeon_mgmt_remove(struct platform_device *pdev) +static void octeon_mgmt_remove(struct platform_device *pdev) { struct net_device *netdev = platform_get_drvdata(pdev); struct octeon_mgmt *p = netdev_priv(netdev); @@ -1529,7 +1529,6 @@ static int octeon_mgmt_remove(struct platform_device *pdev) unregister_netdev(netdev); of_node_put(p->phy_np); free_netdev(netdev); - return 0; } static const struct of_device_id octeon_mgmt_match[] = { @@ -1546,7 +1545,7 @@ static struct platform_driver octeon_mgmt_driver = { .of_match_table = octeon_mgmt_match, }, .probe = octeon_mgmt_probe, - .remove = octeon_mgmt_remove, + .remove_new = octeon_mgmt_remove, }; module_platform_driver(octeon_mgmt_driver); diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index d323c5c23521..0a21a10a791c 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -1879,7 +1879,7 @@ free: return err; } -static int cs89x0_platform_remove(struct platform_device *pdev) +static void cs89x0_platform_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); @@ -1889,7 +1889,6 @@ static int cs89x0_platform_remove(struct platform_device *pdev) */ unregister_netdev(dev); free_netdev(dev); - return 0; } static const struct of_device_id __maybe_unused cs89x0_match[] = { @@ -1904,7 +1903,7 @@ static struct platform_driver cs89x0_driver = { .name = DRV_NAME, .of_match_table = of_match_ptr(cs89x0_match), }, - .remove = cs89x0_platform_remove, + .remove_new = cs89x0_platform_remove, }; module_platform_driver_probe(cs89x0_driver, cs89x0_platform_probe); diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index 8627ab19d470..1c2a540db13d 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -757,7 +757,7 @@ static struct net_device *ep93xx_dev_alloc(struct ep93xx_eth_data *data) } -static int ep93xx_eth_remove(struct platform_device *pdev) +static void ep93xx_eth_remove(struct platform_device *pdev) { struct net_device *dev; struct ep93xx_priv *ep; @@ -765,7 +765,7 @@ static int ep93xx_eth_remove(struct platform_device *pdev) dev = platform_get_drvdata(pdev); if (dev == NULL) - return 0; + return; ep = netdev_priv(dev); @@ -782,8 +782,6 @@ static int ep93xx_eth_remove(struct platform_device *pdev) } free_netdev(dev); - - return 0; } static int ep93xx_eth_probe(struct platform_device *pdev) @@ -862,7 +860,7 @@ err_out: static struct platform_driver ep93xx_eth_driver = { .probe = ep93xx_eth_probe, - .remove = ep93xx_eth_remove, + .remove_new = ep93xx_eth_remove, .driver = { .name = "ep93xx-eth", }, diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c index 21a70b1f0ac5..887876f35f10 100644 --- a/drivers/net/ethernet/cirrus/mac89x0.c +++ b/drivers/net/ethernet/cirrus/mac89x0.c @@ -556,19 +556,18 @@ static int set_mac_address(struct net_device *dev, void *addr) MODULE_LICENSE("GPL"); -static int mac89x0_device_remove(struct platform_device *pdev) +static void mac89x0_device_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); unregister_netdev(dev); nubus_writew(0, dev->base_addr + ADD_PORT); free_netdev(dev); - return 0; } static struct platform_driver mac89x0_platform_driver = { .probe = mac89x0_device_probe, - .remove = mac89x0_device_remove, + .remove_new = mac89x0_device_remove, .driver = { .name = "mac89x0", }, diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index a8b9d1a3e4d5..5423fe26b4ef 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -2518,13 +2518,11 @@ unprepare: return ret; } -static int gemini_ethernet_port_remove(struct platform_device *pdev) +static void gemini_ethernet_port_remove(struct platform_device *pdev) { struct gemini_ethernet_port *port = platform_get_drvdata(pdev); gemini_port_remove(port); - - return 0; } static const struct of_device_id gemini_ethernet_port_of_match[] = { @@ -2541,7 +2539,7 @@ static struct platform_driver gemini_ethernet_port_driver = { .of_match_table = gemini_ethernet_port_of_match, }, .probe = gemini_ethernet_port_probe, - .remove = gemini_ethernet_port_remove, + .remove_new = gemini_ethernet_port_remove, }; static int gemini_ethernet_probe(struct platform_device *pdev) @@ -2583,14 +2581,12 @@ static int gemini_ethernet_probe(struct platform_device *pdev) return devm_of_platform_populate(dev); } -static int gemini_ethernet_remove(struct platform_device *pdev) +static void gemini_ethernet_remove(struct platform_device *pdev) { struct gemini_ethernet *geth = platform_get_drvdata(pdev); geth_cleanup_freeq(geth); geth->initialized = false; - - return 0; } static const struct of_device_id gemini_ethernet_of_match[] = { @@ -2607,7 +2603,7 @@ static struct platform_driver gemini_ethernet_driver = { .of_match_table = gemini_ethernet_of_match, }, .probe = gemini_ethernet_probe, - .remove = gemini_ethernet_remove, + .remove_new = gemini_ethernet_remove, }; static int __init gemini_ethernet_module_init(void) diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 05a89ab6766c..150cc94ae9f8 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1770,8 +1770,7 @@ static const struct dev_pm_ops dm9000_drv_pm_ops = { .resume = dm9000_drv_resume, }; -static int -dm9000_drv_remove(struct platform_device *pdev) +static void dm9000_drv_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct board_info *dm = to_dm9000_board(ndev); @@ -1783,7 +1782,6 @@ dm9000_drv_remove(struct platform_device *pdev) regulator_disable(dm->power_supply); dev_dbg(&pdev->dev, "released and freed device\n"); - return 0; } #ifdef CONFIG_OF @@ -1801,7 +1799,7 @@ static struct platform_driver dm9000_driver = { .of_match_table = of_match_ptr(dm9000_of_matches), }, .probe = dm9000_probe, - .remove = dm9000_drv_remove, + .remove_new = dm9000_drv_remove, }; module_platform_driver(dm9000_driver); diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 151ca9573be9..2a18df3605f1 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -841,7 +841,7 @@ err_out_free_dev: return err; } -static int dnet_remove(struct platform_device *pdev) +static void dnet_remove(struct platform_device *pdev) { struct net_device *dev; @@ -859,13 +859,11 @@ static int dnet_remove(struct platform_device *pdev) free_irq(dev->irq, dev); free_netdev(dev); } - - return 0; } static struct platform_driver dnet_driver = { .probe = dnet_probe, - .remove = dnet_remove, + .remove_new = dnet_remove, .driver = { .name = "dnet", }, diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 8b992dc9bb52..77a3fcb821c8 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -2587,7 +2587,7 @@ mdio_init_failed: return retval; } -static int tsnep_remove(struct platform_device *pdev) +static void tsnep_remove(struct platform_device *pdev) { struct tsnep_adapter *adapter = platform_get_drvdata(pdev); @@ -2603,8 +2603,6 @@ static int tsnep_remove(struct platform_device *pdev) mdiobus_unregister(adapter->mdiobus); tsnep_disable_irq(adapter, ECM_INT_ALL); - - return 0; } static const struct of_device_id tsnep_of_match[] = { @@ -2619,7 +2617,7 @@ static struct platform_driver tsnep_driver = { .of_match_table = tsnep_of_match, }, .probe = tsnep_probe, - .remove = tsnep_remove, + .remove_new = tsnep_remove, }; module_platform_driver(tsnep_driver); diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 95cbad198b4b..ad41c9019018 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1254,7 +1254,7 @@ out: * ethoc_remove - shutdown OpenCores ethernet MAC * @pdev: platform device */ -static int ethoc_remove(struct platform_device *pdev) +static void ethoc_remove(struct platform_device *pdev) { struct net_device *netdev = platform_get_drvdata(pdev); struct ethoc *priv = netdev_priv(netdev); @@ -1271,8 +1271,6 @@ static int ethoc_remove(struct platform_device *pdev) unregister_netdev(netdev); free_netdev(netdev); } - - return 0; } #ifdef CONFIG_PM @@ -1298,7 +1296,7 @@ MODULE_DEVICE_TABLE(of, ethoc_match); static struct platform_driver ethoc_driver = { .probe = ethoc_probe, - .remove = ethoc_remove, + .remove_new = ethoc_remove, .suspend = ethoc_suspend, .resume = ethoc_resume, .driver = { diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 9135b918dd49..fddfd1dd5070 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -2012,7 +2012,7 @@ err_alloc_etherdev: return err; } -static int ftgmac100_remove(struct platform_device *pdev) +static void ftgmac100_remove(struct platform_device *pdev) { struct net_device *netdev; struct ftgmac100 *priv; @@ -2040,7 +2040,6 @@ static int ftgmac100_remove(struct platform_device *pdev) netif_napi_del(&priv->napi); free_netdev(netdev); - return 0; } static const struct of_device_id ftgmac100_of_match[] = { @@ -2051,7 +2050,7 @@ MODULE_DEVICE_TABLE(of, ftgmac100_of_match); static struct platform_driver ftgmac100_driver = { .probe = ftgmac100_probe, - .remove = ftgmac100_remove, + .remove_new = ftgmac100_remove, .driver = { .name = DRV_NAME, .of_match_table = ftgmac100_of_match, diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 183069581bc0..003bc9a45c65 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1219,7 +1219,7 @@ err_alloc_etherdev: return err; } -static int ftmac100_remove(struct platform_device *pdev) +static void ftmac100_remove(struct platform_device *pdev) { struct net_device *netdev; struct ftmac100 *priv; @@ -1234,7 +1234,6 @@ static int ftmac100_remove(struct platform_device *pdev) netif_napi_del(&priv->napi); free_netdev(netdev); - return 0; } static const struct of_device_id ftmac100_of_ids[] = { @@ -1244,7 +1243,7 @@ static const struct of_device_id ftmac100_of_ids[] = { static struct platform_driver ftmac100_driver = { .probe = ftmac100_probe, - .remove = ftmac100_remove, + .remove_new = ftmac100_remove, .driver = { .name = DRV_NAME, .of_match_table = ftmac100_of_ids diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 5704b5f57cd0..83b09dcfafc4 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -190,7 +190,7 @@ static int gve_alloc_stats_report(struct gve_priv *priv) rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * priv->rx_cfg.num_queues; priv->stats_report_len = struct_size(priv->stats_report, stats, - tx_stats_num + rx_stats_num); + size_add(tx_stats_num, rx_stats_num)); priv->stats_report = dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, &priv->stats_report_bus, GFP_KERNEL); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index ecf92a5d56bb..b91e7a06b97f 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -1021,7 +1021,7 @@ init_fail: return ret; } -static int hip04_remove(struct platform_device *pdev) +static void hip04_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct hip04_priv *priv = netdev_priv(ndev); @@ -1035,8 +1035,6 @@ static int hip04_remove(struct platform_device *pdev) of_node_put(priv->phy_node); cancel_work_sync(&priv->tx_timeout_task); free_netdev(ndev); - - return 0; } static const struct of_device_id hip04_mac_match[] = { @@ -1048,7 +1046,7 @@ MODULE_DEVICE_TABLE(of, hip04_mac_match); static struct platform_driver hip04_mac_driver = { .probe = hip04_mac_probe, - .remove = hip04_remove, + .remove_new = hip04_remove, .driver = { .name = DRV_NAME, .of_match_table = hip04_mac_match, diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index cb7b0293fe85..2406263c9dd3 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -893,7 +893,7 @@ out_free_netdev: return ret; } -static int hisi_femac_drv_remove(struct platform_device *pdev) +static void hisi_femac_drv_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct hisi_femac_priv *priv = netdev_priv(ndev); @@ -904,8 +904,6 @@ static int hisi_femac_drv_remove(struct platform_device *pdev) phy_disconnect(ndev->phydev); clk_disable_unprepare(priv->clk); free_netdev(ndev); - - return 0; } #ifdef CONFIG_PM @@ -961,7 +959,7 @@ static struct platform_driver hisi_femac_driver = { .of_match_table = hisi_femac_match, }, .probe = hisi_femac_drv_probe, - .remove = hisi_femac_drv_remove, + .remove_new = hisi_femac_drv_remove, #ifdef CONFIG_PM .suspend = hisi_femac_drv_suspend, .resume = hisi_femac_drv_resume, diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 26d22bb04b87..506fa3d8bbee 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -1282,7 +1282,7 @@ out_free_netdev: return ret; } -static int hix5hd2_dev_remove(struct platform_device *pdev) +static void hix5hd2_dev_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct hix5hd2_priv *priv = netdev_priv(ndev); @@ -1298,8 +1298,6 @@ static int hix5hd2_dev_remove(struct platform_device *pdev) of_node_put(priv->phy_node); cancel_work_sync(&priv->tx_timeout_task); free_netdev(ndev); - - return 0; } static const struct of_device_id hix5hd2_of_match[] = { @@ -1319,7 +1317,7 @@ static struct platform_driver hix5hd2_dev_driver = { .of_match_table = hix5hd2_of_match, }, .probe = hix5hd2_dev_probe, - .remove = hix5hd2_dev_remove, + .remove_new = hix5hd2_dev_remove, }; module_platform_driver(hix5hd2_dev_driver); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index fcaf5132b865..1b67da1f6fa8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -3007,7 +3007,7 @@ free_dev: * hns_dsaf_remove - remove dsaf dev * @pdev: dasf platform device */ -static int hns_dsaf_remove(struct platform_device *pdev) +static void hns_dsaf_remove(struct platform_device *pdev) { struct dsaf_device *dsaf_dev = dev_get_drvdata(&pdev->dev); @@ -3020,8 +3020,6 @@ static int hns_dsaf_remove(struct platform_device *pdev) hns_dsaf_free(dsaf_dev); hns_dsaf_free_dev(dsaf_dev); - - return 0; } static const struct of_device_id g_dsaf_match[] = { @@ -3033,7 +3031,7 @@ MODULE_DEVICE_TABLE(of, g_dsaf_match); static struct platform_driver g_dsaf_driver = { .probe = hns_dsaf_probe, - .remove = hns_dsaf_remove, + .remove_new = hns_dsaf_remove, .driver = { .name = DSAF_DRV_NAME, .of_match_table = g_dsaf_match, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 7cf10d1e2b31..0900abf5c508 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2384,7 +2384,7 @@ out_read_prop_fail: return ret; } -static int hns_nic_dev_remove(struct platform_device *pdev) +static void hns_nic_dev_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct hns_nic_priv *priv = netdev_priv(ndev); @@ -2413,7 +2413,6 @@ static int hns_nic_dev_remove(struct platform_device *pdev) of_node_put(to_of_node(priv->fwnode)); free_netdev(ndev); - return 0; } static const struct of_device_id hns_enet_of_match[] = { @@ -2431,7 +2430,7 @@ static struct platform_driver hns_nic_dev_driver = { .acpi_match_table = ACPI_PTR(hns_enet_acpi_match), }, .probe = hns_nic_dev_probe, - .remove = hns_nic_dev_remove, + .remove_new = hns_nic_dev_remove, }; module_platform_driver(hns_nic_dev_driver); diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 409a89d80220..ed73707176c1 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -610,7 +610,7 @@ static int hns_mdio_probe(struct platform_device *pdev) * * Return 0 on success, negative on failure */ -static int hns_mdio_remove(struct platform_device *pdev) +static void hns_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus; @@ -618,7 +618,6 @@ static int hns_mdio_remove(struct platform_device *pdev) mdiobus_unregister(bus); platform_set_drvdata(pdev, NULL); - return 0; } static const struct of_device_id hns_mdio_match[] = { @@ -636,7 +635,7 @@ MODULE_DEVICE_TABLE(acpi, hns_mdio_acpi_match); static struct platform_driver hns_mdio_driver = { .probe = hns_mdio_probe, - .remove = hns_mdio_remove, + .remove_new = hns_mdio_remove, .driver = { .name = MDIO_DRV_NAME, .of_match_table = hns_mdio_match, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index ad47ac51a139..9b60966736db 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -861,7 +861,7 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq, struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq); struct hinic_dev *nic_dev = netdev_priv(netdev); struct hinic_hwdev *hwdev = nic_dev->hwdev; - int err, irqname_len; + int err; txq->netdev = netdev; txq->sq = sq; @@ -882,15 +882,13 @@ int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq, goto err_alloc_free_sges; } - irqname_len = snprintf(NULL, 0, "%s_txq%d", netdev->name, qp->q_id) + 1; - txq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL); + txq->irq_name = devm_kasprintf(&netdev->dev, GFP_KERNEL, "%s_txq%d", + netdev->name, qp->q_id); if (!txq->irq_name) { err = -ENOMEM; goto err_alloc_irqname; } - sprintf(txq->irq_name, "%s_txq%d", netdev->name, qp->q_id); - err = hinic_hwdev_hw_ci_addr_set(hwdev, sq, CI_UPDATE_NO_PENDING, CI_UPDATE_NO_COALESC); if (err) diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index 54bb4d9a0d1e..813403c2628f 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -153,7 +153,7 @@ probe_failed_free_mpu: return retval; } -static int sni_82596_driver_remove(struct platform_device *pdev) +static void sni_82596_driver_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct i596_private *lp = netdev_priv(dev); @@ -164,12 +164,11 @@ static int sni_82596_driver_remove(struct platform_device *pdev) iounmap(lp->ca); iounmap(lp->mpu_port); free_netdev (dev); - return 0; } static struct platform_driver sni_82596_driver = { .probe = sni_82596_probe, - .remove = sni_82596_driver_remove, + .remove_new = sni_82596_driver_remove, .driver = { .name = sni_82596_string, }, diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 0a56e9752464..251dedd55cfb 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -90,7 +90,7 @@ static struct ehea_bcmc_reg_array ehea_bcmc_regs; static int ehea_probe_adapter(struct platform_device *dev); -static int ehea_remove(struct platform_device *dev); +static void ehea_remove(struct platform_device *dev); static const struct of_device_id ehea_module_device_table[] = { { @@ -121,7 +121,7 @@ static struct platform_driver ehea_driver = { .of_match_table = ehea_device_table, }, .probe = ehea_probe_adapter, - .remove = ehea_remove, + .remove_new = ehea_remove, }; void ehea_dump(void *adr, int len, char *msg) @@ -3471,7 +3471,7 @@ out: return ret; } -static int ehea_remove(struct platform_device *dev) +static void ehea_remove(struct platform_device *dev) { struct ehea_adapter *adapter = platform_get_drvdata(dev); int i; @@ -3492,8 +3492,6 @@ static int ehea_remove(struct platform_device *dev) list_del(&adapter->list); ehea_update_firmware_handles(); - - return 0; } static int check_module_parm(void) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 0c314bf97480..e6e47b1842ea 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -3253,7 +3253,7 @@ static int emac_probe(struct platform_device *ofdev) return err; } -static int emac_remove(struct platform_device *ofdev) +static void emac_remove(struct platform_device *ofdev) { struct emac_instance *dev = platform_get_drvdata(ofdev); @@ -3290,8 +3290,6 @@ static int emac_remove(struct platform_device *ofdev) irq_dispose_mapping(dev->emac_irq); free_netdev(dev->ndev); - - return 0; } /* XXX Features in here should be replaced by properties... */ @@ -3319,7 +3317,7 @@ static struct platform_driver emac_driver = { .of_match_table = emac_match, }, .probe = emac_probe, - .remove = emac_remove, + .remove_new = emac_remove, }; static void __init emac_make_bootlist(void) diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index c3236b59e7e9..462646d1b817 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -711,7 +711,7 @@ static int mal_probe(struct platform_device *ofdev) return err; } -static int mal_remove(struct platform_device *ofdev) +static void mal_remove(struct platform_device *ofdev) { struct mal_instance *mal = platform_get_drvdata(ofdev); @@ -740,8 +740,6 @@ static int mal_remove(struct platform_device *ofdev) NUM_RX_BUFF * mal->num_rx_chans), mal->bd_virt, mal->bd_dma); kfree(mal); - - return 0; } static const struct of_device_id mal_platform_match[] = @@ -770,7 +768,7 @@ static struct platform_driver mal_of_driver = { .of_match_table = mal_platform_match, }, .probe = mal_probe, - .remove = mal_remove, + .remove_new = mal_remove, }; int __init mal_init(void) diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c index fd437f986edf..e1712fdc3c31 100644 --- a/drivers/net/ethernet/ibm/emac/rgmii.c +++ b/drivers/net/ethernet/ibm/emac/rgmii.c @@ -273,7 +273,7 @@ static int rgmii_probe(struct platform_device *ofdev) return rc; } -static int rgmii_remove(struct platform_device *ofdev) +static void rgmii_remove(struct platform_device *ofdev) { struct rgmii_instance *dev = platform_get_drvdata(ofdev); @@ -281,8 +281,6 @@ static int rgmii_remove(struct platform_device *ofdev) iounmap(dev->base); kfree(dev); - - return 0; } static const struct of_device_id rgmii_match[] = @@ -302,7 +300,7 @@ static struct platform_driver rgmii_driver = { .of_match_table = rgmii_match, }, .probe = rgmii_probe, - .remove = rgmii_remove, + .remove_new = rgmii_remove, }; int __init rgmii_init(void) diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c index aae9a88d95d7..fa3488258ca2 100644 --- a/drivers/net/ethernet/ibm/emac/tah.c +++ b/drivers/net/ethernet/ibm/emac/tah.c @@ -130,7 +130,7 @@ static int tah_probe(struct platform_device *ofdev) return rc; } -static int tah_remove(struct platform_device *ofdev) +static void tah_remove(struct platform_device *ofdev) { struct tah_instance *dev = platform_get_drvdata(ofdev); @@ -138,8 +138,6 @@ static int tah_remove(struct platform_device *ofdev) iounmap(dev->base); kfree(dev); - - return 0; } static const struct of_device_id tah_match[] = @@ -160,7 +158,7 @@ static struct platform_driver tah_driver = { .of_match_table = tah_match, }, .probe = tah_probe, - .remove = tah_remove, + .remove_new = tah_remove, }; int __init tah_init(void) diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c index 6337388ee5f4..26e86cdee2f6 100644 --- a/drivers/net/ethernet/ibm/emac/zmii.c +++ b/drivers/net/ethernet/ibm/emac/zmii.c @@ -278,7 +278,7 @@ static int zmii_probe(struct platform_device *ofdev) return rc; } -static int zmii_remove(struct platform_device *ofdev) +static void zmii_remove(struct platform_device *ofdev) { struct zmii_instance *dev = platform_get_drvdata(ofdev); @@ -286,8 +286,6 @@ static int zmii_remove(struct platform_device *ofdev) iounmap(dev->base); kfree(dev); - - return 0; } static const struct of_device_id zmii_match[] = @@ -308,7 +306,7 @@ static struct platform_driver zmii_driver = { .of_match_table = zmii_match, }, .probe = zmii_probe, - .remove = zmii_remove, + .remove_new = zmii_remove, }; int __init zmii_init(void) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 9bc0a9519899..e6684f3cc0ce 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -284,6 +284,7 @@ config ICE select DIMLIB select NET_DEVLINK select PLDMFW + select DPLL help This driver supports Intel(R) Ethernet Connection E800 Series of devices. For more information on how to identify your adapter, go @@ -355,5 +356,17 @@ config IGC To compile this driver as a module, choose M here. The module will be called igc. +config IDPF + tristate "Intel(R) Infrastructure Data Path Function Support" + depends on PCI_MSI + select DIMLIB + select PAGE_POOL + select PAGE_POOL_STATS + help + This driver supports Intel(R) Infrastructure Data Path Function + devices. + + To compile this driver as a module, choose M here. The module + will be called idpf. endif # NET_VENDOR_INTEL diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile index d80d04132073..dacb481ee5b1 100644 --- a/drivers/net/ethernet/intel/Makefile +++ b/drivers/net/ethernet/intel/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_I40E) += i40e/ obj-$(CONFIG_IAVF) += iavf/ obj-$(CONFIG_FM10K) += fm10k/ obj-$(CONFIG_ICE) += ice/ +obj-$(CONFIG_IDPF) += idpf/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index de7fd43dc11c..00ca2b88165c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16320,11 +16320,15 @@ static void i40e_remove(struct pci_dev *pdev) i40e_switch_branch_release(pf->veb[i]); } - /* Now we can shutdown the PF's VSI, just before we kill + /* Now we can shutdown the PF's VSIs, just before we kill * adminq and hmc. */ - if (pf->vsi[pf->lan_vsi]) - i40e_vsi_release(pf->vsi[pf->lan_vsi]); + for (i = pf->num_alloc_vsi; i--;) + if (pf->vsi[i]) { + i40e_vsi_close(pf->vsi[i]); + i40e_vsi_release(pf->vsi[i]); + pf->vsi[i] = NULL; + } i40e_cloud_filter_exit(pf); diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index e110ba346185..44daf335e8c5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -298,8 +298,6 @@ struct iavf_adapter { #define IAVF_FLAG_CLIENT_NEEDS_OPEN BIT(10) #define IAVF_FLAG_CLIENT_NEEDS_CLOSE BIT(11) #define IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(12) -#define IAVF_FLAG_PROMISC_ON BIT(13) -#define IAVF_FLAG_ALLMULTI_ON BIT(14) #define IAVF_FLAG_LEGACY_RX BIT(15) #define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16) #define IAVF_FLAG_QUEUES_DISABLED BIT(17) @@ -325,10 +323,7 @@ struct iavf_adapter { #define IAVF_FLAG_AQ_SET_HENA BIT_ULL(12) #define IAVF_FLAG_AQ_SET_RSS_KEY BIT_ULL(13) #define IAVF_FLAG_AQ_SET_RSS_LUT BIT_ULL(14) -#define IAVF_FLAG_AQ_REQUEST_PROMISC BIT_ULL(15) -#define IAVF_FLAG_AQ_RELEASE_PROMISC BIT_ULL(16) -#define IAVF_FLAG_AQ_REQUEST_ALLMULTI BIT_ULL(17) -#define IAVF_FLAG_AQ_RELEASE_ALLMULTI BIT_ULL(18) +#define IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE BIT_ULL(15) #define IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT_ULL(19) #define IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT_ULL(20) #define IAVF_FLAG_AQ_ENABLE_CHANNELS BIT_ULL(21) @@ -365,6 +360,12 @@ struct iavf_adapter { (IAVF_EXTENDED_CAP_SEND_VLAN_V2 | \ IAVF_EXTENDED_CAP_RECV_VLAN_V2) + /* Lock to prevent possible clobbering of + * current_netdev_promisc_flags + */ + spinlock_t current_netdev_promisc_flags_lock; + netdev_features_t current_netdev_promisc_flags; + /* OS defined structs */ struct net_device *netdev; struct pci_dev *pdev; @@ -405,6 +406,8 @@ struct iavf_adapter { VIRTCHNL_VF_OFFLOAD_VLAN) #define VLAN_V2_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \ VIRTCHNL_VF_OFFLOAD_VLAN_V2) +#define CRC_OFFLOAD_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \ + VIRTCHNL_VF_OFFLOAD_CRC) #define VLAN_V2_FILTERING_ALLOWED(_a) \ (VLAN_V2_ALLOWED((_a)) && \ ((_a)->vlan_v2_caps.filtering.filtering_support.outer || \ @@ -551,7 +554,8 @@ void iavf_add_ether_addrs(struct iavf_adapter *adapter); void iavf_del_ether_addrs(struct iavf_adapter *adapter); void iavf_add_vlans(struct iavf_adapter *adapter); void iavf_del_vlans(struct iavf_adapter *adapter); -void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags); +void iavf_set_promiscuous(struct iavf_adapter *adapter); +bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter); void iavf_request_stats(struct iavf_adapter *adapter); int iavf_request_reset(struct iavf_adapter *adapter); void iavf_get_hena(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index b3434dbc90d6..d47eae3a2d32 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1187,6 +1187,16 @@ static int iavf_addr_unsync(struct net_device *netdev, const u8 *addr) } /** + * iavf_promiscuous_mode_changed - check if promiscuous mode bits changed + * @adapter: device specific adapter + */ +bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter) +{ + return (adapter->current_netdev_promisc_flags ^ adapter->netdev->flags) & + (IFF_PROMISC | IFF_ALLMULTI); +} + +/** * iavf_set_rx_mode - NDO callback to set the netdev filters * @netdev: network interface device structure **/ @@ -1199,19 +1209,10 @@ static void iavf_set_rx_mode(struct net_device *netdev) __dev_mc_sync(netdev, iavf_addr_sync, iavf_addr_unsync); spin_unlock_bh(&adapter->mac_vlan_list_lock); - if (netdev->flags & IFF_PROMISC && - !(adapter->flags & IAVF_FLAG_PROMISC_ON)) - adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_PROMISC; - else if (!(netdev->flags & IFF_PROMISC) && - adapter->flags & IAVF_FLAG_PROMISC_ON) - adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_PROMISC; - - if (netdev->flags & IFF_ALLMULTI && - !(adapter->flags & IAVF_FLAG_ALLMULTI_ON)) - adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_ALLMULTI; - else if (!(netdev->flags & IFF_ALLMULTI) && - adapter->flags & IAVF_FLAG_ALLMULTI_ON) - adapter->aq_required |= IAVF_FLAG_AQ_RELEASE_ALLMULTI; + spin_lock_bh(&adapter->current_netdev_promisc_flags_lock); + if (iavf_promiscuous_mode_changed(adapter)) + adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE; + spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock); } /** @@ -2162,19 +2163,8 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) return 0; } - if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_PROMISC) { - iavf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC | - FLAG_VF_MULTICAST_PROMISC); - return 0; - } - - if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_ALLMULTI) { - iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC); - return 0; - } - if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) || - (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) { - iavf_set_promiscuous(adapter, 0); + if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE) { + iavf_set_promiscuous(adapter); return 0; } @@ -4410,6 +4400,9 @@ static int iavf_set_features(struct net_device *netdev, (features & NETIF_VLAN_OFFLOAD_FEATURES)) iavf_set_vlan_offload_features(adapter, netdev->features, features); + if (CRC_OFFLOAD_ALLOWED(adapter) && + ((netdev->features & NETIF_F_RXFCS) ^ (features & NETIF_F_RXFCS))) + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); return 0; } @@ -4531,6 +4524,9 @@ iavf_get_netdev_vlan_hw_features(struct iavf_adapter *adapter) } } + if (CRC_OFFLOAD_ALLOWED(adapter)) + hw_features |= NETIF_F_RXFCS; + return hw_features; } @@ -4695,6 +4691,55 @@ iavf_fix_netdev_vlan_features(struct iavf_adapter *adapter, } /** + * iavf_fix_strip_features - fix NETDEV CRC and VLAN strip features + * @adapter: board private structure + * @requested_features: stack requested NETDEV features + * + * Returns fixed-up features bits + **/ +static netdev_features_t +iavf_fix_strip_features(struct iavf_adapter *adapter, + netdev_features_t requested_features) +{ + struct net_device *netdev = adapter->netdev; + bool crc_offload_req, is_vlan_strip; + netdev_features_t vlan_strip; + int num_non_zero_vlan; + + crc_offload_req = CRC_OFFLOAD_ALLOWED(adapter) && + (requested_features & NETIF_F_RXFCS); + num_non_zero_vlan = iavf_get_num_vlans_added(adapter); + vlan_strip = (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX); + is_vlan_strip = requested_features & vlan_strip; + + if (!crc_offload_req) + return requested_features; + + if (!num_non_zero_vlan && (netdev->features & vlan_strip) && + !(netdev->features & NETIF_F_RXFCS) && is_vlan_strip) { + requested_features &= ~vlan_strip; + netdev_info(netdev, "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n"); + return requested_features; + } + + if ((netdev->features & NETIF_F_RXFCS) && is_vlan_strip) { + requested_features &= ~vlan_strip; + if (!(netdev->features & vlan_strip)) + netdev_info(netdev, "To enable VLAN stripping, first need to enable FCS/CRC stripping"); + + return requested_features; + } + + if (num_non_zero_vlan && is_vlan_strip && + !(netdev->features & NETIF_F_RXFCS)) { + requested_features &= ~NETIF_F_RXFCS; + netdev_info(netdev, "To disable FCS/CRC stripping, first need to disable VLAN stripping"); + } + + return requested_features; +} + +/** * iavf_fix_features - fix up the netdev feature bits * @netdev: our net device * @features: desired feature bits @@ -4706,7 +4751,9 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); - return iavf_fix_netdev_vlan_features(adapter, features); + features = iavf_fix_netdev_vlan_features(adapter, features); + + return iavf_fix_strip_features(adapter, features); } static const struct net_device_ops iavf_netdev_ops = { @@ -4970,6 +5017,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&adapter->cloud_filter_list_lock); spin_lock_init(&adapter->fdir_fltr_lock); spin_lock_init(&adapter->adv_rss_lock); + spin_lock_init(&adapter->current_netdev_promisc_flags_lock); INIT_LIST_HEAD(&adapter->mac_filter_list); INIT_LIST_HEAD(&adapter->vlan_filter_list); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index f9727e9c3d63..8ce6389b5815 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -142,6 +142,7 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | VIRTCHNL_VF_OFFLOAD_ENCAP | VIRTCHNL_VF_OFFLOAD_VLAN_V2 | + VIRTCHNL_VF_OFFLOAD_CRC | VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | VIRTCHNL_VF_OFFLOAD_REQ_QUEUES | VIRTCHNL_VF_OFFLOAD_ADQ | @@ -312,6 +313,9 @@ void iavf_configure_queues(struct iavf_adapter *adapter) vqpi->rxq.databuffer_size = ALIGN(adapter->rx_rings[i].rx_buf_len, BIT_ULL(IAVF_RXQ_CTX_DBUFF_SHIFT)); + if (CRC_OFFLOAD_ALLOWED(adapter)) + vqpi->rxq.crc_disable = !!(adapter->netdev->features & + NETIF_F_RXFCS); vqpi++; } @@ -936,14 +940,14 @@ void iavf_del_vlans(struct iavf_adapter *adapter) /** * iavf_set_promiscuous * @adapter: adapter structure - * @flags: bitmask to control unicast/multicast promiscuous. * * Request that the PF enable promiscuous mode for our VSI. **/ -void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags) +void iavf_set_promiscuous(struct iavf_adapter *adapter) { + struct net_device *netdev = adapter->netdev; struct virtchnl_promisc_info vpi; - int promisc_all; + unsigned int flags; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -952,36 +956,57 @@ void iavf_set_promiscuous(struct iavf_adapter *adapter, int flags) return; } - promisc_all = FLAG_VF_UNICAST_PROMISC | - FLAG_VF_MULTICAST_PROMISC; - if ((flags & promisc_all) == promisc_all) { - adapter->flags |= IAVF_FLAG_PROMISC_ON; - adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_PROMISC; - dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n"); - } + /* prevent changes to promiscuous flags */ + spin_lock_bh(&adapter->current_netdev_promisc_flags_lock); - if (flags & FLAG_VF_MULTICAST_PROMISC) { - adapter->flags |= IAVF_FLAG_ALLMULTI_ON; - adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_ALLMULTI; - dev_info(&adapter->pdev->dev, "%s is entering multicast promiscuous mode\n", - adapter->netdev->name); + /* sanity check to prevent duplicate AQ calls */ + if (!iavf_promiscuous_mode_changed(adapter)) { + adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE; + dev_dbg(&adapter->pdev->dev, "No change in promiscuous mode\n"); + /* allow changes to promiscuous flags */ + spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock); + return; } - if (!flags) { - if (adapter->flags & IAVF_FLAG_PROMISC_ON) { - adapter->flags &= ~IAVF_FLAG_PROMISC_ON; - adapter->aq_required &= ~IAVF_FLAG_AQ_RELEASE_PROMISC; - dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n"); - } + /* there are 2 bits, but only 3 states */ + if (!(netdev->flags & IFF_PROMISC) && + netdev->flags & IFF_ALLMULTI) { + /* State 1 - only multicast promiscuous mode enabled + * - !IFF_PROMISC && IFF_ALLMULTI + */ + flags = FLAG_VF_MULTICAST_PROMISC; + adapter->current_netdev_promisc_flags |= IFF_ALLMULTI; + adapter->current_netdev_promisc_flags &= ~IFF_PROMISC; + dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n"); + } else if (!(netdev->flags & IFF_PROMISC) && + !(netdev->flags & IFF_ALLMULTI)) { + /* State 2 - unicast/multicast promiscuous mode disabled + * - !IFF_PROMISC && !IFF_ALLMULTI + */ + flags = 0; + adapter->current_netdev_promisc_flags &= + ~(IFF_PROMISC | IFF_ALLMULTI); + dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n"); + } else { + /* State 3 - unicast/multicast promiscuous mode enabled + * - IFF_PROMISC && IFF_ALLMULTI + * - IFF_PROMISC && !IFF_ALLMULTI + */ + flags = FLAG_VF_UNICAST_PROMISC | FLAG_VF_MULTICAST_PROMISC; + adapter->current_netdev_promisc_flags |= IFF_PROMISC; + if (netdev->flags & IFF_ALLMULTI) + adapter->current_netdev_promisc_flags |= IFF_ALLMULTI; + else + adapter->current_netdev_promisc_flags &= ~IFF_ALLMULTI; - if (adapter->flags & IAVF_FLAG_ALLMULTI_ON) { - adapter->flags &= ~IAVF_FLAG_ALLMULTI_ON; - adapter->aq_required &= ~IAVF_FLAG_AQ_RELEASE_ALLMULTI; - dev_info(&adapter->pdev->dev, "%s is leaving multicast promiscuous mode\n", - adapter->netdev->name); - } + dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n"); } + adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE; + + /* allow changes to promiscuous flags */ + spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock); + adapter->current_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE; vpi.vsi_id = adapter->vsi_res->vsi_id; vpi.flags = flags; diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 960277d78e09..00806ddf5bf0 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -34,7 +34,8 @@ ice-y := ice_main.o \ ice_lag.o \ ice_ethtool.o \ ice_repr.o \ - ice_tc_lib.o + ice_tc_lib.o \ + ice_dpll.o ice-$(CONFIG_PCI_IOV) += \ ice_sriov.o \ ice_virtchnl.o \ diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 5022b036ca4f..d30ae39c19f0 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -76,6 +76,7 @@ #include "ice_vsi_vlan_ops.h" #include "ice_gnss.h" #include "ice_irq.h" +#include "ice_dpll.h" #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 @@ -195,10 +196,13 @@ #define ice_pf_to_dev(pf) (&((pf)->pdev->dev)) +#define ice_pf_src_tmr_owned(pf) ((pf)->hw.func_caps.ts_func_info.src_tmr_owned) + enum ice_feature { ICE_F_DSCP, - ICE_F_PTP_EXTTS, + ICE_F_PHY_RCLK, ICE_F_SMA_CTRL, + ICE_F_CGU, ICE_F_GNSS, ICE_F_ROCE_LAG, ICE_F_SRIOV_LAG, @@ -508,6 +512,7 @@ enum ice_pf_flags { ICE_FLAG_UNPLUG_AUX_DEV, ICE_FLAG_MTU_CHANGED, ICE_FLAG_GNSS, /* GNSS successfully initialized */ + ICE_FLAG_DPLL, /* SyncE/PTP dplls initialized */ ICE_PF_FLAGS_NBITS /* must be last */ }; @@ -640,6 +645,7 @@ struct ice_pf { #define ICE_VF_AGG_NODE_ID_START 65 #define ICE_MAX_VF_AGG_NODES 32 struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES]; + struct ice_dplls dplls; }; extern struct workqueue_struct *ice_lag_wq; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 29f7a9852aec..24293f52f2d1 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1351,6 +1351,30 @@ struct ice_aqc_set_mac_lb { u8 reserved[15]; }; +/* Set PHY recovered clock output (direct 0x0630) */ +struct ice_aqc_set_phy_rec_clk_out { + u8 phy_output; + u8 port_num; +#define ICE_AQC_SET_PHY_REC_CLK_OUT_CURR_PORT 0xFF + u8 flags; +#define ICE_AQC_SET_PHY_REC_CLK_OUT_OUT_EN BIT(0) + u8 rsvd; + __le32 freq; + u8 rsvd2[6]; + __le16 node_handle; +}; + +/* Get PHY recovered clock output (direct 0x0631) */ +struct ice_aqc_get_phy_rec_clk_out { + u8 phy_output; + u8 port_num; +#define ICE_AQC_GET_PHY_REC_CLK_OUT_CURR_PORT 0xFF + u8 flags; +#define ICE_AQC_GET_PHY_REC_CLK_OUT_OUT_EN BIT(0) + u8 rsvd[11]; + __le16 node_handle; +}; + struct ice_aqc_link_topo_params { u8 lport_num; u8 lport_num_valid; @@ -1367,6 +1391,9 @@ struct ice_aqc_link_topo_params { #define ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE 6 #define ICE_AQC_LINK_TOPO_NODE_TYPE_MEZZ 7 #define ICE_AQC_LINK_TOPO_NODE_TYPE_ID_EEPROM 8 +#define ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL 9 +#define ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_MUX 10 +#define ICE_AQC_LINK_TOPO_NODE_TYPE_GPS 11 #define ICE_AQC_LINK_TOPO_NODE_CTX_S 4 #define ICE_AQC_LINK_TOPO_NODE_CTX_M \ (0xF << ICE_AQC_LINK_TOPO_NODE_CTX_S) @@ -1403,8 +1430,13 @@ struct ice_aqc_link_topo_addr { struct ice_aqc_get_link_topo { struct ice_aqc_link_topo_addr addr; u8 node_part_num; -#define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575 0x21 -#define ICE_AQC_GET_LINK_TOPO_NODE_NR_C827 0x31 +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575 0x21 +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032 0x24 +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384 0x25 +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_E822_PHY 0x30 +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_C827 0x31 +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_CLK_MUX 0x47 +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_GPS 0x48 u8 rsvd[9]; }; @@ -2125,6 +2157,193 @@ struct ice_aqc_get_pkg_info_resp { struct ice_aqc_get_pkg_info pkg_info[]; }; +/* Get CGU abilities command response data structure (indirect 0x0C61) */ +struct ice_aqc_get_cgu_abilities { + u8 num_inputs; + u8 num_outputs; + u8 pps_dpll_idx; + u8 eec_dpll_idx; + __le32 max_in_freq; + __le32 max_in_phase_adj; + __le32 max_out_freq; + __le32 max_out_phase_adj; + u8 cgu_part_num; + u8 rsvd[3]; +}; + +/* Set CGU input config (direct 0x0C62) */ +struct ice_aqc_set_cgu_input_config { + u8 input_idx; + u8 flags1; +#define ICE_AQC_SET_CGU_IN_CFG_FLG1_UPDATE_FREQ BIT(6) +#define ICE_AQC_SET_CGU_IN_CFG_FLG1_UPDATE_DELAY BIT(7) + u8 flags2; +#define ICE_AQC_SET_CGU_IN_CFG_FLG2_INPUT_EN BIT(5) +#define ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN BIT(6) + u8 rsvd; + __le32 freq; + __le32 phase_delay; + u8 rsvd2[2]; + __le16 node_handle; +}; + +/* Get CGU input config response descriptor structure (direct 0x0C63) */ +struct ice_aqc_get_cgu_input_config { + u8 input_idx; + u8 status; +#define ICE_AQC_GET_CGU_IN_CFG_STATUS_LOS BIT(0) +#define ICE_AQC_GET_CGU_IN_CFG_STATUS_SCM_FAIL BIT(1) +#define ICE_AQC_GET_CGU_IN_CFG_STATUS_CFM_FAIL BIT(2) +#define ICE_AQC_GET_CGU_IN_CFG_STATUS_GST_FAIL BIT(3) +#define ICE_AQC_GET_CGU_IN_CFG_STATUS_PFM_FAIL BIT(4) +#define ICE_AQC_GET_CGU_IN_CFG_STATUS_ESYNC_FAIL BIT(6) +#define ICE_AQC_GET_CGU_IN_CFG_STATUS_ESYNC_CAP BIT(7) + u8 type; +#define ICE_AQC_GET_CGU_IN_CFG_TYPE_READ_ONLY BIT(0) +#define ICE_AQC_GET_CGU_IN_CFG_TYPE_GPS BIT(4) +#define ICE_AQC_GET_CGU_IN_CFG_TYPE_EXTERNAL BIT(5) +#define ICE_AQC_GET_CGU_IN_CFG_TYPE_PHY BIT(6) + u8 flags1; +#define ICE_AQC_GET_CGU_IN_CFG_FLG1_PHASE_DELAY_SUPP BIT(0) +#define ICE_AQC_GET_CGU_IN_CFG_FLG1_1PPS_SUPP BIT(2) +#define ICE_AQC_GET_CGU_IN_CFG_FLG1_10MHZ_SUPP BIT(3) +#define ICE_AQC_GET_CGU_IN_CFG_FLG1_ANYFREQ BIT(7) + __le32 freq; + __le32 phase_delay; + u8 flags2; +#define ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN BIT(5) +#define ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN BIT(6) + u8 rsvd[1]; + __le16 node_handle; +}; + +/* Set CGU output config (direct 0x0C64) */ +struct ice_aqc_set_cgu_output_config { + u8 output_idx; + u8 flags; +#define ICE_AQC_SET_CGU_OUT_CFG_OUT_EN BIT(0) +#define ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN BIT(1) +#define ICE_AQC_SET_CGU_OUT_CFG_UPDATE_FREQ BIT(2) +#define ICE_AQC_SET_CGU_OUT_CFG_UPDATE_PHASE BIT(3) +#define ICE_AQC_SET_CGU_OUT_CFG_UPDATE_SRC_SEL BIT(4) + u8 src_sel; +#define ICE_AQC_SET_CGU_OUT_CFG_DPLL_SRC_SEL ICE_M(0x1F, 0) + u8 rsvd; + __le32 freq; + __le32 phase_delay; + u8 rsvd2[2]; + __le16 node_handle; +}; + +/* Get CGU output config (direct 0x0C65) */ +struct ice_aqc_get_cgu_output_config { + u8 output_idx; + u8 flags; +#define ICE_AQC_GET_CGU_OUT_CFG_OUT_EN BIT(0) +#define ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN BIT(1) +#define ICE_AQC_GET_CGU_OUT_CFG_ESYNC_ABILITY BIT(2) + u8 src_sel; +#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL_SHIFT 0 +#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL \ + ICE_M(0x1F, ICE_AQC_GET_CGU_OUT_CFG_DPLL_SRC_SEL_SHIFT) +#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_MODE_SHIFT 5 +#define ICE_AQC_GET_CGU_OUT_CFG_DPLL_MODE \ + ICE_M(0x7, ICE_AQC_GET_CGU_OUT_CFG_DPLL_MODE_SHIFT) + u8 rsvd; + __le32 freq; + __le32 src_freq; + u8 rsvd2[2]; + __le16 node_handle; +}; + +/* Get CGU DPLL status (direct 0x0C66) */ +struct ice_aqc_get_cgu_dpll_status { + u8 dpll_num; + u8 ref_state; +#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_LOS BIT(0) +#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_SCM BIT(1) +#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_CFM BIT(2) +#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_GST BIT(3) +#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_PFM BIT(4) +#define ICE_AQC_GET_CGU_DPLL_STATUS_FAST_LOCK_EN BIT(5) +#define ICE_AQC_GET_CGU_DPLL_STATUS_REF_SW_ESYNC BIT(6) + u8 dpll_state; +#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_LOCK BIT(0) +#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_HO BIT(1) +#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_HO_READY BIT(2) +#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_FLHIT BIT(5) +#define ICE_AQC_GET_CGU_DPLL_STATUS_STATE_PSLHIT BIT(7) + u8 config; +#define ICE_AQC_GET_CGU_DPLL_CONFIG_CLK_REF_SEL ICE_M(0x1F, 0) +#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_SHIFT 5 +#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE \ + ICE_M(0x7, ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_SHIFT) +#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_FREERUN 0 +#define ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_AUTOMATIC \ + ICE_M(0x3, ICE_AQC_GET_CGU_DPLL_CONFIG_MODE_SHIFT) + __le32 phase_offset_h; + __le32 phase_offset_l; + u8 eec_mode; +#define ICE_AQC_GET_CGU_DPLL_STATUS_EEC_MODE_1 0xA +#define ICE_AQC_GET_CGU_DPLL_STATUS_EEC_MODE_2 0xB +#define ICE_AQC_GET_CGU_DPLL_STATUS_EEC_MODE_UNKNOWN 0xF + u8 rsvd[1]; + __le16 node_handle; +}; + +/* Set CGU DPLL config (direct 0x0C67) */ +struct ice_aqc_set_cgu_dpll_config { + u8 dpll_num; + u8 ref_state; +#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_LOS BIT(0) +#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_SCM BIT(1) +#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_CFM BIT(2) +#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_GST BIT(3) +#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_PFM BIT(4) +#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_FLOCK_EN BIT(5) +#define ICE_AQC_SET_CGU_DPLL_CONFIG_REF_SW_ESYNC BIT(6) + u8 rsvd; + u8 config; +#define ICE_AQC_SET_CGU_DPLL_CONFIG_CLK_REF_SEL ICE_M(0x1F, 0) +#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_SHIFT 5 +#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE \ + ICE_M(0x7, ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_SHIFT) +#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_FREERUN 0 +#define ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_AUTOMATIC \ + ICE_M(0x3, ICE_AQC_SET_CGU_DPLL_CONFIG_MODE_SHIFT) + u8 rsvd2[8]; + u8 eec_mode; + u8 rsvd3[1]; + __le16 node_handle; +}; + +/* Set CGU reference priority (direct 0x0C68) */ +struct ice_aqc_set_cgu_ref_prio { + u8 dpll_num; + u8 ref_idx; + u8 ref_priority; + u8 rsvd[11]; + __le16 node_handle; +}; + +/* Get CGU reference priority (direct 0x0C69) */ +struct ice_aqc_get_cgu_ref_prio { + u8 dpll_num; + u8 ref_idx; + u8 ref_priority; /* Valid only in response */ + u8 rsvd[13]; +}; + +/* Get CGU info (direct 0x0C6A) */ +struct ice_aqc_get_cgu_info { + __le32 cgu_id; + __le32 cgu_cfg_ver; + __le32 cgu_fw_ver; + u8 node_part_num; + u8 dev_rev; + __le16 node_handle; +}; + /* Driver Shared Parameters (direct, 0x0C90) */ struct ice_aqc_driver_shared_params { u8 set_or_get_op; @@ -2194,6 +2413,8 @@ struct ice_aq_desc { struct ice_aqc_get_phy_caps get_phy; struct ice_aqc_set_phy_cfg set_phy; struct ice_aqc_restart_an restart_an; + struct ice_aqc_set_phy_rec_clk_out set_phy_rec_clk_out; + struct ice_aqc_get_phy_rec_clk_out get_phy_rec_clk_out; struct ice_aqc_gpio read_write_gpio; struct ice_aqc_sff_eeprom read_write_sff_param; struct ice_aqc_set_port_id_led set_port_id_led; @@ -2234,6 +2455,15 @@ struct ice_aq_desc { struct ice_aqc_fw_logging fw_logging; struct ice_aqc_get_clear_fw_log get_clear_fw_log; struct ice_aqc_download_pkg download_pkg; + struct ice_aqc_set_cgu_input_config set_cgu_input_config; + struct ice_aqc_get_cgu_input_config get_cgu_input_config; + struct ice_aqc_set_cgu_output_config set_cgu_output_config; + struct ice_aqc_get_cgu_output_config get_cgu_output_config; + struct ice_aqc_get_cgu_dpll_status get_cgu_dpll_status; + struct ice_aqc_set_cgu_dpll_config set_cgu_dpll_config; + struct ice_aqc_set_cgu_ref_prio set_cgu_ref_prio; + struct ice_aqc_get_cgu_ref_prio get_cgu_ref_prio; + struct ice_aqc_get_cgu_info get_cgu_info; struct ice_aqc_driver_shared_params drv_shared_params; struct ice_aqc_set_mac_lb set_mac_lb; struct ice_aqc_alloc_free_res_cmd sw_res_ctrl; @@ -2358,6 +2588,8 @@ enum ice_adminq_opc { ice_aqc_opc_get_link_status = 0x0607, ice_aqc_opc_set_event_mask = 0x0613, ice_aqc_opc_set_mac_lb = 0x0620, + ice_aqc_opc_set_phy_rec_clk_out = 0x0630, + ice_aqc_opc_get_phy_rec_clk_out = 0x0631, ice_aqc_opc_get_link_topo = 0x06E0, ice_aqc_opc_read_i2c = 0x06E2, ice_aqc_opc_write_i2c = 0x06E3, @@ -2413,6 +2645,18 @@ enum ice_adminq_opc { ice_aqc_opc_update_pkg = 0x0C42, ice_aqc_opc_get_pkg_info_list = 0x0C43, + /* 1588/SyncE commands/events */ + ice_aqc_opc_get_cgu_abilities = 0x0C61, + ice_aqc_opc_set_cgu_input_config = 0x0C62, + ice_aqc_opc_get_cgu_input_config = 0x0C63, + ice_aqc_opc_set_cgu_output_config = 0x0C64, + ice_aqc_opc_get_cgu_output_config = 0x0C65, + ice_aqc_opc_get_cgu_dpll_status = 0x0C66, + ice_aqc_opc_set_cgu_dpll_config = 0x0C67, + ice_aqc_opc_set_cgu_ref_prio = 0x0C68, + ice_aqc_opc_get_cgu_ref_prio = 0x0C69, + ice_aqc_opc_get_cgu_info = 0x0C6A, + ice_aqc_opc_driver_shared_params = 0x0C90, /* Standalone Commands/Events */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 80deca45ab59..8f31ae449948 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -8,6 +8,7 @@ #include "ice_ptp_hw.h" #define ICE_PF_RESET_WAIT_COUNT 300 +#define ICE_MAX_NETLIST_SIZE 10 static const char * const ice_link_mode_str_low[] = { [0] = "100BASE_TX", @@ -436,6 +437,81 @@ ice_aq_get_link_topo_handle(struct ice_port_info *pi, u8 node_type, } /** + * ice_aq_get_netlist_node + * @hw: pointer to the hw struct + * @cmd: get_link_topo AQ structure + * @node_part_number: output node part number if node found + * @node_handle: output node handle parameter if node found + * + * Get netlist node handle. + */ +int +ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd, + u8 *node_part_number, u16 *node_handle) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); + desc.params.get_link_topo = *cmd; + + if (ice_aq_send_cmd(hw, &desc, NULL, 0, NULL)) + return -EINTR; + + if (node_handle) + *node_handle = + le16_to_cpu(desc.params.get_link_topo.addr.handle); + if (node_part_number) + *node_part_number = desc.params.get_link_topo.node_part_num; + + return 0; +} + +/** + * ice_find_netlist_node + * @hw: pointer to the hw struct + * @node_type_ctx: type of netlist node to look for + * @node_part_number: node part number to look for + * @node_handle: output parameter if node found - optional + * + * Find and return the node handle for a given node type and part number in the + * netlist. When found ICE_SUCCESS is returned, ICE_ERR_DOES_NOT_EXIST + * otherwise. If node_handle provided, it would be set to found node handle. + */ +int +ice_find_netlist_node(struct ice_hw *hw, u8 node_type_ctx, u8 node_part_number, + u16 *node_handle) +{ + struct ice_aqc_get_link_topo cmd; + u8 rec_node_part_number; + u16 rec_node_handle; + u8 idx; + + for (idx = 0; idx < ICE_MAX_NETLIST_SIZE; idx++) { + int status; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.addr.topo_params.node_type_ctx = + (node_type_ctx << ICE_AQC_LINK_TOPO_NODE_TYPE_S); + cmd.addr.topo_params.index = idx; + + status = ice_aq_get_netlist_node(hw, &cmd, + &rec_node_part_number, + &rec_node_handle); + if (status) + return status; + + if (rec_node_part_number == node_part_number) { + if (node_handle) + *node_handle = rec_node_handle; + return 0; + } + } + + return -ENOTBLK; +} + +/** * ice_is_media_cage_present * @pi: port information structure * @@ -2655,33 +2731,6 @@ ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, } /** - * ice_aq_get_netlist_node - * @hw: pointer to the hw struct - * @cmd: get_link_topo AQ structure - * @node_part_number: output node part number if node found - * @node_handle: output node handle parameter if node found - */ -static int -ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd, - u8 *node_part_number, u16 *node_handle) -{ - struct ice_aq_desc desc; - - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); - desc.params.get_link_topo = *cmd; - - if (ice_aq_send_cmd(hw, &desc, NULL, 0, NULL)) - return -EIO; - - if (node_handle) - *node_handle = le16_to_cpu(desc.params.get_link_topo.addr.handle); - if (node_part_number) - *node_part_number = desc.params.get_link_topo.node_part_num; - - return 0; -} - -/** * ice_is_pf_c827 - check if pf contains c827 phy * @hw: pointer to the hw struct */ @@ -2716,6 +2765,21 @@ bool ice_is_pf_c827(struct ice_hw *hw) } /** + * ice_is_gps_in_netlist + * @hw: pointer to the hw struct + * + * Check if the GPS generic device is present in the netlist + */ +bool ice_is_gps_in_netlist(struct ice_hw *hw) +{ + if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_GPS, + ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_GPS, NULL)) + return false; + + return true; +} + +/** * ice_aq_list_caps - query function/device capabilities * @hw: pointer to the HW struct * @buf: a buffer to hold the capabilities @@ -4999,6 +5063,395 @@ ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid, } /** + * ice_aq_get_cgu_abilities - get cgu abilities + * @hw: pointer to the HW struct + * @abilities: CGU abilities + * + * Get CGU abilities (0x0C61) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_get_cgu_abilities(struct ice_hw *hw, + struct ice_aqc_get_cgu_abilities *abilities) +{ + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_abilities); + return ice_aq_send_cmd(hw, &desc, abilities, sizeof(*abilities), NULL); +} + +/** + * ice_aq_set_input_pin_cfg - set input pin config + * @hw: pointer to the HW struct + * @input_idx: Input index + * @flags1: Input flags + * @flags2: Input flags + * @freq: Frequency in Hz + * @phase_delay: Delay in ps + * + * Set CGU input config (0x0C62) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_set_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 flags1, u8 flags2, + u32 freq, s32 phase_delay) +{ + struct ice_aqc_set_cgu_input_config *cmd; + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_input_config); + cmd = &desc.params.set_cgu_input_config; + cmd->input_idx = input_idx; + cmd->flags1 = flags1; + cmd->flags2 = flags2; + cmd->freq = cpu_to_le32(freq); + cmd->phase_delay = cpu_to_le32(phase_delay); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + +/** + * ice_aq_get_input_pin_cfg - get input pin config + * @hw: pointer to the HW struct + * @input_idx: Input index + * @status: Pin status + * @type: Pin type + * @flags1: Input flags + * @flags2: Input flags + * @freq: Frequency in Hz + * @phase_delay: Delay in ps + * + * Get CGU input config (0x0C63) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_get_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 *status, u8 *type, + u8 *flags1, u8 *flags2, u32 *freq, s32 *phase_delay) +{ + struct ice_aqc_get_cgu_input_config *cmd; + struct ice_aq_desc desc; + int ret; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_input_config); + cmd = &desc.params.get_cgu_input_config; + cmd->input_idx = input_idx; + + ret = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (!ret) { + if (status) + *status = cmd->status; + if (type) + *type = cmd->type; + if (flags1) + *flags1 = cmd->flags1; + if (flags2) + *flags2 = cmd->flags2; + if (freq) + *freq = le32_to_cpu(cmd->freq); + if (phase_delay) + *phase_delay = le32_to_cpu(cmd->phase_delay); + } + + return ret; +} + +/** + * ice_aq_set_output_pin_cfg - set output pin config + * @hw: pointer to the HW struct + * @output_idx: Output index + * @flags: Output flags + * @src_sel: Index of DPLL block + * @freq: Output frequency + * @phase_delay: Output phase compensation + * + * Set CGU output config (0x0C64) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_set_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 flags, + u8 src_sel, u32 freq, s32 phase_delay) +{ + struct ice_aqc_set_cgu_output_config *cmd; + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_output_config); + cmd = &desc.params.set_cgu_output_config; + cmd->output_idx = output_idx; + cmd->flags = flags; + cmd->src_sel = src_sel; + cmd->freq = cpu_to_le32(freq); + cmd->phase_delay = cpu_to_le32(phase_delay); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + +/** + * ice_aq_get_output_pin_cfg - get output pin config + * @hw: pointer to the HW struct + * @output_idx: Output index + * @flags: Output flags + * @src_sel: Internal DPLL source + * @freq: Output frequency + * @src_freq: Source frequency + * + * Get CGU output config (0x0C65) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_get_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 *flags, + u8 *src_sel, u32 *freq, u32 *src_freq) +{ + struct ice_aqc_get_cgu_output_config *cmd; + struct ice_aq_desc desc; + int ret; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_output_config); + cmd = &desc.params.get_cgu_output_config; + cmd->output_idx = output_idx; + + ret = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (!ret) { + if (flags) + *flags = cmd->flags; + if (src_sel) + *src_sel = cmd->src_sel; + if (freq) + *freq = le32_to_cpu(cmd->freq); + if (src_freq) + *src_freq = le32_to_cpu(cmd->src_freq); + } + + return ret; +} + +/** + * ice_aq_get_cgu_dpll_status - get dpll status + * @hw: pointer to the HW struct + * @dpll_num: DPLL index + * @ref_state: Reference clock state + * @config: current DPLL config + * @dpll_state: current DPLL state + * @phase_offset: Phase offset in ns + * @eec_mode: EEC_mode + * + * Get CGU DPLL status (0x0C66) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_get_cgu_dpll_status(struct ice_hw *hw, u8 dpll_num, u8 *ref_state, + u8 *dpll_state, u8 *config, s64 *phase_offset, + u8 *eec_mode) +{ + struct ice_aqc_get_cgu_dpll_status *cmd; + const s64 nsec_per_psec = 1000LL; + struct ice_aq_desc desc; + int status; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_dpll_status); + cmd = &desc.params.get_cgu_dpll_status; + cmd->dpll_num = dpll_num; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (!status) { + *ref_state = cmd->ref_state; + *dpll_state = cmd->dpll_state; + *config = cmd->config; + *phase_offset = le32_to_cpu(cmd->phase_offset_h); + *phase_offset <<= 32; + *phase_offset += le32_to_cpu(cmd->phase_offset_l); + *phase_offset = div64_s64(sign_extend64(*phase_offset, 47), + nsec_per_psec); + *eec_mode = cmd->eec_mode; + } + + return status; +} + +/** + * ice_aq_set_cgu_dpll_config - set dpll config + * @hw: pointer to the HW struct + * @dpll_num: DPLL index + * @ref_state: Reference clock state + * @config: DPLL config + * @eec_mode: EEC mode + * + * Set CGU DPLL config (0x0C67) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_set_cgu_dpll_config(struct ice_hw *hw, u8 dpll_num, u8 ref_state, + u8 config, u8 eec_mode) +{ + struct ice_aqc_set_cgu_dpll_config *cmd; + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_dpll_config); + cmd = &desc.params.set_cgu_dpll_config; + cmd->dpll_num = dpll_num; + cmd->ref_state = ref_state; + cmd->config = config; + cmd->eec_mode = eec_mode; + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + +/** + * ice_aq_set_cgu_ref_prio - set input reference priority + * @hw: pointer to the HW struct + * @dpll_num: DPLL index + * @ref_idx: Reference pin index + * @ref_priority: Reference input priority + * + * Set CGU reference priority (0x0C68) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_set_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx, + u8 ref_priority) +{ + struct ice_aqc_set_cgu_ref_prio *cmd; + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_cgu_ref_prio); + cmd = &desc.params.set_cgu_ref_prio; + cmd->dpll_num = dpll_num; + cmd->ref_idx = ref_idx; + cmd->ref_priority = ref_priority; + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + +/** + * ice_aq_get_cgu_ref_prio - get input reference priority + * @hw: pointer to the HW struct + * @dpll_num: DPLL index + * @ref_idx: Reference pin index + * @ref_prio: Reference input priority + * + * Get CGU reference priority (0x0C69) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_get_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx, + u8 *ref_prio) +{ + struct ice_aqc_get_cgu_ref_prio *cmd; + struct ice_aq_desc desc; + int status; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_ref_prio); + cmd = &desc.params.get_cgu_ref_prio; + cmd->dpll_num = dpll_num; + cmd->ref_idx = ref_idx; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (!status) + *ref_prio = cmd->ref_priority; + + return status; +} + +/** + * ice_aq_get_cgu_info - get cgu info + * @hw: pointer to the HW struct + * @cgu_id: CGU ID + * @cgu_cfg_ver: CGU config version + * @cgu_fw_ver: CGU firmware version + * + * Get CGU info (0x0C6A) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_get_cgu_info(struct ice_hw *hw, u32 *cgu_id, u32 *cgu_cfg_ver, + u32 *cgu_fw_ver) +{ + struct ice_aqc_get_cgu_info *cmd; + struct ice_aq_desc desc; + int status; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cgu_info); + cmd = &desc.params.get_cgu_info; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (!status) { + *cgu_id = le32_to_cpu(cmd->cgu_id); + *cgu_cfg_ver = le32_to_cpu(cmd->cgu_cfg_ver); + *cgu_fw_ver = le32_to_cpu(cmd->cgu_fw_ver); + } + + return status; +} + +/** + * ice_aq_set_phy_rec_clk_out - set RCLK phy out + * @hw: pointer to the HW struct + * @phy_output: PHY reference clock output pin + * @enable: GPIO state to be applied + * @freq: PHY output frequency + * + * Set phy recovered clock as reference (0x0630) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_set_phy_rec_clk_out(struct ice_hw *hw, u8 phy_output, bool enable, + u32 *freq) +{ + struct ice_aqc_set_phy_rec_clk_out *cmd; + struct ice_aq_desc desc; + int status; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_rec_clk_out); + cmd = &desc.params.set_phy_rec_clk_out; + cmd->phy_output = phy_output; + cmd->port_num = ICE_AQC_SET_PHY_REC_CLK_OUT_CURR_PORT; + cmd->flags = enable & ICE_AQC_SET_PHY_REC_CLK_OUT_OUT_EN; + cmd->freq = cpu_to_le32(*freq); + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (!status) + *freq = le32_to_cpu(cmd->freq); + + return status; +} + +/** + * ice_aq_get_phy_rec_clk_out - get phy recovered signal info + * @hw: pointer to the HW struct + * @phy_output: PHY reference clock output pin + * @port_num: Port number + * @flags: PHY flags + * @node_handle: PHY output frequency + * + * Get PHY recovered clock output info (0x0631) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_get_phy_rec_clk_out(struct ice_hw *hw, u8 *phy_output, u8 *port_num, + u8 *flags, u16 *node_handle) +{ + struct ice_aqc_get_phy_rec_clk_out *cmd; + struct ice_aq_desc desc; + int status; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_rec_clk_out); + cmd = &desc.params.get_phy_rec_clk_out; + cmd->phy_output = *phy_output; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (!status) { + *phy_output = cmd->phy_output; + if (port_num) + *port_num = cmd->port_num; + if (flags) + *flags = cmd->flags; + if (node_handle) + *node_handle = le16_to_cpu(cmd->node_handle); + } + + return status; +} + +/** * ice_replay_pre_init - replay pre initialization * @hw: pointer to the HW struct * diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 226b81f97a92..47a75651ca38 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -93,6 +93,13 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, struct ice_aqc_get_phy_caps_data *caps, struct ice_sq_cd *cd); bool ice_is_pf_c827(struct ice_hw *hw); +bool ice_is_gps_in_netlist(struct ice_hw *hw); +int +ice_find_netlist_node(struct ice_hw *hw, u8 node_type_ctx, u8 node_part_number, + u16 *node_handle); +int +ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd, + u8 *node_part_number, u16 *node_handle); int ice_aq_list_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count, enum ice_adminq_opc opc, struct ice_sq_cd *cd); @@ -196,6 +203,44 @@ void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf); struct ice_q_ctx * ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle); int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in); +int +ice_aq_get_cgu_abilities(struct ice_hw *hw, + struct ice_aqc_get_cgu_abilities *abilities); +int +ice_aq_set_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 flags1, u8 flags2, + u32 freq, s32 phase_delay); +int +ice_aq_get_input_pin_cfg(struct ice_hw *hw, u8 input_idx, u8 *status, u8 *type, + u8 *flags1, u8 *flags2, u32 *freq, s32 *phase_delay); +int +ice_aq_set_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 flags, + u8 src_sel, u32 freq, s32 phase_delay); +int +ice_aq_get_output_pin_cfg(struct ice_hw *hw, u8 output_idx, u8 *flags, + u8 *src_sel, u32 *freq, u32 *src_freq); +int +ice_aq_get_cgu_dpll_status(struct ice_hw *hw, u8 dpll_num, u8 *ref_state, + u8 *dpll_state, u8 *config, s64 *phase_offset, + u8 *eec_mode); +int +ice_aq_set_cgu_dpll_config(struct ice_hw *hw, u8 dpll_num, u8 ref_state, + u8 config, u8 eec_mode); +int +ice_aq_set_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx, + u8 ref_priority); +int +ice_aq_get_cgu_ref_prio(struct ice_hw *hw, u8 dpll_num, u8 ref_idx, + u8 *ref_prio); +int +ice_aq_get_cgu_info(struct ice_hw *hw, u32 *cgu_id, u32 *cgu_cfg_ver, + u32 *cgu_fw_ver); + +int +ice_aq_set_phy_rec_clk_out(struct ice_hw *hw, u8 phy_output, bool enable, + u32 *freq); +int +ice_aq_get_phy_rec_clk_out(struct ice_hw *hw, u8 *phy_output, u8 *port_num, + u8 *flags, u16 *node_handle); void ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c new file mode 100644 index 000000000000..1faee9cb944d --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -0,0 +1,1904 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022, Intel Corporation. */ + +#include "ice.h" +#include "ice_lib.h" +#include "ice_trace.h" +#include <linux/dpll.h> + +#define ICE_CGU_STATE_ACQ_ERR_THRESHOLD 50 +#define ICE_DPLL_PIN_IDX_INVALID 0xff +#define ICE_DPLL_RCLK_NUM_PER_PF 1 + +/** + * enum ice_dpll_pin_type - enumerate ice pin types: + * @ICE_DPLL_PIN_INVALID: invalid pin type + * @ICE_DPLL_PIN_TYPE_INPUT: input pin + * @ICE_DPLL_PIN_TYPE_OUTPUT: output pin + * @ICE_DPLL_PIN_TYPE_RCLK_INPUT: recovery clock input pin + */ +enum ice_dpll_pin_type { + ICE_DPLL_PIN_INVALID, + ICE_DPLL_PIN_TYPE_INPUT, + ICE_DPLL_PIN_TYPE_OUTPUT, + ICE_DPLL_PIN_TYPE_RCLK_INPUT, +}; + +static const char * const pin_type_name[] = { + [ICE_DPLL_PIN_TYPE_INPUT] = "input", + [ICE_DPLL_PIN_TYPE_OUTPUT] = "output", + [ICE_DPLL_PIN_TYPE_RCLK_INPUT] = "rclk-input", +}; + +/** + * ice_dpll_pin_freq_set - set pin's frequency + * @pf: private board structure + * @pin: pointer to a pin + * @pin_type: type of pin being configured + * @freq: frequency to be set + * @extack: error reporting + * + * Set requested frequency on a pin. + * + * Context: Called under pf->dplls.lock + * Return: + * * 0 - success + * * negative - error on AQ or wrong pin type given + */ +static int +ice_dpll_pin_freq_set(struct ice_pf *pf, struct ice_dpll_pin *pin, + enum ice_dpll_pin_type pin_type, const u32 freq, + struct netlink_ext_ack *extack) +{ + u8 flags; + int ret; + + switch (pin_type) { + case ICE_DPLL_PIN_TYPE_INPUT: + flags = ICE_AQC_SET_CGU_IN_CFG_FLG1_UPDATE_FREQ; + ret = ice_aq_set_input_pin_cfg(&pf->hw, pin->idx, flags, + pin->flags[0], freq, 0); + break; + case ICE_DPLL_PIN_TYPE_OUTPUT: + flags = ICE_AQC_SET_CGU_OUT_CFG_UPDATE_FREQ; + ret = ice_aq_set_output_pin_cfg(&pf->hw, pin->idx, flags, + 0, freq, 0); + break; + default: + return -EINVAL; + } + if (ret) { + NL_SET_ERR_MSG_FMT(extack, + "err:%d %s failed to set pin freq:%u on pin:%u\n", + ret, + ice_aq_str(pf->hw.adminq.sq_last_status), + freq, pin->idx); + return ret; + } + pin->freq = freq; + + return 0; +} + +/** + * ice_dpll_frequency_set - wrapper for pin callback for set frequency + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: pointer to dpll + * @dpll_priv: private data pointer passed on dpll registration + * @frequency: frequency to be set + * @extack: error reporting + * @pin_type: type of pin being configured + * + * Wraps internal set frequency command on a pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error pin not found or couldn't set in hw + */ +static int +ice_dpll_frequency_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u32 frequency, + struct netlink_ext_ack *extack, + enum ice_dpll_pin_type pin_type) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + int ret; + + mutex_lock(&pf->dplls.lock); + ret = ice_dpll_pin_freq_set(pf, p, pin_type, frequency, extack); + mutex_unlock(&pf->dplls.lock); + + return ret; +} + +/** + * ice_dpll_input_frequency_set - input pin callback for set frequency + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: pointer to dpll + * @dpll_priv: private data pointer passed on dpll registration + * @frequency: frequency to be set + * @extack: error reporting + * + * Wraps internal set frequency command on a pin. + * + * Context: Calls a function which acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error pin not found or couldn't set in hw + */ +static int +ice_dpll_input_frequency_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 frequency, struct netlink_ext_ack *extack) +{ + return ice_dpll_frequency_set(pin, pin_priv, dpll, dpll_priv, frequency, + extack, ICE_DPLL_PIN_TYPE_INPUT); +} + +/** + * ice_dpll_output_frequency_set - output pin callback for set frequency + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: pointer to dpll + * @dpll_priv: private data pointer passed on dpll registration + * @frequency: frequency to be set + * @extack: error reporting + * + * Wraps internal set frequency command on a pin. + * + * Context: Calls a function which acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error pin not found or couldn't set in hw + */ +static int +ice_dpll_output_frequency_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 frequency, struct netlink_ext_ack *extack) +{ + return ice_dpll_frequency_set(pin, pin_priv, dpll, dpll_priv, frequency, + extack, ICE_DPLL_PIN_TYPE_OUTPUT); +} + +/** + * ice_dpll_frequency_get - wrapper for pin callback for get frequency + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: pointer to dpll + * @dpll_priv: private data pointer passed on dpll registration + * @frequency: on success holds pin's frequency + * @extack: error reporting + * @pin_type: type of pin being configured + * + * Wraps internal get frequency command of a pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error pin not found or couldn't get from hw + */ +static int +ice_dpll_frequency_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 *frequency, struct netlink_ext_ack *extack, + enum ice_dpll_pin_type pin_type) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + + mutex_lock(&pf->dplls.lock); + *frequency = p->freq; + mutex_unlock(&pf->dplls.lock); + + return 0; +} + +/** + * ice_dpll_input_frequency_get - input pin callback for get frequency + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: pointer to dpll + * @dpll_priv: private data pointer passed on dpll registration + * @frequency: on success holds pin's frequency + * @extack: error reporting + * + * Wraps internal get frequency command of a input pin. + * + * Context: Calls a function which acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error pin not found or couldn't get from hw + */ +static int +ice_dpll_input_frequency_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 *frequency, struct netlink_ext_ack *extack) +{ + return ice_dpll_frequency_get(pin, pin_priv, dpll, dpll_priv, frequency, + extack, ICE_DPLL_PIN_TYPE_INPUT); +} + +/** + * ice_dpll_output_frequency_get - output pin callback for get frequency + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: pointer to dpll + * @dpll_priv: private data pointer passed on dpll registration + * @frequency: on success holds pin's frequency + * @extack: error reporting + * + * Wraps internal get frequency command of a pin. + * + * Context: Calls a function which acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - error pin not found or couldn't get from hw + */ +static int +ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 *frequency, struct netlink_ext_ack *extack) +{ + return ice_dpll_frequency_get(pin, pin_priv, dpll, dpll_priv, frequency, + extack, ICE_DPLL_PIN_TYPE_OUTPUT); +} + +/** + * ice_dpll_pin_enable - enable a pin on dplls + * @hw: board private hw structure + * @pin: pointer to a pin + * @pin_type: type of pin being enabled + * @extack: error reporting + * + * Enable a pin on both dplls. Store current state in pin->flags. + * + * Context: Called under pf->dplls.lock + * Return: + * * 0 - OK + * * negative - error + */ +static int +ice_dpll_pin_enable(struct ice_hw *hw, struct ice_dpll_pin *pin, + enum ice_dpll_pin_type pin_type, + struct netlink_ext_ack *extack) +{ + u8 flags = 0; + int ret; + + switch (pin_type) { + case ICE_DPLL_PIN_TYPE_INPUT: + if (pin->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN) + flags |= ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN; + flags |= ICE_AQC_SET_CGU_IN_CFG_FLG2_INPUT_EN; + ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0); + break; + case ICE_DPLL_PIN_TYPE_OUTPUT: + if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN) + flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN; + flags |= ICE_AQC_SET_CGU_OUT_CFG_OUT_EN; + ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0); + break; + default: + return -EINVAL; + } + if (ret) + NL_SET_ERR_MSG_FMT(extack, + "err:%d %s failed to enable %s pin:%u\n", + ret, ice_aq_str(hw->adminq.sq_last_status), + pin_type_name[pin_type], pin->idx); + + return ret; +} + +/** + * ice_dpll_pin_disable - disable a pin on dplls + * @hw: board private hw structure + * @pin: pointer to a pin + * @pin_type: type of pin being disabled + * @extack: error reporting + * + * Disable a pin on both dplls. Store current state in pin->flags. + * + * Context: Called under pf->dplls.lock + * Return: + * * 0 - OK + * * negative - error + */ +static int +ice_dpll_pin_disable(struct ice_hw *hw, struct ice_dpll_pin *pin, + enum ice_dpll_pin_type pin_type, + struct netlink_ext_ack *extack) +{ + u8 flags = 0; + int ret; + + switch (pin_type) { + case ICE_DPLL_PIN_TYPE_INPUT: + if (pin->flags[0] & ICE_AQC_GET_CGU_IN_CFG_FLG2_ESYNC_EN) + flags |= ICE_AQC_SET_CGU_IN_CFG_FLG2_ESYNC_EN; + ret = ice_aq_set_input_pin_cfg(hw, pin->idx, 0, flags, 0, 0); + break; + case ICE_DPLL_PIN_TYPE_OUTPUT: + if (pin->flags[0] & ICE_AQC_GET_CGU_OUT_CFG_ESYNC_EN) + flags |= ICE_AQC_SET_CGU_OUT_CFG_ESYNC_EN; + ret = ice_aq_set_output_pin_cfg(hw, pin->idx, flags, 0, 0, 0); + break; + default: + return -EINVAL; + } + if (ret) + NL_SET_ERR_MSG_FMT(extack, + "err:%d %s failed to disable %s pin:%u\n", + ret, ice_aq_str(hw->adminq.sq_last_status), + pin_type_name[pin_type], pin->idx); + + return ret; +} + +/** + * ice_dpll_pin_state_update - update pin's state + * @pf: private board struct + * @pin: structure with pin attributes to be updated + * @pin_type: type of pin being updated + * @extack: error reporting + * + * Determine pin current state and frequency, then update struct + * holding the pin info. For input pin states are separated for each + * dpll, for rclk pins states are separated for each parent. + * + * Context: Called under pf->dplls.lock + * Return: + * * 0 - OK + * * negative - error + */ +static int +ice_dpll_pin_state_update(struct ice_pf *pf, struct ice_dpll_pin *pin, + enum ice_dpll_pin_type pin_type, + struct netlink_ext_ack *extack) +{ + u8 parent, port_num = ICE_AQC_SET_PHY_REC_CLK_OUT_CURR_PORT; + int ret; + + switch (pin_type) { + case ICE_DPLL_PIN_TYPE_INPUT: + ret = ice_aq_get_input_pin_cfg(&pf->hw, pin->idx, NULL, NULL, + NULL, &pin->flags[0], + &pin->freq, NULL); + if (ret) + goto err; + if (ICE_AQC_GET_CGU_IN_CFG_FLG2_INPUT_EN & pin->flags[0]) { + if (pin->pin) { + pin->state[pf->dplls.eec.dpll_idx] = + pin->pin == pf->dplls.eec.active_input ? + DPLL_PIN_STATE_CONNECTED : + DPLL_PIN_STATE_SELECTABLE; + pin->state[pf->dplls.pps.dpll_idx] = + pin->pin == pf->dplls.pps.active_input ? + DPLL_PIN_STATE_CONNECTED : + DPLL_PIN_STATE_SELECTABLE; + } else { + pin->state[pf->dplls.eec.dpll_idx] = + DPLL_PIN_STATE_SELECTABLE; + pin->state[pf->dplls.pps.dpll_idx] = + DPLL_PIN_STATE_SELECTABLE; + } + } else { + pin->state[pf->dplls.eec.dpll_idx] = + DPLL_PIN_STATE_DISCONNECTED; + pin->state[pf->dplls.pps.dpll_idx] = + DPLL_PIN_STATE_DISCONNECTED; + } + break; + case ICE_DPLL_PIN_TYPE_OUTPUT: + ret = ice_aq_get_output_pin_cfg(&pf->hw, pin->idx, + &pin->flags[0], NULL, + &pin->freq, NULL); + if (ret) + goto err; + if (ICE_AQC_SET_CGU_OUT_CFG_OUT_EN & pin->flags[0]) + pin->state[0] = DPLL_PIN_STATE_CONNECTED; + else + pin->state[0] = DPLL_PIN_STATE_DISCONNECTED; + break; + case ICE_DPLL_PIN_TYPE_RCLK_INPUT: + for (parent = 0; parent < pf->dplls.rclk.num_parents; + parent++) { + u8 p = parent; + + ret = ice_aq_get_phy_rec_clk_out(&pf->hw, &p, + &port_num, + &pin->flags[parent], + NULL); + if (ret) + goto err; + if (ICE_AQC_GET_PHY_REC_CLK_OUT_OUT_EN & + pin->flags[parent]) + pin->state[parent] = DPLL_PIN_STATE_CONNECTED; + else + pin->state[parent] = + DPLL_PIN_STATE_DISCONNECTED; + } + break; + default: + return -EINVAL; + } + + return 0; +err: + if (extack) + NL_SET_ERR_MSG_FMT(extack, + "err:%d %s failed to update %s pin:%u\n", + ret, + ice_aq_str(pf->hw.adminq.sq_last_status), + pin_type_name[pin_type], pin->idx); + else + dev_err_ratelimited(ice_pf_to_dev(pf), + "err:%d %s failed to update %s pin:%u\n", + ret, + ice_aq_str(pf->hw.adminq.sq_last_status), + pin_type_name[pin_type], pin->idx); + return ret; +} + +/** + * ice_dpll_hw_input_prio_set - set input priority value in hardware + * @pf: board private structure + * @dpll: ice dpll pointer + * @pin: ice pin pointer + * @prio: priority value being set on a dpll + * @extack: error reporting + * + * Internal wrapper for setting the priority in the hardware. + * + * Context: Called under pf->dplls.lock + * Return: + * * 0 - success + * * negative - failure + */ +static int +ice_dpll_hw_input_prio_set(struct ice_pf *pf, struct ice_dpll *dpll, + struct ice_dpll_pin *pin, const u32 prio, + struct netlink_ext_ack *extack) +{ + int ret; + + ret = ice_aq_set_cgu_ref_prio(&pf->hw, dpll->dpll_idx, pin->idx, + (u8)prio); + if (ret) + NL_SET_ERR_MSG_FMT(extack, + "err:%d %s failed to set pin prio:%u on pin:%u\n", + ret, + ice_aq_str(pf->hw.adminq.sq_last_status), + prio, pin->idx); + else + dpll->input_prio[pin->idx] = prio; + + return ret; +} + +/** + * ice_dpll_lock_status_get - get dpll lock status callback + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @status: on success holds dpll's lock status + * @extack: error reporting + * + * Dpll subsystem callback, provides dpll's lock status. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - failure + */ +static int +ice_dpll_lock_status_get(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_lock_status *status, + struct netlink_ext_ack *extack) +{ + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + + mutex_lock(&pf->dplls.lock); + *status = d->dpll_state; + mutex_unlock(&pf->dplls.lock); + + return 0; +} + +/** + * ice_dpll_mode_supported - check if dpll's working mode is supported + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @mode: mode to be checked for support + * @extack: error reporting + * + * Dpll subsystem callback. Provides information if working mode is supported + * by dpll. + * + * Return: + * * true - mode is supported + * * false - mode is not supported + */ +static bool ice_dpll_mode_supported(const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_mode mode, + struct netlink_ext_ack *extack) +{ + if (mode == DPLL_MODE_AUTOMATIC) + return true; + + return false; +} + +/** + * ice_dpll_mode_get - get dpll's working mode + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @mode: on success holds current working mode of dpll + * @extack: error reporting + * + * Dpll subsystem callback. Provides working mode of dpll. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - failure + */ +static int ice_dpll_mode_get(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_mode *mode, + struct netlink_ext_ack *extack) +{ + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + + mutex_lock(&pf->dplls.lock); + *mode = d->mode; + mutex_unlock(&pf->dplls.lock); + + return 0; +} + +/** + * ice_dpll_pin_state_set - set pin's state on dpll + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @enable: if pin shalll be enabled + * @extack: error reporting + * @pin_type: type of a pin + * + * Set pin state on a pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - OK or no change required + * * negative - error + */ +static int +ice_dpll_pin_state_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + bool enable, struct netlink_ext_ack *extack, + enum ice_dpll_pin_type pin_type) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + int ret; + + mutex_lock(&pf->dplls.lock); + if (enable) + ret = ice_dpll_pin_enable(&pf->hw, p, pin_type, extack); + else + ret = ice_dpll_pin_disable(&pf->hw, p, pin_type, extack); + if (!ret) + ret = ice_dpll_pin_state_update(pf, p, pin_type, extack); + mutex_unlock(&pf->dplls.lock); + + return ret; +} + +/** + * ice_dpll_output_state_set - enable/disable output pin on dpll device + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: dpll being configured + * @dpll_priv: private data pointer passed on dpll registration + * @state: state of pin to be set + * @extack: error reporting + * + * Dpll subsystem callback. Set given state on output type pin. + * + * Context: Calls a function which acquires pf->dplls.lock + * Return: + * * 0 - successfully enabled mode + * * negative - failed to enable mode + */ +static int +ice_dpll_output_state_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + bool enable = state == DPLL_PIN_STATE_CONNECTED; + + return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable, + extack, ICE_DPLL_PIN_TYPE_OUTPUT); +} + +/** + * ice_dpll_input_state_set - enable/disable input pin on dpll levice + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: dpll being configured + * @dpll_priv: private data pointer passed on dpll registration + * @state: state of pin to be set + * @extack: error reporting + * + * Dpll subsystem callback. Enables given mode on input type pin. + * + * Context: Calls a function which acquires pf->dplls.lock + * Return: + * * 0 - successfully enabled mode + * * negative - failed to enable mode + */ +static int +ice_dpll_input_state_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + bool enable = state == DPLL_PIN_STATE_SELECTABLE; + + return ice_dpll_pin_state_set(pin, pin_priv, dpll, dpll_priv, enable, + extack, ICE_DPLL_PIN_TYPE_INPUT); +} + +/** + * ice_dpll_pin_state_get - set pin's state on dpll + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @state: on success holds state of the pin + * @extack: error reporting + * @pin_type: type of questioned pin + * + * Determine pin state set it on a pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - failed to get state + */ +static int +ice_dpll_pin_state_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack, + enum ice_dpll_pin_type pin_type) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + int ret; + + mutex_lock(&pf->dplls.lock); + ret = ice_dpll_pin_state_update(pf, p, pin_type, extack); + if (ret) + goto unlock; + if (pin_type == ICE_DPLL_PIN_TYPE_INPUT) + *state = p->state[d->dpll_idx]; + else if (pin_type == ICE_DPLL_PIN_TYPE_OUTPUT) + *state = p->state[0]; + ret = 0; +unlock: + mutex_unlock(&pf->dplls.lock); + + return ret; +} + +/** + * ice_dpll_output_state_get - get output pin state on dpll device + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @state: on success holds state of the pin + * @extack: error reporting + * + * Dpll subsystem callback. Check state of a pin. + * + * Context: Calls a function which acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - failed to get state + */ +static int +ice_dpll_output_state_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack) +{ + return ice_dpll_pin_state_get(pin, pin_priv, dpll, dpll_priv, state, + extack, ICE_DPLL_PIN_TYPE_OUTPUT); +} + +/** + * ice_dpll_input_state_get - get input pin state on dpll device + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @state: on success holds state of the pin + * @extack: error reporting + * + * Dpll subsystem callback. Check state of a input pin. + * + * Context: Calls a function which acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - failed to get state + */ +static int +ice_dpll_input_state_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack) +{ + return ice_dpll_pin_state_get(pin, pin_priv, dpll, dpll_priv, state, + extack, ICE_DPLL_PIN_TYPE_INPUT); +} + +/** + * ice_dpll_input_prio_get - get dpll's input prio + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @prio: on success - returns input priority on dpll + * @extack: error reporting + * + * Dpll subsystem callback. Handler for getting priority of a input pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - failure + */ +static int +ice_dpll_input_prio_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u32 *prio, struct netlink_ext_ack *extack) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + + mutex_lock(&pf->dplls.lock); + *prio = d->input_prio[p->idx]; + mutex_unlock(&pf->dplls.lock); + + return 0; +} + +/** + * ice_dpll_input_prio_set - set dpll input prio + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @prio: input priority to be set on dpll + * @extack: error reporting + * + * Dpll subsystem callback. Handler for setting priority of a input pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - failure + */ +static int +ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u32 prio, struct netlink_ext_ack *extack) +{ + struct ice_dpll_pin *p = pin_priv; + struct ice_dpll *d = dpll_priv; + struct ice_pf *pf = d->pf; + int ret; + + if (prio > ICE_DPLL_PRIO_MAX) { + NL_SET_ERR_MSG_FMT(extack, "prio out of supported range 0-%d", + ICE_DPLL_PRIO_MAX); + return -EINVAL; + } + + mutex_lock(&pf->dplls.lock); + ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack); + mutex_unlock(&pf->dplls.lock); + + return ret; +} + +/** + * ice_dpll_input_direction - callback for get input pin direction + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @direction: holds input pin direction + * @extack: error reporting + * + * Dpll subsystem callback. Handler for getting direction of a input pin. + * + * Return: + * * 0 - success + */ +static int +ice_dpll_input_direction(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack) +{ + *direction = DPLL_PIN_DIRECTION_INPUT; + + return 0; +} + +/** + * ice_dpll_output_direction - callback for get output pin direction + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @dpll: registered dpll pointer + * @dpll_priv: private data pointer passed on dpll registration + * @direction: holds output pin direction + * @extack: error reporting + * + * Dpll subsystem callback. Handler for getting direction of an output pin. + * + * Return: + * * 0 - success + */ +static int +ice_dpll_output_direction(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack) +{ + *direction = DPLL_PIN_DIRECTION_OUTPUT; + + return 0; +} + +/** + * ice_dpll_rclk_state_on_pin_set - set a state on rclk pin + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @parent_pin: pin parent pointer + * @parent_pin_priv: parent private data pointer passed on pin registration + * @state: state to be set on pin + * @extack: error reporting + * + * Dpll subsystem callback, set a state of a rclk pin on a parent pin + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - failure + */ +static int +ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + struct ice_dpll_pin *p = pin_priv, *parent = parent_pin_priv; + bool enable = state == DPLL_PIN_STATE_CONNECTED; + struct ice_pf *pf = p->pf; + int ret = -EINVAL; + u32 hw_idx; + + mutex_lock(&pf->dplls.lock); + hw_idx = parent->idx - pf->dplls.base_rclk_idx; + if (hw_idx >= pf->dplls.num_inputs) + goto unlock; + + if ((enable && p->state[hw_idx] == DPLL_PIN_STATE_CONNECTED) || + (!enable && p->state[hw_idx] == DPLL_PIN_STATE_DISCONNECTED)) { + NL_SET_ERR_MSG_FMT(extack, + "pin:%u state:%u on parent:%u already set", + p->idx, state, parent->idx); + goto unlock; + } + ret = ice_aq_set_phy_rec_clk_out(&pf->hw, hw_idx, enable, + &p->freq); + if (ret) + NL_SET_ERR_MSG_FMT(extack, + "err:%d %s failed to set pin state:%u for pin:%u on parent:%u\n", + ret, + ice_aq_str(pf->hw.adminq.sq_last_status), + state, p->idx, parent->idx); +unlock: + mutex_unlock(&pf->dplls.lock); + + return ret; +} + +/** + * ice_dpll_rclk_state_on_pin_get - get a state of rclk pin + * @pin: pointer to a pin + * @pin_priv: private data pointer passed on pin registration + * @parent_pin: pin parent pointer + * @parent_pin_priv: pin parent priv data pointer passed on pin registration + * @state: on success holds pin state on parent pin + * @extack: error reporting + * + * dpll subsystem callback, get a state of a recovered clock pin. + * + * Context: Acquires pf->dplls.lock + * Return: + * * 0 - success + * * negative - failure + */ +static int +ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack) +{ + struct ice_dpll_pin *p = pin_priv, *parent = parent_pin_priv; + struct ice_pf *pf = p->pf; + int ret = -EINVAL; + u32 hw_idx; + + mutex_lock(&pf->dplls.lock); + hw_idx = parent->idx - pf->dplls.base_rclk_idx; + if (hw_idx >= pf->dplls.num_inputs) + goto unlock; + + ret = ice_dpll_pin_state_update(pf, p, ICE_DPLL_PIN_TYPE_RCLK_INPUT, + extack); + if (ret) + goto unlock; + + *state = p->state[hw_idx]; + ret = 0; +unlock: + mutex_unlock(&pf->dplls.lock); + + return ret; +} + +static const struct dpll_pin_ops ice_dpll_rclk_ops = { + .state_on_pin_set = ice_dpll_rclk_state_on_pin_set, + .state_on_pin_get = ice_dpll_rclk_state_on_pin_get, + .direction_get = ice_dpll_input_direction, +}; + +static const struct dpll_pin_ops ice_dpll_input_ops = { + .frequency_get = ice_dpll_input_frequency_get, + .frequency_set = ice_dpll_input_frequency_set, + .state_on_dpll_get = ice_dpll_input_state_get, + .state_on_dpll_set = ice_dpll_input_state_set, + .prio_get = ice_dpll_input_prio_get, + .prio_set = ice_dpll_input_prio_set, + .direction_get = ice_dpll_input_direction, +}; + +static const struct dpll_pin_ops ice_dpll_output_ops = { + .frequency_get = ice_dpll_output_frequency_get, + .frequency_set = ice_dpll_output_frequency_set, + .state_on_dpll_get = ice_dpll_output_state_get, + .state_on_dpll_set = ice_dpll_output_state_set, + .direction_get = ice_dpll_output_direction, +}; + +static const struct dpll_device_ops ice_dpll_ops = { + .lock_status_get = ice_dpll_lock_status_get, + .mode_supported = ice_dpll_mode_supported, + .mode_get = ice_dpll_mode_get, +}; + +/** + * ice_generate_clock_id - generates unique clock_id for registering dpll. + * @pf: board private structure + * + * Generates unique (per board) clock_id for allocation and search of dpll + * devices in Linux dpll subsystem. + * + * Return: generated clock id for the board + */ +static u64 ice_generate_clock_id(struct ice_pf *pf) +{ + return pci_get_dsn(pf->pdev); +} + +/** + * ice_dpll_notify_changes - notify dpll subsystem about changes + * @d: pointer do dpll + * + * Once change detected appropriate event is submitted to the dpll subsystem. + */ +static void ice_dpll_notify_changes(struct ice_dpll *d) +{ + if (d->prev_dpll_state != d->dpll_state) { + d->prev_dpll_state = d->dpll_state; + dpll_device_change_ntf(d->dpll); + } + if (d->prev_input != d->active_input) { + if (d->prev_input) + dpll_pin_change_ntf(d->prev_input); + d->prev_input = d->active_input; + if (d->active_input) + dpll_pin_change_ntf(d->active_input); + } +} + +/** + * ice_dpll_update_state - update dpll state + * @pf: pf private structure + * @d: pointer to queried dpll device + * @init: if function called on initialization of ice dpll + * + * Poll current state of dpll from hw and update ice_dpll struct. + * + * Context: Called by kworker under pf->dplls.lock + * Return: + * * 0 - success + * * negative - AQ failure + */ +static int +ice_dpll_update_state(struct ice_pf *pf, struct ice_dpll *d, bool init) +{ + struct ice_dpll_pin *p = NULL; + int ret; + + ret = ice_get_cgu_state(&pf->hw, d->dpll_idx, d->prev_dpll_state, + &d->input_idx, &d->ref_state, &d->eec_mode, + &d->phase_shift, &d->dpll_state); + + dev_dbg(ice_pf_to_dev(pf), + "update dpll=%d, prev_src_idx:%u, src_idx:%u, state:%d, prev:%d mode:%d\n", + d->dpll_idx, d->prev_input_idx, d->input_idx, + d->dpll_state, d->prev_dpll_state, d->mode); + if (ret) { + dev_err(ice_pf_to_dev(pf), + "update dpll=%d state failed, ret=%d %s\n", + d->dpll_idx, ret, + ice_aq_str(pf->hw.adminq.sq_last_status)); + return ret; + } + if (init) { + if (d->dpll_state == DPLL_LOCK_STATUS_LOCKED || + d->dpll_state == DPLL_LOCK_STATUS_LOCKED_HO_ACQ) + d->active_input = pf->dplls.inputs[d->input_idx].pin; + p = &pf->dplls.inputs[d->input_idx]; + return ice_dpll_pin_state_update(pf, p, + ICE_DPLL_PIN_TYPE_INPUT, NULL); + } + if (d->dpll_state == DPLL_LOCK_STATUS_HOLDOVER || + d->dpll_state == DPLL_LOCK_STATUS_UNLOCKED) { + d->active_input = NULL; + if (d->input_idx != ICE_DPLL_PIN_IDX_INVALID) + p = &pf->dplls.inputs[d->input_idx]; + d->prev_input_idx = ICE_DPLL_PIN_IDX_INVALID; + d->input_idx = ICE_DPLL_PIN_IDX_INVALID; + if (!p) + return 0; + ret = ice_dpll_pin_state_update(pf, p, + ICE_DPLL_PIN_TYPE_INPUT, NULL); + } else if (d->input_idx != d->prev_input_idx) { + if (d->prev_input_idx != ICE_DPLL_PIN_IDX_INVALID) { + p = &pf->dplls.inputs[d->prev_input_idx]; + ice_dpll_pin_state_update(pf, p, + ICE_DPLL_PIN_TYPE_INPUT, + NULL); + } + if (d->input_idx != ICE_DPLL_PIN_IDX_INVALID) { + p = &pf->dplls.inputs[d->input_idx]; + d->active_input = p->pin; + ice_dpll_pin_state_update(pf, p, + ICE_DPLL_PIN_TYPE_INPUT, + NULL); + } + d->prev_input_idx = d->input_idx; + } + + return ret; +} + +/** + * ice_dpll_periodic_work - DPLLs periodic worker + * @work: pointer to kthread_work structure + * + * DPLLs periodic worker is responsible for polling state of dpll. + * Context: Holds pf->dplls.lock + */ +static void ice_dpll_periodic_work(struct kthread_work *work) +{ + struct ice_dplls *d = container_of(work, struct ice_dplls, work.work); + struct ice_pf *pf = container_of(d, struct ice_pf, dplls); + struct ice_dpll *de = &pf->dplls.eec; + struct ice_dpll *dp = &pf->dplls.pps; + int ret; + + mutex_lock(&pf->dplls.lock); + ret = ice_dpll_update_state(pf, de, false); + if (!ret) + ret = ice_dpll_update_state(pf, dp, false); + if (ret) { + d->cgu_state_acq_err_num++; + /* stop rescheduling this worker */ + if (d->cgu_state_acq_err_num > + ICE_CGU_STATE_ACQ_ERR_THRESHOLD) { + dev_err(ice_pf_to_dev(pf), + "EEC/PPS DPLLs periodic work disabled\n"); + mutex_unlock(&pf->dplls.lock); + return; + } + } + mutex_unlock(&pf->dplls.lock); + ice_dpll_notify_changes(de); + ice_dpll_notify_changes(dp); + + /* Run twice a second or reschedule if update failed */ + kthread_queue_delayed_work(d->kworker, &d->work, + ret ? msecs_to_jiffies(10) : + msecs_to_jiffies(500)); +} + +/** + * ice_dpll_release_pins - release pins resources from dpll subsystem + * @pins: pointer to pins array + * @count: number of pins + * + * Release resources of given pins array in the dpll subsystem. + */ +static void ice_dpll_release_pins(struct ice_dpll_pin *pins, int count) +{ + int i; + + for (i = 0; i < count; i++) + dpll_pin_put(pins[i].pin); +} + +/** + * ice_dpll_get_pins - get pins from dpll subsystem + * @pf: board private structure + * @pins: pointer to pins array + * @start_idx: get starts from this pin idx value + * @count: number of pins + * @clock_id: clock_id of dpll device + * + * Get pins - allocate - in dpll subsystem, store them in pin field of given + * pins array. + * + * Return: + * * 0 - success + * * negative - allocation failure reason + */ +static int +ice_dpll_get_pins(struct ice_pf *pf, struct ice_dpll_pin *pins, + int start_idx, int count, u64 clock_id) +{ + int i, ret; + + for (i = 0; i < count; i++) { + pins[i].pin = dpll_pin_get(clock_id, i + start_idx, THIS_MODULE, + &pins[i].prop); + if (IS_ERR(pins[i].pin)) { + ret = PTR_ERR(pins[i].pin); + goto release_pins; + } + } + + return 0; + +release_pins: + while (--i >= 0) + dpll_pin_put(pins[i].pin); + return ret; +} + +/** + * ice_dpll_unregister_pins - unregister pins from a dpll + * @dpll: dpll device pointer + * @pins: pointer to pins array + * @ops: callback ops registered with the pins + * @count: number of pins + * + * Unregister pins of a given array of pins from given dpll device registered in + * dpll subsystem. + */ +static void +ice_dpll_unregister_pins(struct dpll_device *dpll, struct ice_dpll_pin *pins, + const struct dpll_pin_ops *ops, int count) +{ + int i; + + for (i = 0; i < count; i++) + dpll_pin_unregister(dpll, pins[i].pin, ops, &pins[i]); +} + +/** + * ice_dpll_register_pins - register pins with a dpll + * @dpll: dpll pointer to register pins with + * @pins: pointer to pins array + * @ops: callback ops registered with the pins + * @count: number of pins + * + * Register pins of a given array with given dpll in dpll subsystem. + * + * Return: + * * 0 - success + * * negative - registration failure reason + */ +static int +ice_dpll_register_pins(struct dpll_device *dpll, struct ice_dpll_pin *pins, + const struct dpll_pin_ops *ops, int count) +{ + int ret, i; + + for (i = 0; i < count; i++) { + ret = dpll_pin_register(dpll, pins[i].pin, ops, &pins[i]); + if (ret) + goto unregister_pins; + } + + return 0; + +unregister_pins: + while (--i >= 0) + dpll_pin_unregister(dpll, pins[i].pin, ops, &pins[i]); + return ret; +} + +/** + * ice_dpll_deinit_direct_pins - deinitialize direct pins + * @cgu: if cgu is present and controlled by this NIC + * @pins: pointer to pins array + * @count: number of pins + * @ops: callback ops registered with the pins + * @first: dpll device pointer + * @second: dpll device pointer + * + * If cgu is owned unregister pins from given dplls. + * Release pins resources to the dpll subsystem. + */ +static void +ice_dpll_deinit_direct_pins(bool cgu, struct ice_dpll_pin *pins, int count, + const struct dpll_pin_ops *ops, + struct dpll_device *first, + struct dpll_device *second) +{ + if (cgu) { + ice_dpll_unregister_pins(first, pins, ops, count); + ice_dpll_unregister_pins(second, pins, ops, count); + } + ice_dpll_release_pins(pins, count); +} + +/** + * ice_dpll_init_direct_pins - initialize direct pins + * @pf: board private structure + * @cgu: if cgu is present and controlled by this NIC + * @pins: pointer to pins array + * @start_idx: on which index shall allocation start in dpll subsystem + * @count: number of pins + * @ops: callback ops registered with the pins + * @first: dpll device pointer + * @second: dpll device pointer + * + * Allocate directly connected pins of a given array in dpll subsystem. + * If cgu is owned register allocated pins with given dplls. + * + * Return: + * * 0 - success + * * negative - registration failure reason + */ +static int +ice_dpll_init_direct_pins(struct ice_pf *pf, bool cgu, + struct ice_dpll_pin *pins, int start_idx, int count, + const struct dpll_pin_ops *ops, + struct dpll_device *first, struct dpll_device *second) +{ + int ret; + + ret = ice_dpll_get_pins(pf, pins, start_idx, count, pf->dplls.clock_id); + if (ret) + return ret; + if (cgu) { + ret = ice_dpll_register_pins(first, pins, ops, count); + if (ret) + goto release_pins; + ret = ice_dpll_register_pins(second, pins, ops, count); + if (ret) + goto unregister_first; + } + + return 0; + +unregister_first: + ice_dpll_unregister_pins(first, pins, ops, count); +release_pins: + ice_dpll_release_pins(pins, count); + return ret; +} + +/** + * ice_dpll_deinit_rclk_pin - release rclk pin resources + * @pf: board private structure + * + * Deregister rclk pin from parent pins and release resources in dpll subsystem. + */ +static void ice_dpll_deinit_rclk_pin(struct ice_pf *pf) +{ + struct ice_dpll_pin *rclk = &pf->dplls.rclk; + struct ice_vsi *vsi = ice_get_main_vsi(pf); + struct dpll_pin *parent; + int i; + + for (i = 0; i < rclk->num_parents; i++) { + parent = pf->dplls.inputs[rclk->parent_idx[i]].pin; + if (!parent) + continue; + dpll_pin_on_pin_unregister(parent, rclk->pin, + &ice_dpll_rclk_ops, rclk); + } + if (WARN_ON_ONCE(!vsi || !vsi->netdev)) + return; + netdev_dpll_pin_clear(vsi->netdev); + dpll_pin_put(rclk->pin); +} + +/** + * ice_dpll_init_rclk_pins - initialize recovered clock pin + * @pf: board private structure + * @pin: pin to register + * @start_idx: on which index shall allocation start in dpll subsystem + * @ops: callback ops registered with the pins + * + * Allocate resource for recovered clock pin in dpll subsystem. Register the + * pin with the parents it has in the info. Register pin with the pf's main vsi + * netdev. + * + * Return: + * * 0 - success + * * negative - registration failure reason + */ +static int +ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin, + int start_idx, const struct dpll_pin_ops *ops) +{ + struct ice_vsi *vsi = ice_get_main_vsi(pf); + struct dpll_pin *parent; + int ret, i; + + ret = ice_dpll_get_pins(pf, pin, start_idx, ICE_DPLL_RCLK_NUM_PER_PF, + pf->dplls.clock_id); + if (ret) + return ret; + for (i = 0; i < pf->dplls.rclk.num_parents; i++) { + parent = pf->dplls.inputs[pf->dplls.rclk.parent_idx[i]].pin; + if (!parent) { + ret = -ENODEV; + goto unregister_pins; + } + ret = dpll_pin_on_pin_register(parent, pf->dplls.rclk.pin, + ops, &pf->dplls.rclk); + if (ret) + goto unregister_pins; + } + if (WARN_ON((!vsi || !vsi->netdev))) + return -EINVAL; + netdev_dpll_pin_set(vsi->netdev, pf->dplls.rclk.pin); + + return 0; + +unregister_pins: + while (i) { + parent = pf->dplls.inputs[pf->dplls.rclk.parent_idx[--i]].pin; + dpll_pin_on_pin_unregister(parent, pf->dplls.rclk.pin, + &ice_dpll_rclk_ops, &pf->dplls.rclk); + } + ice_dpll_release_pins(pin, ICE_DPLL_RCLK_NUM_PER_PF); + return ret; +} + +/** + * ice_dpll_deinit_pins - deinitialize direct pins + * @pf: board private structure + * @cgu: if cgu is controlled by this pf + * + * If cgu is owned unregister directly connected pins from the dplls. + * Release resources of directly connected pins from the dpll subsystem. + */ +static void ice_dpll_deinit_pins(struct ice_pf *pf, bool cgu) +{ + struct ice_dpll_pin *outputs = pf->dplls.outputs; + struct ice_dpll_pin *inputs = pf->dplls.inputs; + int num_outputs = pf->dplls.num_outputs; + int num_inputs = pf->dplls.num_inputs; + struct ice_dplls *d = &pf->dplls; + struct ice_dpll *de = &d->eec; + struct ice_dpll *dp = &d->pps; + + ice_dpll_deinit_rclk_pin(pf); + if (cgu) { + ice_dpll_unregister_pins(dp->dpll, inputs, &ice_dpll_input_ops, + num_inputs); + ice_dpll_unregister_pins(de->dpll, inputs, &ice_dpll_input_ops, + num_inputs); + } + ice_dpll_release_pins(inputs, num_inputs); + if (cgu) { + ice_dpll_unregister_pins(dp->dpll, outputs, + &ice_dpll_output_ops, num_outputs); + ice_dpll_unregister_pins(de->dpll, outputs, + &ice_dpll_output_ops, num_outputs); + ice_dpll_release_pins(outputs, num_outputs); + } +} + +/** + * ice_dpll_init_pins - init pins and register pins with a dplls + * @pf: board private structure + * @cgu: if cgu is present and controlled by this NIC + * + * Initialize directly connected pf's pins within pf's dplls in a Linux dpll + * subsystem. + * + * Return: + * * 0 - success + * * negative - initialization failure reason + */ +static int ice_dpll_init_pins(struct ice_pf *pf, bool cgu) +{ + u32 rclk_idx; + int ret; + + ret = ice_dpll_init_direct_pins(pf, cgu, pf->dplls.inputs, 0, + pf->dplls.num_inputs, + &ice_dpll_input_ops, + pf->dplls.eec.dpll, pf->dplls.pps.dpll); + if (ret) + return ret; + if (cgu) { + ret = ice_dpll_init_direct_pins(pf, cgu, pf->dplls.outputs, + pf->dplls.num_inputs, + pf->dplls.num_outputs, + &ice_dpll_output_ops, + pf->dplls.eec.dpll, + pf->dplls.pps.dpll); + if (ret) + goto deinit_inputs; + } + rclk_idx = pf->dplls.num_inputs + pf->dplls.num_outputs + pf->hw.pf_id; + ret = ice_dpll_init_rclk_pins(pf, &pf->dplls.rclk, rclk_idx, + &ice_dpll_rclk_ops); + if (ret) + goto deinit_outputs; + + return 0; +deinit_outputs: + ice_dpll_deinit_direct_pins(cgu, pf->dplls.outputs, + pf->dplls.num_outputs, + &ice_dpll_output_ops, pf->dplls.pps.dpll, + pf->dplls.eec.dpll); +deinit_inputs: + ice_dpll_deinit_direct_pins(cgu, pf->dplls.inputs, pf->dplls.num_inputs, + &ice_dpll_input_ops, pf->dplls.pps.dpll, + pf->dplls.eec.dpll); + return ret; +} + +/** + * ice_dpll_deinit_dpll - deinitialize dpll device + * @pf: board private structure + * @d: pointer to ice_dpll + * @cgu: if cgu is present and controlled by this NIC + * + * If cgu is owned unregister the dpll from dpll subsystem. + * Release resources of dpll device from dpll subsystem. + */ +static void +ice_dpll_deinit_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu) +{ + if (cgu) + dpll_device_unregister(d->dpll, &ice_dpll_ops, d); + dpll_device_put(d->dpll); +} + +/** + * ice_dpll_init_dpll - initialize dpll device in dpll subsystem + * @pf: board private structure + * @d: dpll to be initialized + * @cgu: if cgu is present and controlled by this NIC + * @type: type of dpll being initialized + * + * Allocate dpll instance for this board in dpll subsystem, if cgu is controlled + * by this NIC, register dpll with the callback ops. + * + * Return: + * * 0 - success + * * negative - initialization failure reason + */ +static int +ice_dpll_init_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu, + enum dpll_type type) +{ + u64 clock_id = pf->dplls.clock_id; + int ret; + + d->dpll = dpll_device_get(clock_id, d->dpll_idx, THIS_MODULE); + if (IS_ERR(d->dpll)) { + ret = PTR_ERR(d->dpll); + dev_err(ice_pf_to_dev(pf), + "dpll_device_get failed (%p) err=%d\n", d, ret); + return ret; + } + d->pf = pf; + if (cgu) { + ret = dpll_device_register(d->dpll, type, &ice_dpll_ops, d); + if (ret) { + dpll_device_put(d->dpll); + return ret; + } + } + + return 0; +} + +/** + * ice_dpll_deinit_worker - deinitialize dpll kworker + * @pf: board private structure + * + * Stop dpll's kworker, release it's resources. + */ +static void ice_dpll_deinit_worker(struct ice_pf *pf) +{ + struct ice_dplls *d = &pf->dplls; + + kthread_cancel_delayed_work_sync(&d->work); + kthread_destroy_worker(d->kworker); +} + +/** + * ice_dpll_init_worker - Initialize DPLLs periodic worker + * @pf: board private structure + * + * Create and start DPLLs periodic worker. + * + * Context: Shall be called after pf->dplls.lock is initialized. + * Return: + * * 0 - success + * * negative - create worker failure + */ +static int ice_dpll_init_worker(struct ice_pf *pf) +{ + struct ice_dplls *d = &pf->dplls; + struct kthread_worker *kworker; + + ice_dpll_update_state(pf, &d->eec, true); + ice_dpll_update_state(pf, &d->pps, true); + kthread_init_delayed_work(&d->work, ice_dpll_periodic_work); + kworker = kthread_create_worker(0, "ice-dplls-%s", + dev_name(ice_pf_to_dev(pf))); + if (IS_ERR(kworker)) + return PTR_ERR(kworker); + d->kworker = kworker; + d->cgu_state_acq_err_num = 0; + kthread_queue_delayed_work(d->kworker, &d->work, 0); + + return 0; +} + +/** + * ice_dpll_init_info_direct_pins - initializes direct pins info + * @pf: board private structure + * @pin_type: type of pins being initialized + * + * Init information for directly connected pins, cache them in pf's pins + * structures. + * + * Return: + * * 0 - success + * * negative - init failure reason + */ +static int +ice_dpll_init_info_direct_pins(struct ice_pf *pf, + enum ice_dpll_pin_type pin_type) +{ + struct ice_dpll *de = &pf->dplls.eec, *dp = &pf->dplls.pps; + int num_pins, i, ret = -EINVAL; + struct ice_hw *hw = &pf->hw; + struct ice_dpll_pin *pins; + u8 freq_supp_num; + bool input; + + switch (pin_type) { + case ICE_DPLL_PIN_TYPE_INPUT: + pins = pf->dplls.inputs; + num_pins = pf->dplls.num_inputs; + input = true; + break; + case ICE_DPLL_PIN_TYPE_OUTPUT: + pins = pf->dplls.outputs; + num_pins = pf->dplls.num_outputs; + input = false; + break; + default: + return -EINVAL; + } + + for (i = 0; i < num_pins; i++) { + pins[i].idx = i; + pins[i].prop.board_label = ice_cgu_get_pin_name(hw, i, input); + pins[i].prop.type = ice_cgu_get_pin_type(hw, i, input); + if (input) { + ret = ice_aq_get_cgu_ref_prio(hw, de->dpll_idx, i, + &de->input_prio[i]); + if (ret) + return ret; + ret = ice_aq_get_cgu_ref_prio(hw, dp->dpll_idx, i, + &dp->input_prio[i]); + if (ret) + return ret; + pins[i].prop.capabilities |= + DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE; + } + pins[i].prop.capabilities |= + DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; + ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL); + if (ret) + return ret; + pins[i].prop.freq_supported = + ice_cgu_get_pin_freq_supp(hw, i, input, &freq_supp_num); + pins[i].prop.freq_supported_num = freq_supp_num; + pins[i].pf = pf; + } + + return ret; +} + +/** + * ice_dpll_init_info_rclk_pin - initializes rclk pin information + * @pf: board private structure + * + * Init information for rclk pin, cache them in pf->dplls.rclk. + * + * Return: + * * 0 - success + * * negative - init failure reason + */ +static int ice_dpll_init_info_rclk_pin(struct ice_pf *pf) +{ + struct ice_dpll_pin *pin = &pf->dplls.rclk; + + pin->prop.type = DPLL_PIN_TYPE_SYNCE_ETH_PORT; + pin->prop.capabilities |= DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE; + pin->pf = pf; + + return ice_dpll_pin_state_update(pf, pin, + ICE_DPLL_PIN_TYPE_RCLK_INPUT, NULL); +} + +/** + * ice_dpll_init_pins_info - init pins info wrapper + * @pf: board private structure + * @pin_type: type of pins being initialized + * + * Wraps functions for pin initialization. + * + * Return: + * * 0 - success + * * negative - init failure reason + */ +static int +ice_dpll_init_pins_info(struct ice_pf *pf, enum ice_dpll_pin_type pin_type) +{ + switch (pin_type) { + case ICE_DPLL_PIN_TYPE_INPUT: + case ICE_DPLL_PIN_TYPE_OUTPUT: + return ice_dpll_init_info_direct_pins(pf, pin_type); + case ICE_DPLL_PIN_TYPE_RCLK_INPUT: + return ice_dpll_init_info_rclk_pin(pf); + default: + return -EINVAL; + } +} + +/** + * ice_dpll_deinit_info - release memory allocated for pins info + * @pf: board private structure + * + * Release memory allocated for pins by ice_dpll_init_info function. + */ +static void ice_dpll_deinit_info(struct ice_pf *pf) +{ + kfree(pf->dplls.inputs); + kfree(pf->dplls.outputs); + kfree(pf->dplls.eec.input_prio); + kfree(pf->dplls.pps.input_prio); +} + +/** + * ice_dpll_init_info - prepare pf's dpll information structure + * @pf: board private structure + * @cgu: if cgu is present and controlled by this NIC + * + * Acquire (from HW) and set basic dpll information (on pf->dplls struct). + * + * Return: + * * 0 - success + * * negative - init failure reason + */ +static int ice_dpll_init_info(struct ice_pf *pf, bool cgu) +{ + struct ice_aqc_get_cgu_abilities abilities; + struct ice_dpll *de = &pf->dplls.eec; + struct ice_dpll *dp = &pf->dplls.pps; + struct ice_dplls *d = &pf->dplls; + struct ice_hw *hw = &pf->hw; + int ret, alloc_size, i; + + d->clock_id = ice_generate_clock_id(pf); + ret = ice_aq_get_cgu_abilities(hw, &abilities); + if (ret) { + dev_err(ice_pf_to_dev(pf), + "err:%d %s failed to read cgu abilities\n", + ret, ice_aq_str(hw->adminq.sq_last_status)); + return ret; + } + + de->dpll_idx = abilities.eec_dpll_idx; + dp->dpll_idx = abilities.pps_dpll_idx; + d->num_inputs = abilities.num_inputs; + d->num_outputs = abilities.num_outputs; + d->input_phase_adj_max = le32_to_cpu(abilities.max_in_phase_adj); + d->output_phase_adj_max = le32_to_cpu(abilities.max_out_phase_adj); + + alloc_size = sizeof(*d->inputs) * d->num_inputs; + d->inputs = kzalloc(alloc_size, GFP_KERNEL); + if (!d->inputs) + return -ENOMEM; + + alloc_size = sizeof(*de->input_prio) * d->num_inputs; + de->input_prio = kzalloc(alloc_size, GFP_KERNEL); + if (!de->input_prio) + return -ENOMEM; + + dp->input_prio = kzalloc(alloc_size, GFP_KERNEL); + if (!dp->input_prio) + return -ENOMEM; + + ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_INPUT); + if (ret) + goto deinit_info; + + if (cgu) { + alloc_size = sizeof(*d->outputs) * d->num_outputs; + d->outputs = kzalloc(alloc_size, GFP_KERNEL); + if (!d->outputs) { + ret = -ENOMEM; + goto deinit_info; + } + + ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_OUTPUT); + if (ret) + goto deinit_info; + } + + ret = ice_get_cgu_rclk_pin_info(&pf->hw, &d->base_rclk_idx, + &pf->dplls.rclk.num_parents); + if (ret) + return ret; + for (i = 0; i < pf->dplls.rclk.num_parents; i++) + pf->dplls.rclk.parent_idx[i] = d->base_rclk_idx + i; + ret = ice_dpll_init_pins_info(pf, ICE_DPLL_PIN_TYPE_RCLK_INPUT); + if (ret) + return ret; + de->mode = DPLL_MODE_AUTOMATIC; + dp->mode = DPLL_MODE_AUTOMATIC; + + dev_dbg(ice_pf_to_dev(pf), + "%s - success, inputs:%u, outputs:%u rclk-parents:%u\n", + __func__, d->num_inputs, d->num_outputs, d->rclk.num_parents); + + return 0; + +deinit_info: + dev_err(ice_pf_to_dev(pf), + "%s - fail: d->inputs:%p, de->input_prio:%p, dp->input_prio:%p, d->outputs:%p\n", + __func__, d->inputs, de->input_prio, + dp->input_prio, d->outputs); + ice_dpll_deinit_info(pf); + return ret; +} + +/** + * ice_dpll_deinit - Disable the driver/HW support for dpll subsystem + * the dpll device. + * @pf: board private structure + * + * Handles the cleanup work required after dpll initialization, freeing + * resources and unregistering the dpll, pin and all resources used for + * handling them. + * + * Context: Destroys pf->dplls.lock mutex. Call only if ICE_FLAG_DPLL was set. + */ +void ice_dpll_deinit(struct ice_pf *pf) +{ + bool cgu = ice_is_feature_supported(pf, ICE_F_CGU); + + clear_bit(ICE_FLAG_DPLL, pf->flags); + if (cgu) + ice_dpll_deinit_worker(pf); + + ice_dpll_deinit_pins(pf, cgu); + ice_dpll_deinit_dpll(pf, &pf->dplls.pps, cgu); + ice_dpll_deinit_dpll(pf, &pf->dplls.eec, cgu); + ice_dpll_deinit_info(pf); + mutex_destroy(&pf->dplls.lock); +} + +/** + * ice_dpll_init - initialize support for dpll subsystem + * @pf: board private structure + * + * Set up the device dplls, register them and pins connected within Linux dpll + * subsystem. Allow userspace to obtain state of DPLL and handling of DPLL + * configuration requests. + * + * Context: Initializes pf->dplls.lock mutex. + */ +void ice_dpll_init(struct ice_pf *pf) +{ + bool cgu = ice_is_feature_supported(pf, ICE_F_CGU); + struct ice_dplls *d = &pf->dplls; + int err = 0; + + err = ice_dpll_init_info(pf, cgu); + if (err) + goto err_exit; + err = ice_dpll_init_dpll(pf, &pf->dplls.eec, cgu, DPLL_TYPE_EEC); + if (err) + goto deinit_info; + err = ice_dpll_init_dpll(pf, &pf->dplls.pps, cgu, DPLL_TYPE_PPS); + if (err) + goto deinit_eec; + err = ice_dpll_init_pins(pf, cgu); + if (err) + goto deinit_pps; + mutex_init(&d->lock); + if (cgu) { + err = ice_dpll_init_worker(pf); + if (err) + goto deinit_pins; + } + set_bit(ICE_FLAG_DPLL, pf->flags); + + return; + +deinit_pins: + ice_dpll_deinit_pins(pf, cgu); +deinit_pps: + ice_dpll_deinit_dpll(pf, &pf->dplls.pps, cgu); +deinit_eec: + ice_dpll_deinit_dpll(pf, &pf->dplls.eec, cgu); +deinit_info: + ice_dpll_deinit_info(pf); +err_exit: + mutex_destroy(&d->lock); + dev_warn(ice_pf_to_dev(pf), "DPLLs init failure err:%d\n", err); +} diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.h b/drivers/net/ethernet/intel/ice/ice_dpll.h new file mode 100644 index 000000000000..9c524c4bdfd7 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_dpll.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2022, Intel Corporation. */ + +#ifndef _ICE_DPLL_H_ +#define _ICE_DPLL_H_ + +#include "ice.h" + +#define ICE_DPLL_PRIO_MAX 0xF +#define ICE_DPLL_RCLK_NUM_MAX 4 + +/** ice_dpll_pin - store info about pins + * @pin: dpll pin structure + * @pf: pointer to pf, which has registered the dpll_pin + * @idx: ice pin private idx + * @num_parents: hols number of parent pins + * @parent_idx: hold indexes of parent pins + * @flags: pin flags returned from HW + * @state: state of a pin + * @prop: pin properties + * @freq: current frequency of a pin + */ +struct ice_dpll_pin { + struct dpll_pin *pin; + struct ice_pf *pf; + u8 idx; + u8 num_parents; + u8 parent_idx[ICE_DPLL_RCLK_NUM_MAX]; + u8 flags[ICE_DPLL_RCLK_NUM_MAX]; + u8 state[ICE_DPLL_RCLK_NUM_MAX]; + struct dpll_pin_properties prop; + u32 freq; +}; + +/** ice_dpll - store info required for DPLL control + * @dpll: pointer to dpll dev + * @pf: pointer to pf, which has registered the dpll_device + * @dpll_idx: index of dpll on the NIC + * @input_idx: currently selected input index + * @prev_input_idx: previously selected input index + * @ref_state: state of dpll reference signals + * @eec_mode: eec_mode dpll is configured for + * @phase_shift: phase shift delay of a dpll + * @input_prio: priorities of each input + * @dpll_state: current dpll sync state + * @prev_dpll_state: last dpll sync state + * @active_input: pointer to active input pin + * @prev_input: pointer to previous active input pin + */ +struct ice_dpll { + struct dpll_device *dpll; + struct ice_pf *pf; + u8 dpll_idx; + u8 input_idx; + u8 prev_input_idx; + u8 ref_state; + u8 eec_mode; + s64 phase_shift; + u8 *input_prio; + enum dpll_lock_status dpll_state; + enum dpll_lock_status prev_dpll_state; + enum dpll_mode mode; + struct dpll_pin *active_input; + struct dpll_pin *prev_input; +}; + +/** ice_dplls - store info required for CCU (clock controlling unit) + * @kworker: periodic worker + * @work: periodic work + * @lock: locks access to configuration of a dpll + * @eec: pointer to EEC dpll dev + * @pps: pointer to PPS dpll dev + * @inputs: input pins pointer + * @outputs: output pins pointer + * @rclk: recovered pins pointer + * @num_inputs: number of input pins available on dpll + * @num_outputs: number of output pins available on dpll + * @cgu_state_acq_err_num: number of errors returned during periodic work + * @base_rclk_idx: idx of first pin used for clock revocery pins + * @clock_id: clock_id of dplls + */ +struct ice_dplls { + struct kthread_worker *kworker; + struct kthread_delayed_work work; + struct mutex lock; + struct ice_dpll eec; + struct ice_dpll pps; + struct ice_dpll_pin *inputs; + struct ice_dpll_pin *outputs; + struct ice_dpll_pin rclk; + u8 num_inputs; + u8 num_outputs; + int cgu_state_acq_err_num; + u8 base_rclk_idx; + u64 clock_id; + s32 input_phase_adj_max; + s32 output_phase_adj_max; +}; + +void ice_dpll_init(struct ice_pf *pf); + +void ice_dpll_deinit(struct ice_pf *pf); + +#endif diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index 75c9de675f20..c8ea1af51ad3 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -389,6 +389,9 @@ bool ice_gnss_is_gps_present(struct ice_hw *hw) if (!hw->func_caps.ts_func_info.src_tmr_owned) return false; + if (!ice_is_gps_in_netlist(hw)) + return false; + #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) if (ice_is_e810t(hw)) { int err; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 73bbf06a76db..031abe311ea3 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3990,13 +3990,21 @@ void ice_init_feature_support(struct ice_pf *pf) case ICE_DEV_ID_E810C_BACKPLANE: case ICE_DEV_ID_E810C_QSFP: case ICE_DEV_ID_E810C_SFP: + case ICE_DEV_ID_E810_XXV_BACKPLANE: + case ICE_DEV_ID_E810_XXV_QSFP: + case ICE_DEV_ID_E810_XXV_SFP: ice_set_feature_support(pf, ICE_F_DSCP); - ice_set_feature_support(pf, ICE_F_PTP_EXTTS); - if (ice_is_e810t(&pf->hw)) { + if (ice_is_phy_rclk_present(&pf->hw)) + ice_set_feature_support(pf, ICE_F_PHY_RCLK); + /* If we don't own the timer - don't enable other caps */ + if (!ice_pf_src_tmr_owned(pf)) + break; + if (ice_is_cgu_present(&pf->hw)) + ice_set_feature_support(pf, ICE_F_CGU); + if (ice_is_clock_mux_present_e810t(&pf->hw)) ice_set_feature_support(pf, ICE_F_SMA_CTRL); - if (ice_gnss_is_gps_present(&pf->hw)) - ice_set_feature_support(pf, ICE_F_GNSS); - } + if (ice_gnss_is_gps_present(&pf->hw)) + ice_set_feature_support(pf, ICE_F_GNSS); break; default: break; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 7784135160fd..4fd245f55b2d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3160,7 +3160,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) ena_mask &= ~PFINT_OICR_TSYN_EVNT_M; - if (hw->func_caps.ts_func_info.src_tmr_owned) { + if (ice_pf_src_tmr_owned(pf)) { /* Save EVENTs from GLTSYN register */ pf->ptp.ext_ts_irq |= gltsyn_stat & (GLTSYN_STAT_EVENT0_M | @@ -4666,6 +4666,10 @@ static void ice_init_features(struct ice_pf *pf) if (ice_is_feature_supported(pf, ICE_F_GNSS)) ice_gnss_init(pf); + if (ice_is_feature_supported(pf, ICE_F_CGU) || + ice_is_feature_supported(pf, ICE_F_PHY_RCLK)) + ice_dpll_init(pf); + /* Note: Flow director init failure is non-fatal to load */ if (ice_init_fdir(pf)) dev_err(dev, "could not initialize flow director\n"); @@ -4695,6 +4699,8 @@ static void ice_deinit_features(struct ice_pf *pf) ice_gnss_exit(pf); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_release(pf); + if (test_bit(ICE_FLAG_DPLL, pf->flags)) + ice_dpll_deinit(pf); } static void ice_init_wakeup(struct ice_pf *pf) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 81d96a40d5a7..05f922d3b316 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -436,7 +436,7 @@ static void ice_clear_ptp_clock_index(struct ice_pf *pf) int err; /* Do not clear the index if we don't own the timer */ - if (!hw->func_caps.ts_func_info.src_tmr_owned) + if (!ice_pf_src_tmr_owned(pf)) return; tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc; @@ -1366,6 +1366,7 @@ out_unlock: void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) { struct ice_ptp_port *ptp_port; + struct ice_hw *hw = &pf->hw; if (!test_bit(ICE_FLAG_PTP, pf->flags)) return; @@ -1380,11 +1381,16 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; - /* E810 devices do not need to reconfigure the PHY */ - if (ice_is_e810(&pf->hw)) + switch (hw->phy_model) { + case ICE_PHY_E810: + /* Do not reconfigure E810 PHY */ return; - - ice_ptp_port_phy_restart(ptp_port); + case ICE_PHY_E822: + ice_ptp_port_phy_restart(ptp_port); + return; + default: + dev_warn(ice_pf_to_dev(pf), "%s: Unknown PHY type\n", __func__); + } } /** @@ -1976,21 +1982,32 @@ ice_ptp_get_syncdevicetime(ktime_t *device, u32 hh_lock, hh_art_ctl; int i; - /* Get the HW lock */ - hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); +#define MAX_HH_HW_LOCK_TRIES 5 +#define MAX_HH_CTL_LOCK_TRIES 100 + + for (i = 0; i < MAX_HH_HW_LOCK_TRIES; i++) { + /* Get the HW lock */ + hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); + if (hh_lock & PFHH_SEM_BUSY_M) { + usleep_range(10000, 15000); + continue; + } + break; + } if (hh_lock & PFHH_SEM_BUSY_M) { dev_err(ice_pf_to_dev(pf), "PTP failed to get hh lock\n"); - return -EFAULT; + return -EBUSY; } + /* Program cmd to master timer */ + ice_ptp_src_cmd(hw, ICE_PTP_READ_TIME); + /* Start the ART and device clock sync sequence */ hh_art_ctl = rd32(hw, GLHH_ART_CTL); hh_art_ctl = hh_art_ctl | GLHH_ART_CTL_ACTIVE_M; wr32(hw, GLHH_ART_CTL, hh_art_ctl); -#define MAX_HH_LOCK_TRIES 100 - - for (i = 0; i < MAX_HH_LOCK_TRIES; i++) { + for (i = 0; i < MAX_HH_CTL_LOCK_TRIES; i++) { /* Wait for sync to complete */ hh_art_ctl = rd32(hw, GLHH_ART_CTL); if (hh_art_ctl & GLHH_ART_CTL_ACTIVE_M) { @@ -2014,19 +2031,23 @@ ice_ptp_get_syncdevicetime(ktime_t *device, break; } } + + /* Clear the master timer */ + ice_ptp_src_cmd(hw, ICE_PTP_NOP); + /* Release HW lock */ hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); hh_lock = hh_lock & ~PFHH_SEM_BUSY_M; wr32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), hh_lock); - if (i == MAX_HH_LOCK_TRIES) + if (i == MAX_HH_CTL_LOCK_TRIES) return -ETIMEDOUT; return 0; } /** - * ice_ptp_getcrosststamp_e822 - Capture a device cross timestamp + * ice_ptp_getcrosststamp_e82x - Capture a device cross timestamp * @info: the driver's PTP info structure * @cts: The memory to fill the cross timestamp info * @@ -2034,14 +2055,14 @@ ice_ptp_get_syncdevicetime(ktime_t *device, * clock. Fill the cross timestamp information and report it back to the * caller. * - * This is only valid for E822 devices which have support for generating the - * cross timestamp via PCIe PTM. + * This is only valid for E822 and E823 devices which have support for + * generating the cross timestamp via PCIe PTM. * * In order to correctly correlate the ART timestamp back to the TSC time, the * CPU must have X86_FEATURE_TSC_KNOWN_FREQ. */ static int -ice_ptp_getcrosststamp_e822(struct ptp_clock_info *info, +ice_ptp_getcrosststamp_e82x(struct ptp_clock_info *info, struct system_device_crosststamp *cts) { struct ice_pf *pf = ptp_info_to_pf(info); @@ -2246,18 +2267,20 @@ ice_ptp_setup_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info) static void ice_ptp_setup_pins_e810(struct ice_pf *pf, struct ptp_clock_info *info) { - info->n_per_out = N_PER_OUT_E810; - - if (ice_is_feature_supported(pf, ICE_F_PTP_EXTTS)) - info->n_ext_ts = N_EXT_TS_E810; - if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) { info->n_ext_ts = N_EXT_TS_E810; + info->n_per_out = N_PER_OUT_E810T; info->n_pins = NUM_PTP_PINS_E810T; info->verify = ice_verify_pin_e810t; /* Complete setup of the SMA pins */ ice_ptp_setup_sma_pins_e810t(pf, info); + } else if (ice_is_e810t(&pf->hw)) { + info->n_ext_ts = N_EXT_TS_NO_SMA_E810T; + info->n_per_out = N_PER_OUT_NO_SMA_E810T; + } else { + info->n_per_out = N_PER_OUT_E810; + info->n_ext_ts = N_EXT_TS_E810; } } @@ -2275,22 +2298,22 @@ ice_ptp_setup_pins_e823(struct ice_pf *pf, struct ptp_clock_info *info) } /** - * ice_ptp_set_funcs_e822 - Set specialized functions for E822 support + * ice_ptp_set_funcs_e82x - Set specialized functions for E82x support * @pf: Board private structure * @info: PTP info to fill * - * Assign functions to the PTP capabiltiies structure for E822 devices. + * Assign functions to the PTP capabiltiies structure for E82x devices. * Functions which operate across all device families should be set directly - * in ice_ptp_set_caps. Only add functions here which are distinct for E822 + * in ice_ptp_set_caps. Only add functions here which are distinct for E82x * devices. */ static void -ice_ptp_set_funcs_e822(struct ice_pf *pf, struct ptp_clock_info *info) +ice_ptp_set_funcs_e82x(struct ice_pf *pf, struct ptp_clock_info *info) { #ifdef CONFIG_ICE_HWTS if (boot_cpu_has(X86_FEATURE_ART) && boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) - info->getcrosststamp = ice_ptp_getcrosststamp_e822; + info->getcrosststamp = ice_ptp_getcrosststamp_e82x; #endif /* CONFIG_ICE_HWTS */ } @@ -2324,6 +2347,8 @@ ice_ptp_set_funcs_e810(struct ice_pf *pf, struct ptp_clock_info *info) static void ice_ptp_set_funcs_e823(struct ice_pf *pf, struct ptp_clock_info *info) { + ice_ptp_set_funcs_e82x(pf, info); + info->enable = ice_ptp_gpio_enable_e823; ice_ptp_setup_pins_e823(pf, info); } @@ -2351,7 +2376,7 @@ static void ice_ptp_set_caps(struct ice_pf *pf) else if (ice_is_e823(&pf->hw)) ice_ptp_set_funcs_e823(pf, info); else - ice_ptp_set_funcs_e822(pf, info); + ice_ptp_set_funcs_e82x(pf, info); } /** @@ -2474,7 +2499,7 @@ void ice_ptp_reset(struct ice_pf *pf) if (test_bit(ICE_PFR_REQ, pf->state)) goto pfr; - if (!hw->func_caps.ts_func_info.src_tmr_owned) + if (!ice_pf_src_tmr_owned(pf)) goto reset_ts; err = ice_ptp_init_phc(hw); @@ -2685,14 +2710,22 @@ static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp) */ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) { + struct ice_hw *hw = &pf->hw; + mutex_init(&ptp_port->ps_lock); - if (ice_is_e810(&pf->hw)) + switch (hw->phy_model) { + case ICE_PHY_E810: return ice_ptp_init_tx_e810(pf, &ptp_port->tx); + case ICE_PHY_E822: + kthread_init_delayed_work(&ptp_port->ov_work, + ice_ptp_wait_for_offsets); - kthread_init_delayed_work(&ptp_port->ov_work, - ice_ptp_wait_for_offsets); - return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num); + return ice_ptp_init_tx_e822(pf, &ptp_port->tx, + ptp_port->port_num); + default: + return -ENODEV; + } } /** @@ -2713,10 +2746,12 @@ void ice_ptp_init(struct ice_pf *pf) struct ice_hw *hw = &pf->hw; int err; + ice_ptp_init_phy_model(hw); + /* If this function owns the clock hardware, it must allocate and * configure the PTP clock device to represent it. */ - if (hw->func_caps.ts_func_info.src_tmr_owned) { + if (ice_pf_src_tmr_owned(pf)) { err = ice_ptp_init_owner(pf); if (err) goto err; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index f818dd215c05..f839f186797d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -7,6 +7,132 @@ #include "ice_ptp_consts.h" #include "ice_cgu_regs.h" +static struct dpll_pin_frequency ice_cgu_pin_freq_common[] = { + DPLL_PIN_FREQUENCY_1PPS, + DPLL_PIN_FREQUENCY_10MHZ, +}; + +static struct dpll_pin_frequency ice_cgu_pin_freq_1_hz[] = { + DPLL_PIN_FREQUENCY_1PPS, +}; + +static struct dpll_pin_frequency ice_cgu_pin_freq_10_mhz[] = { + DPLL_PIN_FREQUENCY_10MHZ, +}; + +static const struct ice_cgu_pin_desc ice_e810t_sfp_cgu_inputs[] = { + { "CVL-SDP22", ZL_REF0P, DPLL_PIN_TYPE_INT_OSCILLATOR, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "CVL-SDP20", ZL_REF0N, DPLL_PIN_TYPE_INT_OSCILLATOR, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "C827_0-RCLKA", ZL_REF1P, DPLL_PIN_TYPE_MUX, 0, }, + { "C827_0-RCLKB", ZL_REF1N, DPLL_PIN_TYPE_MUX, 0, }, + { "SMA1", ZL_REF3P, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "SMA2/U.FL2", ZL_REF3N, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "GNSS-1PPS", ZL_REF4P, DPLL_PIN_TYPE_GNSS, + ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz }, + { "OCXO", ZL_REF4N, DPLL_PIN_TYPE_INT_OSCILLATOR, 0, }, +}; + +static const struct ice_cgu_pin_desc ice_e810t_qsfp_cgu_inputs[] = { + { "CVL-SDP22", ZL_REF0P, DPLL_PIN_TYPE_INT_OSCILLATOR, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "CVL-SDP20", ZL_REF0N, DPLL_PIN_TYPE_INT_OSCILLATOR, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "C827_0-RCLKA", ZL_REF1P, DPLL_PIN_TYPE_MUX, }, + { "C827_0-RCLKB", ZL_REF1N, DPLL_PIN_TYPE_MUX, }, + { "C827_1-RCLKA", ZL_REF2P, DPLL_PIN_TYPE_MUX, }, + { "C827_1-RCLKB", ZL_REF2N, DPLL_PIN_TYPE_MUX, }, + { "SMA1", ZL_REF3P, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "SMA2/U.FL2", ZL_REF3N, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "GNSS-1PPS", ZL_REF4P, DPLL_PIN_TYPE_GNSS, + ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz }, + { "OCXO", ZL_REF4N, DPLL_PIN_TYPE_INT_OSCILLATOR, }, +}; + +static const struct ice_cgu_pin_desc ice_e810t_sfp_cgu_outputs[] = { + { "REF-SMA1", ZL_OUT0, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "REF-SMA2/U.FL2", ZL_OUT1, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "PHY-CLK", ZL_OUT2, DPLL_PIN_TYPE_SYNCE_ETH_PORT, }, + { "MAC-CLK", ZL_OUT3, DPLL_PIN_TYPE_SYNCE_ETH_PORT, }, + { "CVL-SDP21", ZL_OUT4, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz }, + { "CVL-SDP23", ZL_OUT5, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz }, +}; + +static const struct ice_cgu_pin_desc ice_e810t_qsfp_cgu_outputs[] = { + { "REF-SMA1", ZL_OUT0, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "REF-SMA2/U.FL2", ZL_OUT1, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "PHY-CLK", ZL_OUT2, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 }, + { "PHY2-CLK", ZL_OUT3, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 }, + { "MAC-CLK", ZL_OUT4, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 }, + { "CVL-SDP21", ZL_OUT5, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz }, + { "CVL-SDP23", ZL_OUT6, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz }, +}; + +static const struct ice_cgu_pin_desc ice_e823_si_cgu_inputs[] = { + { "NONE", SI_REF0P, 0, 0 }, + { "NONE", SI_REF0N, 0, 0 }, + { "SYNCE0_DP", SI_REF1P, DPLL_PIN_TYPE_MUX, 0 }, + { "SYNCE0_DN", SI_REF1N, DPLL_PIN_TYPE_MUX, 0 }, + { "EXT_CLK_SYNC", SI_REF2P, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "NONE", SI_REF2N, 0, 0 }, + { "EXT_PPS_OUT", SI_REF3, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "INT_PPS_OUT", SI_REF4, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, +}; + +static const struct ice_cgu_pin_desc ice_e823_si_cgu_outputs[] = { + { "1588-TIME_SYNC", SI_OUT0, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "PHY-CLK", SI_OUT1, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 }, + { "10MHZ-SMA2", SI_OUT2, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_10_mhz), ice_cgu_pin_freq_10_mhz }, + { "PPS-SMA1", SI_OUT3, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, +}; + +static const struct ice_cgu_pin_desc ice_e823_zl_cgu_inputs[] = { + { "NONE", ZL_REF0P, 0, 0 }, + { "INT_PPS_OUT", ZL_REF0N, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz }, + { "SYNCE0_DP", ZL_REF1P, DPLL_PIN_TYPE_MUX, 0 }, + { "SYNCE0_DN", ZL_REF1N, DPLL_PIN_TYPE_MUX, 0 }, + { "NONE", ZL_REF2P, 0, 0 }, + { "NONE", ZL_REF2N, 0, 0 }, + { "EXT_CLK_SYNC", ZL_REF3P, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "NONE", ZL_REF3N, 0, 0 }, + { "EXT_PPS_OUT", ZL_REF4P, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz }, + { "OCXO", ZL_REF4N, DPLL_PIN_TYPE_INT_OSCILLATOR, 0 }, +}; + +static const struct ice_cgu_pin_desc ice_e823_zl_cgu_outputs[] = { + { "PPS-SMA1", ZL_OUT0, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_1_hz), ice_cgu_pin_freq_1_hz }, + { "10MHZ-SMA2", ZL_OUT1, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_10_mhz), ice_cgu_pin_freq_10_mhz }, + { "PHY-CLK", ZL_OUT2, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 }, + { "1588-TIME_REF", ZL_OUT3, DPLL_PIN_TYPE_SYNCE_ETH_PORT, 0 }, + { "CPK-TIME_SYNC", ZL_OUT4, DPLL_PIN_TYPE_EXT, + ARRAY_SIZE(ice_cgu_pin_freq_common), ice_cgu_pin_freq_common }, + { "NONE", ZL_OUT5, 0, 0 }, +}; + /* Low level functions for interacting with and managing the device clock used * for the Precision Time Protocol. * @@ -107,7 +233,7 @@ static u64 ice_ptp_read_src_incval(struct ice_hw *hw) * * Prepare the source timer for an upcoming timer sync command. */ -static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) +void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) { u32 cmd_val; u8 tmr_idx; @@ -116,19 +242,19 @@ static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) cmd_val = tmr_idx << SEL_CPK_SRC; switch (cmd) { - case INIT_TIME: + case ICE_PTP_INIT_TIME: cmd_val |= GLTSYN_CMD_INIT_TIME; break; - case INIT_INCVAL: + case ICE_PTP_INIT_INCVAL: cmd_val |= GLTSYN_CMD_INIT_INCVAL; break; - case ADJ_TIME: + case ICE_PTP_ADJ_TIME: cmd_val |= GLTSYN_CMD_ADJ_TIME; break; - case ADJ_TIME_AT_TIME: + case ICE_PTP_ADJ_TIME_AT_TIME: cmd_val |= GLTSYN_CMD_ADJ_INIT_TIME; break; - case READ_TIME: + case ICE_PTP_READ_TIME: cmd_val |= GLTSYN_CMD_READ_TIME; break; case ICE_PTP_NOP: @@ -168,9 +294,9 @@ ice_fill_phy_msg_e822(struct ice_sbq_msg_input *msg, u8 port, u16 offset) { int phy_port, phy, quadtype; - phy_port = port % ICE_PORTS_PER_PHY; - phy = port / ICE_PORTS_PER_PHY; - quadtype = (port / ICE_PORTS_PER_QUAD) % ICE_NUM_QUAD_TYPE; + phy_port = port % ICE_PORTS_PER_PHY_E822; + phy = port / ICE_PORTS_PER_PHY_E822; + quadtype = (port / ICE_PORTS_PER_QUAD) % ICE_QUADS_PER_PHY_E822; if (quadtype == 0) { msg->msg_addr_low = P_Q0_L(P_0_BASE + offset, phy_port); @@ -495,20 +621,25 @@ ice_write_64b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 val) * Fill a message buffer for accessing a register in a quad shared between * multiple PHYs. */ -static void +static int ice_fill_quad_msg_e822(struct ice_sbq_msg_input *msg, u8 quad, u16 offset) { u32 addr; + if (quad >= ICE_MAX_QUAD) + return -EINVAL; + msg->dest_dev = rmn_0; - if ((quad % ICE_NUM_QUAD_TYPE) == 0) + if ((quad % ICE_QUADS_PER_PHY_E822) == 0) addr = Q_0_BASE + offset; else addr = Q_1_BASE + offset; msg->msg_addr_low = lower_16_bits(addr); msg->msg_addr_high = upper_16_bits(addr); + + return 0; } /** @@ -527,10 +658,10 @@ ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val) struct ice_sbq_msg_input msg = {0}; int err; - if (quad >= ICE_MAX_QUAD) - return -EINVAL; + err = ice_fill_quad_msg_e822(&msg, quad, offset); + if (err) + return err; - ice_fill_quad_msg_e822(&msg, quad, offset); msg.opcode = ice_sbq_msg_rd; err = ice_sbq_rw_reg(hw, &msg); @@ -561,10 +692,10 @@ ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val) struct ice_sbq_msg_input msg = {0}; int err; - if (quad >= ICE_MAX_QUAD) - return -EINVAL; + err = ice_fill_quad_msg_e822(&msg, quad, offset); + if (err) + return err; - ice_fill_quad_msg_e822(&msg, quad, offset); msg.opcode = ice_sbq_msg_wr; msg.data = val; @@ -628,29 +759,32 @@ ice_read_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx, u64 *tstamp) * @quad: the quad to read from * @idx: the timestamp index to reset * - * Clear a timestamp, resetting its valid bit, from the PHY quad block that is - * shared between the internal PHYs on the E822 devices. + * Read the timestamp out of the quad to clear its timestamp status bit from + * the PHY quad block that is shared between the internal PHYs of the E822 + * devices. + * + * Note that unlike E810, software cannot directly write to the quad memory + * bank registers. E822 relies on the ice_get_phy_tx_tstamp_ready() function + * to determine which timestamps are valid. Reading a timestamp auto-clears + * the valid bit. + * + * To directly clear the contents of the timestamp block entirely, discarding + * all timestamp data at once, software should instead use + * ice_ptp_reset_ts_memory_quad_e822(). + * + * This function should only be called on an idx whose bit is set according to + * ice_get_phy_tx_tstamp_ready(). */ static int ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx) { - u16 lo_addr, hi_addr; + u64 unused_tstamp; int err; - lo_addr = (u16)TS_L(Q_REG_TX_MEMORY_BANK_START, idx); - hi_addr = (u16)TS_H(Q_REG_TX_MEMORY_BANK_START, idx); - - err = ice_write_quad_reg_e822(hw, quad, lo_addr, 0); + err = ice_read_phy_tstamp_e822(hw, quad, idx, &unused_tstamp); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n", - err); - return err; - } - - err = ice_write_quad_reg_e822(hw, quad, hi_addr, 0); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n", - err); + ice_debug(hw, ICE_DBG_PTP, "Failed to read the timestamp register for quad %u, idx %u, err %d\n", + quad, idx, err); return err; } @@ -1025,7 +1159,7 @@ static int ice_ptp_init_phc_e822(struct ice_hw *hw) * @time: Time to initialize the PHY port clocks to * * Program the PHY port registers with a new initial time value. The port - * clock will be initialized once the driver issues an INIT_TIME sync + * clock will be initialized once the driver issues an ICE_PTP_INIT_TIME sync * command. The time value is the upper 32 bits of the PHY timer, usually in * units of nominal nanoseconds. */ @@ -1074,7 +1208,7 @@ exit_err: * * Program the port for an atomic adjustment by writing the Tx and Rx timer * registers. The atomic adjustment won't be completed until the driver issues - * an ADJ_TIME command. + * an ICE_PTP_ADJ_TIME command. * * Note that time is not in units of nanoseconds. It is in clock time * including the lower sub-nanosecond portion of the port timer. @@ -1127,7 +1261,7 @@ exit_err: * * Prepare the PHY ports for an atomic time adjustment by programming the PHY * Tx and Rx port registers. The actual adjustment is completed by issuing an - * ADJ_TIME or ADJ_TIME_AT_TIME sync command. + * ICE_PTP_ADJ_TIME or ICE_PTP_ADJ_TIME_AT_TIME sync command. */ static int ice_ptp_prep_phy_adj_e822(struct ice_hw *hw, s32 adj) @@ -1162,7 +1296,7 @@ ice_ptp_prep_phy_adj_e822(struct ice_hw *hw, s32 adj) * * Prepare each of the PHY ports for a new increment value by programming the * port's TIMETUS registers. The new increment value will be updated after - * issuing an INIT_INCVAL command. + * issuing an ICE_PTP_INIT_INCVAL command. */ static int ice_ptp_prep_phy_incval_e822(struct ice_hw *hw, u64 incval) @@ -1248,19 +1382,19 @@ ice_ptp_write_port_cmd_e822(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd tmr_idx = ice_get_ptp_src_clock_index(hw); cmd_val = tmr_idx << SEL_PHY_SRC; switch (cmd) { - case INIT_TIME: + case ICE_PTP_INIT_TIME: cmd_val |= PHY_CMD_INIT_TIME; break; - case INIT_INCVAL: + case ICE_PTP_INIT_INCVAL: cmd_val |= PHY_CMD_INIT_INCVAL; break; - case ADJ_TIME: + case ICE_PTP_ADJ_TIME: cmd_val |= PHY_CMD_ADJ_TIME; break; - case READ_TIME: + case ICE_PTP_READ_TIME: cmd_val |= PHY_CMD_READ_TIME; break; - case ADJ_TIME_AT_TIME: + case ICE_PTP_ADJ_TIME_AT_TIME: cmd_val |= PHY_CMD_ADJ_TIME_AT_TIME; break; case ICE_PTP_NOP: @@ -2196,8 +2330,8 @@ int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) * @phy_time: on return, the 64bit PHY timer value * @phc_time: on return, the lower 64bits of PHC time * - * Issue a READ_TIME timer command to simultaneously capture the PHY and PHC - * timer values. + * Issue a ICE_PTP_READ_TIME timer command to simultaneously capture the PHY + * and PHC timer values. */ static int ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time, @@ -2210,15 +2344,15 @@ ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time, tmr_idx = ice_get_ptp_src_clock_index(hw); - /* Prepare the PHC timer for a READ_TIME capture command */ - ice_ptp_src_cmd(hw, READ_TIME); + /* Prepare the PHC timer for a ICE_PTP_READ_TIME capture command */ + ice_ptp_src_cmd(hw, ICE_PTP_READ_TIME); - /* Prepare the PHY timer for a READ_TIME capture command */ - err = ice_ptp_one_port_cmd(hw, port, READ_TIME); + /* Prepare the PHY timer for a ICE_PTP_READ_TIME capture command */ + err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_READ_TIME); if (err) return err; - /* Issue the sync to start the READ_TIME capture */ + /* Issue the sync to start the ICE_PTP_READ_TIME capture */ ice_ptp_exec_tmr_cmd(hw); /* Read the captured PHC time from the shadow time registers */ @@ -2252,10 +2386,11 @@ ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time, * @port: the PHY port to synchronize * * Perform an adjustment to ensure that the PHY and PHC timers are in sync. - * This is done by issuing a READ_TIME command which triggers a simultaneous - * read of the PHY timer and PHC timer. Then we use the difference to - * calculate an appropriate 2s complement addition to add to the PHY timer in - * order to ensure it reads the same value as the primary PHC timer. + * This is done by issuing a ICE_PTP_READ_TIME command which triggers a + * simultaneous read of the PHY timer and PHC timer. Then we use the + * difference to calculate an appropriate 2s complement addition to add + * to the PHY timer in order to ensure it reads the same value as the + * primary PHC timer. */ static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port) { @@ -2285,7 +2420,7 @@ static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port) if (err) goto err_unlock; - err = ice_ptp_one_port_cmd(hw, port, ADJ_TIME); + err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_ADJ_TIME); if (err) goto err_unlock; @@ -2408,7 +2543,7 @@ int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port) if (err) return err; - err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL); + err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_INIT_INCVAL); if (err) return err; @@ -2436,7 +2571,7 @@ int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port) if (err) return err; - err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL); + err = ice_ptp_one_port_cmd(hw, port, ICE_PTP_INIT_INCVAL); if (err) return err; @@ -2685,28 +2820,39 @@ ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp) * @lport: the lport to read from * @idx: the timestamp index to reset * - * Clear a timestamp, resetting its valid bit, from the timestamp block of the - * external PHY on the E810 device. + * Read the timestamp and then forcibly overwrite its value to clear the valid + * bit from the timestamp block of the external PHY on the E810 device. + * + * This function should only be called on an idx whose bit is set according to + * ice_get_phy_tx_tstamp_ready(). */ static int ice_clear_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx) { u32 lo_addr, hi_addr; + u64 unused_tstamp; int err; + err = ice_read_phy_tstamp_e810(hw, lport, idx, &unused_tstamp); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read the timestamp register for lport %u, idx %u, err %d\n", + lport, idx, err); + return err; + } + lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx); hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx); err = ice_write_phy_reg_e810(hw, lo_addr, 0); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n", - err); + ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register for lport %u, idx %u, err %d\n", + lport, idx, err); return err; } err = ice_write_phy_reg_e810(hw, hi_addr, 0); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n", - err); + ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register for lport %u, idx %u, err %d\n", + lport, idx, err); return err; } @@ -2757,7 +2903,7 @@ static int ice_ptp_init_phc_e810(struct ice_hw *hw) * * Program the PHY port ETH_GLTSYN_SHTIME registers in preparation setting the * initial clock time. The time will not actually be programmed until the - * driver issues an INIT_TIME command. + * driver issues an ICE_PTP_INIT_TIME command. * * The time value is the upper 32 bits of the PHY timer, usually in units of * nominal nanoseconds. @@ -2792,7 +2938,7 @@ static int ice_ptp_prep_phy_time_e810(struct ice_hw *hw, u32 time) * * Prepare the PHY port for an atomic adjustment by programming the PHY * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual adjustment - * is completed by issuing an ADJ_TIME sync command. + * is completed by issuing an ICE_PTP_ADJ_TIME sync command. * * The adjustment value only contains the portion used for the upper 32bits of * the PHY timer, usually in units of nominal nanoseconds. Negative @@ -2832,7 +2978,7 @@ static int ice_ptp_prep_phy_adj_e810(struct ice_hw *hw, s32 adj) * * Prepare the PHY port for a new increment value by programming the PHY * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual change is - * completed by issuing an INIT_INCVAL command. + * completed by issuing an ICE_PTP_INIT_INCVAL command. */ static int ice_ptp_prep_phy_incval_e810(struct ice_hw *hw, u64 incval) { @@ -2875,19 +3021,19 @@ static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) int err; switch (cmd) { - case INIT_TIME: + case ICE_PTP_INIT_TIME: cmd_val = GLTSYN_CMD_INIT_TIME; break; - case INIT_INCVAL: + case ICE_PTP_INIT_INCVAL: cmd_val = GLTSYN_CMD_INIT_INCVAL; break; - case ADJ_TIME: + case ICE_PTP_ADJ_TIME: cmd_val = GLTSYN_CMD_ADJ_TIME; break; - case READ_TIME: + case ICE_PTP_READ_TIME: cmd_val = GLTSYN_CMD_READ_TIME; break; - case ADJ_TIME_AT_TIME: + case ICE_PTP_ADJ_TIME_AT_TIME: cmd_val = GLTSYN_CMD_ADJ_INIT_TIME; break; case ICE_PTP_NOP: @@ -3150,6 +3296,21 @@ void ice_ptp_unlock(struct ice_hw *hw) } /** + * ice_ptp_init_phy_model - Initialize hw->phy_model based on device type + * @hw: pointer to the HW structure + * + * Determine the PHY model for the device, and initialize hw->phy_model + * for use by other functions. + */ +void ice_ptp_init_phy_model(struct ice_hw *hw) +{ + if (ice_is_e810(hw)) + hw->phy_model = ICE_PHY_E810; + else + hw->phy_model = ICE_PHY_E822; +} + +/** * ice_ptp_tmr_cmd - Prepare and trigger a timer sync command * @hw: pointer to HW struct * @cmd: the command to issue @@ -3167,10 +3328,17 @@ static int ice_ptp_tmr_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd) ice_ptp_src_cmd(hw, cmd); /* Next, prepare the ports */ - if (ice_is_e810(hw)) + switch (hw->phy_model) { + case ICE_PHY_E810: err = ice_ptp_port_cmd_e810(hw, cmd); - else + break; + case ICE_PHY_E822: err = ice_ptp_port_cmd_e822(hw, cmd); + break; + default: + err = -EOPNOTSUPP; + } + if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to prepare PHY ports for timer command %u, err %d\n", cmd, err); @@ -3212,14 +3380,21 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time) /* PHY timers */ /* Fill Rx and Tx ports and send msg to PHY */ - if (ice_is_e810(hw)) + switch (hw->phy_model) { + case ICE_PHY_E810: err = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF); - else + break; + case ICE_PHY_E822: err = ice_ptp_prep_phy_time_e822(hw, time & 0xFFFFFFFF); + break; + default: + err = -EOPNOTSUPP; + } + if (err) return err; - return ice_ptp_tmr_cmd(hw, INIT_TIME); + return ice_ptp_tmr_cmd(hw, ICE_PTP_INIT_TIME); } /** @@ -3232,8 +3407,8 @@ int ice_ptp_init_time(struct ice_hw *hw, u64 time) * * 1) Write the increment value to the source timer shadow registers * 2) Write the increment value to the PHY timer shadow registers - * 3) Issue an INIT_INCVAL timer command to synchronously switch both the - * source and port timers to the new increment value at the next clock + * 3) Issue an ICE_PTP_INIT_INCVAL timer command to synchronously switch both + * the source and port timers to the new increment value at the next clock * cycle. */ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval) @@ -3247,14 +3422,21 @@ int ice_ptp_write_incval(struct ice_hw *hw, u64 incval) wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval)); wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval)); - if (ice_is_e810(hw)) + switch (hw->phy_model) { + case ICE_PHY_E810: err = ice_ptp_prep_phy_incval_e810(hw, incval); - else + break; + case ICE_PHY_E822: err = ice_ptp_prep_phy_incval_e822(hw, incval); + break; + default: + err = -EOPNOTSUPP; + } + if (err) return err; - return ice_ptp_tmr_cmd(hw, INIT_INCVAL); + return ice_ptp_tmr_cmd(hw, ICE_PTP_INIT_INCVAL); } /** @@ -3288,8 +3470,8 @@ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval) * * 1) Write the adjustment to the source timer shadow registers * 2) Write the adjustment to the PHY timer shadow registers - * 3) Issue an ADJ_TIME timer command to synchronously apply the adjustment to - * both the source and port timers at the next clock cycle. + * 3) Issue an ICE_PTP_ADJ_TIME timer command to synchronously apply the + * adjustment to both the source and port timers at the next clock cycle. */ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) { @@ -3299,21 +3481,28 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; /* Write the desired clock adjustment into the GLTSYN_SHADJ register. - * For an ADJ_TIME command, this set of registers represents the value - * to add to the clock time. It supports subtraction by interpreting - * the value as a 2's complement integer. + * For an ICE_PTP_ADJ_TIME command, this set of registers represents + * the value to add to the clock time. It supports subtraction by + * interpreting the value as a 2's complement integer. */ wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0); wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj); - if (ice_is_e810(hw)) + switch (hw->phy_model) { + case ICE_PHY_E810: err = ice_ptp_prep_phy_adj_e810(hw, adj); - else + break; + case ICE_PHY_E822: err = ice_ptp_prep_phy_adj_e822(hw, adj); + break; + default: + err = -EOPNOTSUPP; + } + if (err) return err; - return ice_ptp_tmr_cmd(hw, ADJ_TIME); + return ice_ptp_tmr_cmd(hw, ICE_PTP_ADJ_TIME); } /** @@ -3329,10 +3518,14 @@ int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj) */ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) { - if (ice_is_e810(hw)) + switch (hw->phy_model) { + case ICE_PHY_E810: return ice_read_phy_tstamp_e810(hw, block, idx, tstamp); - else + case ICE_PHY_E822: return ice_read_phy_tstamp_e822(hw, block, idx, tstamp); + default: + return -EOPNOTSUPP; + } } /** @@ -3341,16 +3534,110 @@ int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp) * @block: the block to read from * @idx: the timestamp index to reset * - * Clear a timestamp, resetting its valid bit, from the timestamp block. For - * E822 devices, the block is the quad to clear from. For E810 devices, the - * block is the logical port to clear from. + * Clear a timestamp from the timestamp block, discarding its value without + * returning it. This resets the memory status bit for the timestamp index + * allowing it to be reused for another timestamp in the future. + * + * For E822 devices, the block number is the PHY quad to clear from. For E810 + * devices, the block number is the logical port to clear from. + * + * This function must only be called on a timestamp index whose valid bit is + * set according to ice_get_phy_tx_tstamp_ready(). */ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) { - if (ice_is_e810(hw)) + switch (hw->phy_model) { + case ICE_PHY_E810: return ice_clear_phy_tstamp_e810(hw, block, idx); - else + case ICE_PHY_E822: return ice_clear_phy_tstamp_e822(hw, block, idx); + default: + return -EOPNOTSUPP; + } +} + +/** + * ice_is_phy_rclk_present - check recovered clk presence + * @hw: pointer to the hw struct + * + * Check if the PHY Recovered Clock device is present in the netlist + * Return: + * * true - device found in netlist + * * false - device not found + */ +bool ice_is_phy_rclk_present(struct ice_hw *hw) +{ + if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL, + ICE_AQC_GET_LINK_TOPO_NODE_NR_C827, NULL) && + ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL, + ICE_AQC_GET_LINK_TOPO_NODE_NR_E822_PHY, NULL)) + return false; + + return true; +} + +/** + * ice_is_clock_mux_present_e810t + * @hw: pointer to the hw struct + * + * Check if the Clock Multiplexer device is present in the netlist + * Return: + * * true - device found in netlist + * * false - device not found + */ +bool ice_is_clock_mux_present_e810t(struct ice_hw *hw) +{ + if (ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_MUX, + ICE_AQC_GET_LINK_TOPO_NODE_NR_GEN_CLK_MUX, + NULL)) + return false; + + return true; +} + +/** + * ice_get_pf_c827_idx - find and return the C827 index for the current pf + * @hw: pointer to the hw struct + * @idx: index of the found C827 PHY + * Return: + * * 0 - success + * * negative - failure + */ +int ice_get_pf_c827_idx(struct ice_hw *hw, u8 *idx) +{ + struct ice_aqc_get_link_topo cmd; + u8 node_part_number; + u16 node_handle; + int status; + u8 ctx; + + if (hw->mac_type != ICE_MAC_E810) + return -ENODEV; + + if (hw->device_id != ICE_DEV_ID_E810C_QSFP) { + *idx = C827_0; + return 0; + } + + memset(&cmd, 0, sizeof(cmd)); + + ctx = ICE_AQC_LINK_TOPO_NODE_TYPE_PHY << ICE_AQC_LINK_TOPO_NODE_TYPE_S; + ctx |= ICE_AQC_LINK_TOPO_NODE_CTX_PORT << ICE_AQC_LINK_TOPO_NODE_CTX_S; + cmd.addr.topo_params.node_type_ctx = ctx; + + status = ice_aq_get_netlist_node(hw, &cmd, &node_part_number, + &node_handle); + if (status || node_part_number != ICE_AQC_GET_LINK_TOPO_NODE_NR_C827) + return -ENOENT; + + if (node_handle == E810C_QSFP_C827_0_HANDLE) + *idx = C827_0; + else if (node_handle == E810C_QSFP_C827_1_HANDLE) + *idx = C827_1; + else + return -EIO; + + return 0; } /** @@ -3359,10 +3646,14 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) */ void ice_ptp_reset_ts_memory(struct ice_hw *hw) { - if (ice_is_e810(hw)) + switch (hw->phy_model) { + case ICE_PHY_E822: + ice_ptp_reset_ts_memory_e822(hw); + break; + case ICE_PHY_E810: + default: return; - - ice_ptp_reset_ts_memory_e822(hw); + } } /** @@ -3381,10 +3672,14 @@ int ice_ptp_init_phc(struct ice_hw *hw) /* Clear event err indications for auxiliary pins */ (void)rd32(hw, GLTSYN_STAT(src_idx)); - if (ice_is_e810(hw)) + switch (hw->phy_model) { + case ICE_PHY_E810: return ice_ptp_init_phc_e810(hw); - else + case ICE_PHY_E822: return ice_ptp_init_phc_e822(hw); + default: + return -EOPNOTSUPP; + } } /** @@ -3400,10 +3695,335 @@ int ice_ptp_init_phc(struct ice_hw *hw) */ int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready) { - if (ice_is_e810(hw)) + switch (hw->phy_model) { + case ICE_PHY_E810: return ice_get_phy_tx_tstamp_ready_e810(hw, block, tstamp_ready); - else + case ICE_PHY_E822: return ice_get_phy_tx_tstamp_ready_e822(hw, block, tstamp_ready); + break; + default: + return -EOPNOTSUPP; + } +} + +/** + * ice_is_cgu_present - check for CGU presence + * @hw: pointer to the hw struct + * + * Check if the Clock Generation Unit (CGU) device is present in the netlist + * Return: + * * true - cgu is present + * * false - cgu is not present + */ +bool ice_is_cgu_present(struct ice_hw *hw) +{ + if (!ice_find_netlist_node(hw, ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL, + ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032, + NULL)) { + hw->cgu_part_number = ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032; + return true; + } else if (!ice_find_netlist_node(hw, + ICE_AQC_LINK_TOPO_NODE_TYPE_CLK_CTRL, + ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384, + NULL)) { + hw->cgu_part_number = ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384; + return true; + } + + return false; +} + +/** + * ice_cgu_get_pin_desc_e823 - get pin description array + * @hw: pointer to the hw struct + * @input: if request is done against input or output pin + * @size: number of inputs/outputs + * + * Return: pointer to pin description array associated to given hw. + */ +static const struct ice_cgu_pin_desc * +ice_cgu_get_pin_desc_e823(struct ice_hw *hw, bool input, int *size) +{ + static const struct ice_cgu_pin_desc *t; + + if (hw->cgu_part_number == + ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032) { + if (input) { + t = ice_e823_zl_cgu_inputs; + *size = ARRAY_SIZE(ice_e823_zl_cgu_inputs); + } else { + t = ice_e823_zl_cgu_outputs; + *size = ARRAY_SIZE(ice_e823_zl_cgu_outputs); + } + } else if (hw->cgu_part_number == + ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384) { + if (input) { + t = ice_e823_si_cgu_inputs; + *size = ARRAY_SIZE(ice_e823_si_cgu_inputs); + } else { + t = ice_e823_si_cgu_outputs; + *size = ARRAY_SIZE(ice_e823_si_cgu_outputs); + } + } else { + t = NULL; + *size = 0; + } + + return t; +} + +/** + * ice_cgu_get_pin_desc - get pin description array + * @hw: pointer to the hw struct + * @input: if request is done against input or output pins + * @size: size of array returned by function + * + * Return: pointer to pin description array associated to given hw. + */ +static const struct ice_cgu_pin_desc * +ice_cgu_get_pin_desc(struct ice_hw *hw, bool input, int *size) +{ + const struct ice_cgu_pin_desc *t = NULL; + + switch (hw->device_id) { + case ICE_DEV_ID_E810C_SFP: + if (input) { + t = ice_e810t_sfp_cgu_inputs; + *size = ARRAY_SIZE(ice_e810t_sfp_cgu_inputs); + } else { + t = ice_e810t_sfp_cgu_outputs; + *size = ARRAY_SIZE(ice_e810t_sfp_cgu_outputs); + } + break; + case ICE_DEV_ID_E810C_QSFP: + if (input) { + t = ice_e810t_qsfp_cgu_inputs; + *size = ARRAY_SIZE(ice_e810t_qsfp_cgu_inputs); + } else { + t = ice_e810t_qsfp_cgu_outputs; + *size = ARRAY_SIZE(ice_e810t_qsfp_cgu_outputs); + } + break; + case ICE_DEV_ID_E823L_10G_BASE_T: + case ICE_DEV_ID_E823L_1GBE: + case ICE_DEV_ID_E823L_BACKPLANE: + case ICE_DEV_ID_E823L_QSFP: + case ICE_DEV_ID_E823L_SFP: + case ICE_DEV_ID_E823C_10G_BASE_T: + case ICE_DEV_ID_E823C_BACKPLANE: + case ICE_DEV_ID_E823C_QSFP: + case ICE_DEV_ID_E823C_SFP: + case ICE_DEV_ID_E823C_SGMII: + t = ice_cgu_get_pin_desc_e823(hw, input, size); + break; + default: + break; + } + + return t; +} + +/** + * ice_cgu_get_pin_type - get pin's type + * @hw: pointer to the hw struct + * @pin: pin index + * @input: if request is done against input or output pin + * + * Return: type of a pin. + */ +enum dpll_pin_type ice_cgu_get_pin_type(struct ice_hw *hw, u8 pin, bool input) +{ + const struct ice_cgu_pin_desc *t; + int t_size; + + t = ice_cgu_get_pin_desc(hw, input, &t_size); + + if (!t) + return 0; + + if (pin >= t_size) + return 0; + + return t[pin].type; +} + +/** + * ice_cgu_get_pin_freq_supp - get pin's supported frequency + * @hw: pointer to the hw struct + * @pin: pin index + * @input: if request is done against input or output pin + * @num: output number of supported frequencies + * + * Get frequency supported number and array of supported frequencies. + * + * Return: array of supported frequencies for given pin. + */ +struct dpll_pin_frequency * +ice_cgu_get_pin_freq_supp(struct ice_hw *hw, u8 pin, bool input, u8 *num) +{ + const struct ice_cgu_pin_desc *t; + int t_size; + + *num = 0; + t = ice_cgu_get_pin_desc(hw, input, &t_size); + if (!t) + return NULL; + if (pin >= t_size) + return NULL; + *num = t[pin].freq_supp_num; + + return t[pin].freq_supp; +} + +/** + * ice_cgu_get_pin_name - get pin's name + * @hw: pointer to the hw struct + * @pin: pin index + * @input: if request is done against input or output pin + * + * Return: + * * null terminated char array with name + * * NULL in case of failure + */ +const char *ice_cgu_get_pin_name(struct ice_hw *hw, u8 pin, bool input) +{ + const struct ice_cgu_pin_desc *t; + int t_size; + + t = ice_cgu_get_pin_desc(hw, input, &t_size); + + if (!t) + return NULL; + + if (pin >= t_size) + return NULL; + + return t[pin].name; +} + +/** + * ice_get_cgu_state - get the state of the DPLL + * @hw: pointer to the hw struct + * @dpll_idx: Index of internal DPLL unit + * @last_dpll_state: last known state of DPLL + * @pin: pointer to a buffer for returning currently active pin + * @ref_state: reference clock state + * @eec_mode: eec mode of the DPLL + * @phase_offset: pointer to a buffer for returning phase offset + * @dpll_state: state of the DPLL (output) + * + * This function will read the state of the DPLL(dpll_idx). Non-null + * 'pin', 'ref_state', 'eec_mode' and 'phase_offset' parameters are used to + * retrieve currently active pin, state, mode and phase_offset respectively. + * + * Return: state of the DPLL + */ +int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx, + enum dpll_lock_status last_dpll_state, u8 *pin, + u8 *ref_state, u8 *eec_mode, s64 *phase_offset, + enum dpll_lock_status *dpll_state) +{ + u8 hw_ref_state, hw_dpll_state, hw_eec_mode, hw_config; + s64 hw_phase_offset; + int status; + + status = ice_aq_get_cgu_dpll_status(hw, dpll_idx, &hw_ref_state, + &hw_dpll_state, &hw_config, + &hw_phase_offset, &hw_eec_mode); + if (status) + return status; + + if (pin) + /* current ref pin in dpll_state_refsel_status_X register */ + *pin = hw_config & ICE_AQC_GET_CGU_DPLL_CONFIG_CLK_REF_SEL; + if (phase_offset) + *phase_offset = hw_phase_offset; + if (ref_state) + *ref_state = hw_ref_state; + if (eec_mode) + *eec_mode = hw_eec_mode; + if (!dpll_state) + return 0; + + /* According to ZL DPLL documentation, once state reach LOCKED_HO_ACQ + * it would never return to FREERUN. This aligns to ITU-T G.781 + * Recommendation. We cannot report HOLDOVER as HO memory is cleared + * while switching to another reference. + * Only for situations where previous state was either: "LOCKED without + * HO_ACQ" or "HOLDOVER" we actually back to FREERUN. + */ + if (hw_dpll_state & ICE_AQC_GET_CGU_DPLL_STATUS_STATE_LOCK) { + if (hw_dpll_state & ICE_AQC_GET_CGU_DPLL_STATUS_STATE_HO_READY) + *dpll_state = DPLL_LOCK_STATUS_LOCKED_HO_ACQ; + else + *dpll_state = DPLL_LOCK_STATUS_LOCKED; + } else if (last_dpll_state == DPLL_LOCK_STATUS_LOCKED_HO_ACQ || + last_dpll_state == DPLL_LOCK_STATUS_HOLDOVER) { + *dpll_state = DPLL_LOCK_STATUS_HOLDOVER; + } else { + *dpll_state = DPLL_LOCK_STATUS_UNLOCKED; + } + + return 0; +} + +/** + * ice_get_cgu_rclk_pin_info - get info on available recovered clock pins + * @hw: pointer to the hw struct + * @base_idx: returns index of first recovered clock pin on device + * @pin_num: returns number of recovered clock pins available on device + * + * Based on hw provide caller info about recovery clock pins available on the + * board. + * + * Return: + * * 0 - success, information is valid + * * negative - failure, information is not valid + */ +int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num) +{ + u8 phy_idx; + int ret; + + switch (hw->device_id) { + case ICE_DEV_ID_E810C_SFP: + case ICE_DEV_ID_E810C_QSFP: + + ret = ice_get_pf_c827_idx(hw, &phy_idx); + if (ret) + return ret; + *base_idx = E810T_CGU_INPUT_C827(phy_idx, ICE_RCLKA_PIN); + *pin_num = ICE_E810_RCLK_PINS_NUM; + ret = 0; + break; + case ICE_DEV_ID_E823L_10G_BASE_T: + case ICE_DEV_ID_E823L_1GBE: + case ICE_DEV_ID_E823L_BACKPLANE: + case ICE_DEV_ID_E823L_QSFP: + case ICE_DEV_ID_E823L_SFP: + case ICE_DEV_ID_E823C_10G_BASE_T: + case ICE_DEV_ID_E823C_BACKPLANE: + case ICE_DEV_ID_E823C_QSFP: + case ICE_DEV_ID_E823C_SFP: + case ICE_DEV_ID_E823C_SGMII: + *pin_num = ICE_E822_RCLK_PINS_NUM; + ret = 0; + if (hw->cgu_part_number == + ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032) + *base_idx = ZL_REF1P; + else if (hw->cgu_part_number == + ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384) + *base_idx = SI_REF1P; + else + ret = -ENODEV; + + break; + default: + ret = -ENODEV; + break; + } + + return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 9aa10b0426fd..6f277e7b06b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -3,13 +3,14 @@ #ifndef _ICE_PTP_HW_H_ #define _ICE_PTP_HW_H_ +#include <linux/dpll.h> enum ice_ptp_tmr_cmd { - INIT_TIME, - INIT_INCVAL, - ADJ_TIME, - ADJ_TIME_AT_TIME, - READ_TIME, + ICE_PTP_INIT_TIME, + ICE_PTP_INIT_INCVAL, + ICE_PTP_ADJ_TIME, + ICE_PTP_ADJ_TIME_AT_TIME, + ICE_PTP_READ_TIME, ICE_PTP_NOP, }; @@ -110,6 +111,77 @@ struct ice_cgu_pll_params_e822 { u32 post_pll_div; }; +#define E810C_QSFP_C827_0_HANDLE 2 +#define E810C_QSFP_C827_1_HANDLE 3 +enum ice_e810_c827_idx { + C827_0, + C827_1 +}; + +enum ice_phy_rclk_pins { + ICE_RCLKA_PIN = 0, /* SCL pin */ + ICE_RCLKB_PIN, /* SDA pin */ +}; + +#define ICE_E810_RCLK_PINS_NUM (ICE_RCLKB_PIN + 1) +#define ICE_E822_RCLK_PINS_NUM (ICE_RCLKA_PIN + 1) +#define E810T_CGU_INPUT_C827(_phy, _pin) ((_phy) * ICE_E810_RCLK_PINS_NUM + \ + (_pin) + ZL_REF1P) + +enum ice_zl_cgu_in_pins { + ZL_REF0P = 0, + ZL_REF0N, + ZL_REF1P, + ZL_REF1N, + ZL_REF2P, + ZL_REF2N, + ZL_REF3P, + ZL_REF3N, + ZL_REF4P, + ZL_REF4N, + NUM_ZL_CGU_INPUT_PINS +}; + +enum ice_zl_cgu_out_pins { + ZL_OUT0 = 0, + ZL_OUT1, + ZL_OUT2, + ZL_OUT3, + ZL_OUT4, + ZL_OUT5, + ZL_OUT6, + NUM_ZL_CGU_OUTPUT_PINS +}; + +enum ice_si_cgu_in_pins { + SI_REF0P = 0, + SI_REF0N, + SI_REF1P, + SI_REF1N, + SI_REF2P, + SI_REF2N, + SI_REF3, + SI_REF4, + NUM_SI_CGU_INPUT_PINS +}; + +enum ice_si_cgu_out_pins { + SI_OUT0 = 0, + SI_OUT1, + SI_OUT2, + SI_OUT3, + SI_OUT4, + NUM_SI_CGU_OUTPUT_PINS +}; + +struct ice_cgu_pin_desc { + char *name; + u8 index; + enum dpll_pin_type type; + u32 freq_supp_num; + struct dpll_pin_frequency *freq_supp; +}; + extern const struct ice_cgu_pll_params_e822 e822_cgu_params[NUM_ICE_TIME_REF_FREQ]; @@ -131,6 +203,7 @@ extern const struct ice_vernier_info_e822 e822_vernier[NUM_ICE_PTP_LNK_SPD]; u8 ice_get_ptp_src_clock_index(struct ice_hw *hw); bool ice_ptp_lock(struct ice_hw *hw); void ice_ptp_unlock(struct ice_hw *hw); +void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd); int ice_ptp_init_time(struct ice_hw *hw, u64 time); int ice_ptp_write_incval(struct ice_hw *hw, u64 incval); int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval); @@ -197,6 +270,22 @@ int ice_ptp_init_phy_e810(struct ice_hw *hw); int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data); int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data); int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data); +bool ice_is_pca9575_present(struct ice_hw *hw); +bool ice_is_phy_rclk_present(struct ice_hw *hw); +bool ice_is_clock_mux_present_e810t(struct ice_hw *hw); +int ice_get_pf_c827_idx(struct ice_hw *hw, u8 *idx); +bool ice_is_cgu_present(struct ice_hw *hw); +enum dpll_pin_type ice_cgu_get_pin_type(struct ice_hw *hw, u8 pin, bool input); +struct dpll_pin_frequency * +ice_cgu_get_pin_freq_supp(struct ice_hw *hw, u8 pin, bool input, u8 *num); +const char *ice_cgu_get_pin_name(struct ice_hw *hw, u8 pin, bool input); +int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx, + enum dpll_lock_status last_dpll_state, u8 *pin, + u8 *ref_state, u8 *eec_mode, s64 *phase_offset, + enum dpll_lock_status *dpll_state); +int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num); + +void ice_ptp_init_phy_model(struct ice_hw *hw); #define PFTSYN_SEM_BYTES 4 diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 5e353b0cbe6f..bb5d8b681bc2 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -822,6 +822,13 @@ struct ice_mbx_data { u16 async_watermark_val; }; +/* PHY model */ +enum ice_phy_model { + ICE_PHY_UNSUP = -1, + ICE_PHY_E810 = 1, + ICE_PHY_E822, +}; + /* Port hardware description */ struct ice_hw { u8 __iomem *hw_addr; @@ -843,6 +850,7 @@ struct ice_hw { u8 revision_id; u8 pf_id; /* device profile info */ + enum ice_phy_model phy_model; u16 max_burst_size; /* driver sets this value */ @@ -901,13 +909,13 @@ struct ice_hw { /* INTRL granularity in 1 us */ u8 intrl_gran; -#define ICE_PHY_PER_NAC 1 -#define ICE_MAX_QUAD 2 -#define ICE_NUM_QUAD_TYPE 2 -#define ICE_PORTS_PER_QUAD 4 -#define ICE_PHY_0_LAST_QUAD 1 -#define ICE_PORTS_PER_PHY 8 -#define ICE_NUM_EXTERNAL_PORTS ICE_PORTS_PER_PHY +#define ICE_PHY_PER_NAC_E822 1 +#define ICE_MAX_QUAD 2 +#define ICE_QUADS_PER_PHY_E822 2 +#define ICE_PORTS_PER_PHY_E822 8 +#define ICE_PORTS_PER_QUAD 4 +#define ICE_PORTS_PER_PHY_E810 4 +#define ICE_NUM_EXTERNAL_PORTS (ICE_MAX_QUAD * ICE_PORTS_PER_QUAD) /* Active package version (currently active) */ struct ice_pkg_ver active_pkg_ver; @@ -965,6 +973,7 @@ struct ice_hw { DECLARE_BITMAP(hw_ptype, ICE_FLOW_PTYPE_MAX); u8 dvm_ena; u16 io_expander_handle; + u8 cgu_part_number; }; /* Statistics collected by each port, VSI, VEB, and S-channel */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index 48fea6fa0362..31a082e8a827 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -123,6 +123,9 @@ struct ice_vf { u8 num_req_qs; /* num of queue pairs requested by VF */ u16 num_mac; u16 num_vf_qs; /* num of queue configured per VF */ + u8 vlan_strip_ena; /* Outer and Inner VLAN strip enable */ +#define ICE_INNER_VLAN_STRIP_ENA BIT(0) +#define ICE_OUTER_VLAN_STRIP_ENA BIT(1) struct ice_mdd_vf_events mdd_rx_events; struct ice_mdd_vf_events mdd_tx_events; DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index db97353efd06..01e88b6e43a1 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -486,6 +486,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_CRC; + if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED) vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED; @@ -1621,6 +1624,15 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } for (i = 0; i < qci->num_queue_pairs; i++) { + if (!qci->qpair[i].rxq.crc_disable) + continue; + + if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC) || + vf->vlan_strip_ena) + goto error_param; + } + + for (i = 0; i < qci->num_queue_pairs; i++) { qpi = &qci->qpair[i]; if (qpi->txq.vsi_id != qci->vsi_id || qpi->rxq.vsi_id != qci->vsi_id || @@ -1666,6 +1678,13 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; vsi->rx_rings[i]->count = qpi->rxq.ring_len; + if (qpi->rxq.crc_disable) + vsi->rx_rings[q_idx]->flags |= + ICE_RX_FLAGS_CRC_STRIP_DIS; + else + vsi->rx_rings[q_idx]->flags &= + ~ICE_RX_FLAGS_CRC_STRIP_DIS; + if (qpi->rxq.databuffer_size != 0 && (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || qpi->rxq.databuffer_size < 1024)) @@ -2411,6 +2430,21 @@ static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg) } /** + * ice_vsi_is_rxq_crc_strip_dis - check if Rx queue CRC strip is disabled or not + * @vsi: pointer to the VF VSI info + */ +static bool ice_vsi_is_rxq_crc_strip_dis(struct ice_vsi *vsi) +{ + unsigned int i; + + ice_for_each_alloc_rxq(vsi, i) + if (vsi->rx_rings[i]->flags & ICE_RX_FLAGS_CRC_STRIP_DIS) + return true; + + return false; +} + +/** * ice_vc_ena_vlan_stripping * @vf: pointer to the VF info * @@ -2439,6 +2473,8 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf) if (vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; + else + vf->vlan_strip_ena |= ICE_INNER_VLAN_STRIP_ENA; error_param: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, @@ -2474,6 +2510,8 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf) if (vsi->inner_vlan_ops.dis_stripping(vsi)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; + else + vf->vlan_strip_ena &= ~ICE_INNER_VLAN_STRIP_ENA; error_param: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, @@ -2651,6 +2689,8 @@ static int ice_vf_init_vlan_stripping(struct ice_vf *vf) { struct ice_vsi *vsi = ice_get_vf_vsi(vf); + vf->vlan_strip_ena = 0; + if (!vsi) return -EINVAL; @@ -2660,10 +2700,16 @@ static int ice_vf_init_vlan_stripping(struct ice_vf *vf) if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&vsi->back->hw)) return 0; - if (ice_vf_vlan_offload_ena(vf->driver_caps)) - return vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q); - else - return vsi->inner_vlan_ops.dis_stripping(vsi); + if (ice_vf_vlan_offload_ena(vf->driver_caps)) { + int err; + + err = vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q); + if (!err) + vf->vlan_strip_ena |= ICE_INNER_VLAN_STRIP_ENA; + return err; + } + + return vsi->inner_vlan_ops.dis_stripping(vsi); } static u16 ice_vc_get_max_vlan_fltrs(struct ice_vf *vf) @@ -3437,6 +3483,11 @@ static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg) goto out; } + if (ice_vsi_is_rxq_crc_strip_dis(vsi)) { + v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + goto out; + } + ethertype_setting = strip_msg->outer_ethertype_setting; if (ethertype_setting) { if (ice_vc_ena_vlan_offload(vsi, @@ -3457,6 +3508,8 @@ static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg) * enabled, is extracted in L2TAG1. */ ice_vsi_update_l2tsel(vsi, l2tsel); + + vf->vlan_strip_ena |= ICE_OUTER_VLAN_STRIP_ENA; } } @@ -3468,6 +3521,9 @@ static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg) goto out; } + if (ethertype_setting) + vf->vlan_strip_ena |= ICE_INNER_VLAN_STRIP_ENA; + out: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2, v_ret, NULL, 0); @@ -3529,6 +3585,8 @@ static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg) * in L2TAG1. */ ice_vsi_update_l2tsel(vsi, l2tsel); + + vf->vlan_strip_ena &= ~ICE_OUTER_VLAN_STRIP_ENA; } } @@ -3538,6 +3596,9 @@ static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg) goto out; } + if (ethertype_setting) + vf->vlan_strip_ena &= ~ICE_INNER_VLAN_STRIP_ENA; + out: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2, v_ret, NULL, 0); diff --git a/drivers/net/ethernet/intel/idpf/Makefile b/drivers/net/ethernet/intel/idpf/Makefile new file mode 100644 index 000000000000..6844ead2f3ac --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2023 Intel Corporation + +# Makefile for Intel(R) Infrastructure Data Path Function Linux Driver + +obj-$(CONFIG_IDPF) += idpf.o + +idpf-y := \ + idpf_controlq.o \ + idpf_controlq_setup.o \ + idpf_dev.o \ + idpf_ethtool.o \ + idpf_lib.o \ + idpf_main.o \ + idpf_singleq_txrx.o \ + idpf_txrx.o \ + idpf_virtchnl.o \ + idpf_vf_dev.o diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h new file mode 100644 index 000000000000..bee73353b56a --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -0,0 +1,968 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_H_ +#define _IDPF_H_ + +/* Forward declaration */ +struct idpf_adapter; +struct idpf_vport; +struct idpf_vport_max_q; + +#include <net/pkt_sched.h> +#include <linux/aer.h> +#include <linux/etherdevice.h> +#include <linux/pci.h> +#include <linux/bitfield.h> +#include <linux/sctp.h> +#include <linux/ethtool.h> +#include <net/gro.h> +#include <linux/dim.h> + +#include "virtchnl2.h" +#include "idpf_lan_txrx.h" +#include "idpf_txrx.h" +#include "idpf_controlq.h" + +#define GETMAXVAL(num_bits) GENMASK((num_bits) - 1, 0) + +#define IDPF_NO_FREE_SLOT 0xffff + +/* Default Mailbox settings */ +#define IDPF_NUM_FILTERS_PER_MSG 20 +#define IDPF_NUM_DFLT_MBX_Q 2 /* includes both TX and RX */ +#define IDPF_DFLT_MBX_Q_LEN 64 +#define IDPF_DFLT_MBX_ID -1 +/* maximum number of times to try before resetting mailbox */ +#define IDPF_MB_MAX_ERR 20 +#define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz) \ + ((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz)) +#define IDPF_WAIT_FOR_EVENT_TIMEO_MIN 2000 +#define IDPF_WAIT_FOR_EVENT_TIMEO 60000 + +#define IDPF_MAX_WAIT 500 + +/* available message levels */ +#define IDPF_AVAIL_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) + +#define IDPF_DIM_PROFILE_SLOTS 5 + +#define IDPF_VIRTCHNL_VERSION_MAJOR VIRTCHNL2_VERSION_MAJOR_2 +#define IDPF_VIRTCHNL_VERSION_MINOR VIRTCHNL2_VERSION_MINOR_0 + +/** + * struct idpf_mac_filter + * @list: list member field + * @macaddr: MAC address + * @remove: filter should be removed (virtchnl) + * @add: filter should be added (virtchnl) + */ +struct idpf_mac_filter { + struct list_head list; + u8 macaddr[ETH_ALEN]; + bool remove; + bool add; +}; + +/** + * enum idpf_state - State machine to handle bring up + * @__IDPF_STARTUP: Start the state machine + * @__IDPF_VER_CHECK: Negotiate virtchnl version + * @__IDPF_GET_CAPS: Negotiate capabilities + * @__IDPF_INIT_SW: Init based on given capabilities + * @__IDPF_STATE_LAST: Must be last, used to determine size + */ +enum idpf_state { + __IDPF_STARTUP, + __IDPF_VER_CHECK, + __IDPF_GET_CAPS, + __IDPF_INIT_SW, + __IDPF_STATE_LAST, +}; + +/** + * enum idpf_flags - Hard reset causes. + * @IDPF_HR_FUNC_RESET: Hard reset when TxRx timeout + * @IDPF_HR_DRV_LOAD: Set on driver load for a clean HW + * @IDPF_HR_RESET_IN_PROG: Reset in progress + * @IDPF_REMOVE_IN_PROG: Driver remove in progress + * @IDPF_MB_INTR_MODE: Mailbox in interrupt mode + * @IDPF_FLAGS_NBITS: Must be last + */ +enum idpf_flags { + IDPF_HR_FUNC_RESET, + IDPF_HR_DRV_LOAD, + IDPF_HR_RESET_IN_PROG, + IDPF_REMOVE_IN_PROG, + IDPF_MB_INTR_MODE, + IDPF_FLAGS_NBITS, +}; + +/** + * enum idpf_cap_field - Offsets into capabilities struct for specific caps + * @IDPF_BASE_CAPS: generic base capabilities + * @IDPF_CSUM_CAPS: checksum offload capabilities + * @IDPF_SEG_CAPS: segmentation offload capabilities + * @IDPF_RSS_CAPS: RSS offload capabilities + * @IDPF_HSPLIT_CAPS: Header split capabilities + * @IDPF_RSC_CAPS: RSC offload capabilities + * @IDPF_OTHER_CAPS: miscellaneous offloads + * + * Used when checking for a specific capability flag since different capability + * sets are not mutually exclusive numerically, the caller must specify which + * type of capability they are checking for. + */ +enum idpf_cap_field { + IDPF_BASE_CAPS = -1, + IDPF_CSUM_CAPS = offsetof(struct virtchnl2_get_capabilities, + csum_caps), + IDPF_SEG_CAPS = offsetof(struct virtchnl2_get_capabilities, + seg_caps), + IDPF_RSS_CAPS = offsetof(struct virtchnl2_get_capabilities, + rss_caps), + IDPF_HSPLIT_CAPS = offsetof(struct virtchnl2_get_capabilities, + hsplit_caps), + IDPF_RSC_CAPS = offsetof(struct virtchnl2_get_capabilities, + rsc_caps), + IDPF_OTHER_CAPS = offsetof(struct virtchnl2_get_capabilities, + other_caps), +}; + +/** + * enum idpf_vport_state - Current vport state + * @__IDPF_VPORT_DOWN: Vport is down + * @__IDPF_VPORT_UP: Vport is up + * @__IDPF_VPORT_STATE_LAST: Must be last, number of states + */ +enum idpf_vport_state { + __IDPF_VPORT_DOWN, + __IDPF_VPORT_UP, + __IDPF_VPORT_STATE_LAST, +}; + +/** + * struct idpf_netdev_priv - Struct to store vport back pointer + * @adapter: Adapter back pointer + * @vport: Vport back pointer + * @vport_id: Vport identifier + * @vport_idx: Relative vport index + * @state: See enum idpf_vport_state + * @netstats: Packet and byte stats + * @stats_lock: Lock to protect stats update + */ +struct idpf_netdev_priv { + struct idpf_adapter *adapter; + struct idpf_vport *vport; + u32 vport_id; + u16 vport_idx; + enum idpf_vport_state state; + struct rtnl_link_stats64 netstats; + spinlock_t stats_lock; +}; + +/** + * struct idpf_reset_reg - Reset register offsets/masks + * @rstat: Reset status register + * @rstat_m: Reset status mask + */ +struct idpf_reset_reg { + void __iomem *rstat; + u32 rstat_m; +}; + +/** + * struct idpf_vport_max_q - Queue limits + * @max_rxq: Maximum number of RX queues supported + * @max_txq: Maixmum number of TX queues supported + * @max_bufq: In splitq, maximum number of buffer queues supported + * @max_complq: In splitq, maximum number of completion queues supported + */ +struct idpf_vport_max_q { + u16 max_rxq; + u16 max_txq; + u16 max_bufq; + u16 max_complq; +}; + +/** + * struct idpf_reg_ops - Device specific register operation function pointers + * @ctlq_reg_init: Mailbox control queue register initialization + * @intr_reg_init: Traffic interrupt register initialization + * @mb_intr_reg_init: Mailbox interrupt register initialization + * @reset_reg_init: Reset register initialization + * @trigger_reset: Trigger a reset to occur + */ +struct idpf_reg_ops { + void (*ctlq_reg_init)(struct idpf_ctlq_create_info *cq); + int (*intr_reg_init)(struct idpf_vport *vport); + void (*mb_intr_reg_init)(struct idpf_adapter *adapter); + void (*reset_reg_init)(struct idpf_adapter *adapter); + void (*trigger_reset)(struct idpf_adapter *adapter, + enum idpf_flags trig_cause); +}; + +/** + * struct idpf_dev_ops - Device specific operations + * @reg_ops: Register operations + */ +struct idpf_dev_ops { + struct idpf_reg_ops reg_ops; +}; + +/* These macros allow us to generate an enum and a matching char * array of + * stringified enums that are always in sync. Checkpatch issues a bogus warning + * about this being a complex macro; but it's wrong, these are never used as a + * statement and instead only used to define the enum and array. + */ +#define IDPF_FOREACH_VPORT_VC_STATE(STATE) \ + STATE(IDPF_VC_CREATE_VPORT) \ + STATE(IDPF_VC_CREATE_VPORT_ERR) \ + STATE(IDPF_VC_ENA_VPORT) \ + STATE(IDPF_VC_ENA_VPORT_ERR) \ + STATE(IDPF_VC_DIS_VPORT) \ + STATE(IDPF_VC_DIS_VPORT_ERR) \ + STATE(IDPF_VC_DESTROY_VPORT) \ + STATE(IDPF_VC_DESTROY_VPORT_ERR) \ + STATE(IDPF_VC_CONFIG_TXQ) \ + STATE(IDPF_VC_CONFIG_TXQ_ERR) \ + STATE(IDPF_VC_CONFIG_RXQ) \ + STATE(IDPF_VC_CONFIG_RXQ_ERR) \ + STATE(IDPF_VC_ENA_QUEUES) \ + STATE(IDPF_VC_ENA_QUEUES_ERR) \ + STATE(IDPF_VC_DIS_QUEUES) \ + STATE(IDPF_VC_DIS_QUEUES_ERR) \ + STATE(IDPF_VC_MAP_IRQ) \ + STATE(IDPF_VC_MAP_IRQ_ERR) \ + STATE(IDPF_VC_UNMAP_IRQ) \ + STATE(IDPF_VC_UNMAP_IRQ_ERR) \ + STATE(IDPF_VC_ADD_QUEUES) \ + STATE(IDPF_VC_ADD_QUEUES_ERR) \ + STATE(IDPF_VC_DEL_QUEUES) \ + STATE(IDPF_VC_DEL_QUEUES_ERR) \ + STATE(IDPF_VC_ALLOC_VECTORS) \ + STATE(IDPF_VC_ALLOC_VECTORS_ERR) \ + STATE(IDPF_VC_DEALLOC_VECTORS) \ + STATE(IDPF_VC_DEALLOC_VECTORS_ERR) \ + STATE(IDPF_VC_SET_SRIOV_VFS) \ + STATE(IDPF_VC_SET_SRIOV_VFS_ERR) \ + STATE(IDPF_VC_GET_RSS_LUT) \ + STATE(IDPF_VC_GET_RSS_LUT_ERR) \ + STATE(IDPF_VC_SET_RSS_LUT) \ + STATE(IDPF_VC_SET_RSS_LUT_ERR) \ + STATE(IDPF_VC_GET_RSS_KEY) \ + STATE(IDPF_VC_GET_RSS_KEY_ERR) \ + STATE(IDPF_VC_SET_RSS_KEY) \ + STATE(IDPF_VC_SET_RSS_KEY_ERR) \ + STATE(IDPF_VC_GET_STATS) \ + STATE(IDPF_VC_GET_STATS_ERR) \ + STATE(IDPF_VC_ADD_MAC_ADDR) \ + STATE(IDPF_VC_ADD_MAC_ADDR_ERR) \ + STATE(IDPF_VC_DEL_MAC_ADDR) \ + STATE(IDPF_VC_DEL_MAC_ADDR_ERR) \ + STATE(IDPF_VC_GET_PTYPE_INFO) \ + STATE(IDPF_VC_GET_PTYPE_INFO_ERR) \ + STATE(IDPF_VC_LOOPBACK_STATE) \ + STATE(IDPF_VC_LOOPBACK_STATE_ERR) \ + STATE(IDPF_VC_NBITS) + +#define IDPF_GEN_ENUM(ENUM) ENUM, +#define IDPF_GEN_STRING(STRING) #STRING, + +enum idpf_vport_vc_state { + IDPF_FOREACH_VPORT_VC_STATE(IDPF_GEN_ENUM) +}; + +extern const char * const idpf_vport_vc_state_str[]; + +/** + * enum idpf_vport_reset_cause - Vport soft reset causes + * @IDPF_SR_Q_CHANGE: Soft reset queue change + * @IDPF_SR_Q_DESC_CHANGE: Soft reset descriptor change + * @IDPF_SR_MTU_CHANGE: Soft reset MTU change + * @IDPF_SR_RSC_CHANGE: Soft reset RSC change + */ +enum idpf_vport_reset_cause { + IDPF_SR_Q_CHANGE, + IDPF_SR_Q_DESC_CHANGE, + IDPF_SR_MTU_CHANGE, + IDPF_SR_RSC_CHANGE, +}; + +/** + * enum idpf_vport_flags - Vport flags + * @IDPF_VPORT_DEL_QUEUES: To send delete queues message + * @IDPF_VPORT_SW_MARKER: Indicate TX pipe drain software marker packets + * processing is done + * @IDPF_VPORT_FLAGS_NBITS: Must be last + */ +enum idpf_vport_flags { + IDPF_VPORT_DEL_QUEUES, + IDPF_VPORT_SW_MARKER, + IDPF_VPORT_FLAGS_NBITS, +}; + +struct idpf_port_stats { + struct u64_stats_sync stats_sync; + u64_stats_t rx_hw_csum_err; + u64_stats_t rx_hsplit; + u64_stats_t rx_hsplit_hbo; + u64_stats_t rx_bad_descs; + u64_stats_t tx_linearize; + u64_stats_t tx_busy; + u64_stats_t tx_drops; + u64_stats_t tx_dma_map_errs; + struct virtchnl2_vport_stats vport_stats; +}; + +/** + * struct idpf_vport - Handle for netdevices and queue resources + * @num_txq: Number of allocated TX queues + * @num_complq: Number of allocated completion queues + * @txq_desc_count: TX queue descriptor count + * @complq_desc_count: Completion queue descriptor count + * @compln_clean_budget: Work budget for completion clean + * @num_txq_grp: Number of TX queue groups + * @txq_grps: Array of TX queue groups + * @txq_model: Split queue or single queue queuing model + * @txqs: Used only in hotpath to get to the right queue very fast + * @crc_enable: Enable CRC insertion offload + * @num_rxq: Number of allocated RX queues + * @num_bufq: Number of allocated buffer queues + * @rxq_desc_count: RX queue descriptor count. *MUST* have enough descriptors + * to complete all buffer descriptors for all buffer queues in + * the worst case. + * @num_bufqs_per_qgrp: Buffer queues per RX queue in a given grouping + * @bufq_desc_count: Buffer queue descriptor count + * @bufq_size: Size of buffers in ring (e.g. 2K, 4K, etc) + * @num_rxq_grp: Number of RX queues in a group + * @rxq_grps: Total number of RX groups. Number of groups * number of RX per + * group will yield total number of RX queues. + * @rxq_model: Splitq queue or single queue queuing model + * @rx_ptype_lkup: Lookup table for ptypes on RX + * @adapter: back pointer to associated adapter + * @netdev: Associated net_device. Each vport should have one and only one + * associated netdev. + * @flags: See enum idpf_vport_flags + * @vport_type: Default SRIOV, SIOV, etc. + * @vport_id: Device given vport identifier + * @idx: Software index in adapter vports struct + * @default_vport: Use this vport if one isn't specified + * @base_rxd: True if the driver should use base descriptors instead of flex + * @num_q_vectors: Number of IRQ vectors allocated + * @q_vectors: Array of queue vectors + * @q_vector_idxs: Starting index of queue vectors + * @max_mtu: device given max possible MTU + * @default_mac_addr: device will give a default MAC to use + * @rx_itr_profile: RX profiles for Dynamic Interrupt Moderation + * @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation + * @port_stats: per port csum, header split, and other offload stats + * @link_up: True if link is up + * @link_speed_mbps: Link speed in mbps + * @vc_msg: Virtchnl message buffer + * @vc_state: Virtchnl message state + * @vchnl_wq: Wait queue for virtchnl messages + * @sw_marker_wq: workqueue for marker packets + * @vc_buf_lock: Lock to protect virtchnl buffer + */ +struct idpf_vport { + u16 num_txq; + u16 num_complq; + u32 txq_desc_count; + u32 complq_desc_count; + u32 compln_clean_budget; + u16 num_txq_grp; + struct idpf_txq_group *txq_grps; + u32 txq_model; + struct idpf_queue **txqs; + bool crc_enable; + + u16 num_rxq; + u16 num_bufq; + u32 rxq_desc_count; + u8 num_bufqs_per_qgrp; + u32 bufq_desc_count[IDPF_MAX_BUFQS_PER_RXQ_GRP]; + u32 bufq_size[IDPF_MAX_BUFQS_PER_RXQ_GRP]; + u16 num_rxq_grp; + struct idpf_rxq_group *rxq_grps; + u32 rxq_model; + struct idpf_rx_ptype_decoded rx_ptype_lkup[IDPF_RX_MAX_PTYPE]; + + struct idpf_adapter *adapter; + struct net_device *netdev; + DECLARE_BITMAP(flags, IDPF_VPORT_FLAGS_NBITS); + u16 vport_type; + u32 vport_id; + u16 idx; + bool default_vport; + bool base_rxd; + + u16 num_q_vectors; + struct idpf_q_vector *q_vectors; + u16 *q_vector_idxs; + u16 max_mtu; + u8 default_mac_addr[ETH_ALEN]; + u16 rx_itr_profile[IDPF_DIM_PROFILE_SLOTS]; + u16 tx_itr_profile[IDPF_DIM_PROFILE_SLOTS]; + struct idpf_port_stats port_stats; + + bool link_up; + u32 link_speed_mbps; + + char vc_msg[IDPF_CTLQ_MAX_BUF_LEN]; + DECLARE_BITMAP(vc_state, IDPF_VC_NBITS); + + wait_queue_head_t vchnl_wq; + wait_queue_head_t sw_marker_wq; + struct mutex vc_buf_lock; +}; + +/** + * enum idpf_user_flags + * @__IDPF_PROMISC_UC: Unicast promiscuous mode + * @__IDPF_PROMISC_MC: Multicast promiscuous mode + * @__IDPF_USER_FLAGS_NBITS: Must be last + */ +enum idpf_user_flags { + __IDPF_PROMISC_UC = 32, + __IDPF_PROMISC_MC, + + __IDPF_USER_FLAGS_NBITS, +}; + +/** + * struct idpf_rss_data - Associated RSS data + * @rss_key_size: Size of RSS hash key + * @rss_key: RSS hash key + * @rss_lut_size: Size of RSS lookup table + * @rss_lut: RSS lookup table + * @cached_lut: Used to restore previously init RSS lut + */ +struct idpf_rss_data { + u16 rss_key_size; + u8 *rss_key; + u16 rss_lut_size; + u32 *rss_lut; + u32 *cached_lut; +}; + +/** + * struct idpf_vport_user_config_data - User defined configuration values for + * each vport. + * @rss_data: See struct idpf_rss_data + * @num_req_tx_qs: Number of user requested TX queues through ethtool + * @num_req_rx_qs: Number of user requested RX queues through ethtool + * @num_req_txq_desc: Number of user requested TX queue descriptors through + * ethtool + * @num_req_rxq_desc: Number of user requested RX queue descriptors through + * ethtool + * @user_flags: User toggled config flags + * @mac_filter_list: List of MAC filters + * + * Used to restore configuration after a reset as the vport will get wiped. + */ +struct idpf_vport_user_config_data { + struct idpf_rss_data rss_data; + u16 num_req_tx_qs; + u16 num_req_rx_qs; + u32 num_req_txq_desc; + u32 num_req_rxq_desc; + DECLARE_BITMAP(user_flags, __IDPF_USER_FLAGS_NBITS); + struct list_head mac_filter_list; +}; + +/** + * enum idpf_vport_config_flags - Vport config flags + * @IDPF_VPORT_REG_NETDEV: Register netdev + * @IDPF_VPORT_UP_REQUESTED: Set if interface up is requested on core reset + * @IDPF_VPORT_ADD_MAC_REQ: Asynchronous add ether address in flight + * @IDPF_VPORT_DEL_MAC_REQ: Asynchronous delete ether address in flight + * @IDPF_VPORT_CONFIG_FLAGS_NBITS: Must be last + */ +enum idpf_vport_config_flags { + IDPF_VPORT_REG_NETDEV, + IDPF_VPORT_UP_REQUESTED, + IDPF_VPORT_ADD_MAC_REQ, + IDPF_VPORT_DEL_MAC_REQ, + IDPF_VPORT_CONFIG_FLAGS_NBITS, +}; + +/** + * struct idpf_avail_queue_info + * @avail_rxq: Available RX queues + * @avail_txq: Available TX queues + * @avail_bufq: Available buffer queues + * @avail_complq: Available completion queues + * + * Maintain total queues available after allocating max queues to each vport. + */ +struct idpf_avail_queue_info { + u16 avail_rxq; + u16 avail_txq; + u16 avail_bufq; + u16 avail_complq; +}; + +/** + * struct idpf_vector_info - Utility structure to pass function arguments as a + * structure + * @num_req_vecs: Vectors required based on the number of queues updated by the + * user via ethtool + * @num_curr_vecs: Current number of vectors, must be >= @num_req_vecs + * @index: Relative starting index for vectors + * @default_vport: Vectors are for default vport + */ +struct idpf_vector_info { + u16 num_req_vecs; + u16 num_curr_vecs; + u16 index; + bool default_vport; +}; + +/** + * struct idpf_vector_lifo - Stack to maintain vector indexes used for vector + * distribution algorithm + * @top: Points to stack top i.e. next available vector index + * @base: Always points to start of the free pool + * @size: Total size of the vector stack + * @vec_idx: Array to store all the vector indexes + * + * Vector stack maintains all the relative vector indexes at the *adapter* + * level. This stack is divided into 2 parts, first one is called as 'default + * pool' and other one is called 'free pool'. Vector distribution algorithm + * gives priority to default vports in a way that at least IDPF_MIN_Q_VEC + * vectors are allocated per default vport and the relative vector indexes for + * those are maintained in default pool. Free pool contains all the unallocated + * vector indexes which can be allocated on-demand basis. Mailbox vector index + * is maintained in the default pool of the stack. + */ +struct idpf_vector_lifo { + u16 top; + u16 base; + u16 size; + u16 *vec_idx; +}; + +/** + * struct idpf_vport_config - Vport configuration data + * @user_config: see struct idpf_vport_user_config_data + * @max_q: Maximum possible queues + * @req_qs_chunks: Queue chunk data for requested queues + * @mac_filter_list_lock: Lock to protect mac filters + * @flags: See enum idpf_vport_config_flags + */ +struct idpf_vport_config { + struct idpf_vport_user_config_data user_config; + struct idpf_vport_max_q max_q; + void *req_qs_chunks; + spinlock_t mac_filter_list_lock; + DECLARE_BITMAP(flags, IDPF_VPORT_CONFIG_FLAGS_NBITS); +}; + +/** + * struct idpf_adapter - Device data struct generated on probe + * @pdev: PCI device struct given on probe + * @virt_ver_maj: Virtchnl version major + * @virt_ver_min: Virtchnl version minor + * @msg_enable: Debug message level enabled + * @mb_wait_count: Number of times mailbox was attempted initialization + * @state: Init state machine + * @flags: See enum idpf_flags + * @reset_reg: See struct idpf_reset_reg + * @hw: Device access data + * @num_req_msix: Requested number of MSIX vectors + * @num_avail_msix: Available number of MSIX vectors + * @num_msix_entries: Number of entries in MSIX table + * @msix_entries: MSIX table + * @req_vec_chunks: Requested vector chunk data + * @mb_vector: Mailbox vector data + * @vector_stack: Stack to store the msix vector indexes + * @irq_mb_handler: Handler for hard interrupt for mailbox + * @tx_timeout_count: Number of TX timeouts that have occurred + * @avail_queues: Device given queue limits + * @vports: Array to store vports created by the driver + * @netdevs: Associated Vport netdevs + * @vport_params_reqd: Vport params requested + * @vport_params_recvd: Vport params received + * @vport_ids: Array of device given vport identifiers + * @vport_config: Vport config parameters + * @max_vports: Maximum vports that can be allocated + * @num_alloc_vports: Current number of vports allocated + * @next_vport: Next free slot in pf->vport[] - 0-based! + * @init_task: Initialization task + * @init_wq: Workqueue for initialization task + * @serv_task: Periodically recurring maintenance task + * @serv_wq: Workqueue for service task + * @mbx_task: Task to handle mailbox interrupts + * @mbx_wq: Workqueue for mailbox responses + * @vc_event_task: Task to handle out of band virtchnl event notifications + * @vc_event_wq: Workqueue for virtchnl events + * @stats_task: Periodic statistics retrieval task + * @stats_wq: Workqueue for statistics task + * @caps: Negotiated capabilities with device + * @vchnl_wq: Wait queue for virtchnl messages + * @vc_state: Virtchnl message state + * @vc_msg: Virtchnl message buffer + * @dev_ops: See idpf_dev_ops + * @num_vfs: Number of allocated VFs through sysfs. PF does not directly talk + * to VFs but is used to initialize them + * @crc_enable: Enable CRC insertion offload + * @req_tx_splitq: TX split or single queue model to request + * @req_rx_splitq: RX split or single queue model to request + * @vport_ctrl_lock: Lock to protect the vport control flow + * @vector_lock: Lock to protect vector distribution + * @queue_lock: Lock to protect queue distribution + * @vc_buf_lock: Lock to protect virtchnl buffer + */ +struct idpf_adapter { + struct pci_dev *pdev; + u32 virt_ver_maj; + u32 virt_ver_min; + + u32 msg_enable; + u32 mb_wait_count; + enum idpf_state state; + DECLARE_BITMAP(flags, IDPF_FLAGS_NBITS); + struct idpf_reset_reg reset_reg; + struct idpf_hw hw; + u16 num_req_msix; + u16 num_avail_msix; + u16 num_msix_entries; + struct msix_entry *msix_entries; + struct virtchnl2_alloc_vectors *req_vec_chunks; + struct idpf_q_vector mb_vector; + struct idpf_vector_lifo vector_stack; + irqreturn_t (*irq_mb_handler)(int irq, void *data); + + u32 tx_timeout_count; + struct idpf_avail_queue_info avail_queues; + struct idpf_vport **vports; + struct net_device **netdevs; + struct virtchnl2_create_vport **vport_params_reqd; + struct virtchnl2_create_vport **vport_params_recvd; + u32 *vport_ids; + + struct idpf_vport_config **vport_config; + u16 max_vports; + u16 num_alloc_vports; + u16 next_vport; + + struct delayed_work init_task; + struct workqueue_struct *init_wq; + struct delayed_work serv_task; + struct workqueue_struct *serv_wq; + struct delayed_work mbx_task; + struct workqueue_struct *mbx_wq; + struct delayed_work vc_event_task; + struct workqueue_struct *vc_event_wq; + struct delayed_work stats_task; + struct workqueue_struct *stats_wq; + struct virtchnl2_get_capabilities caps; + + wait_queue_head_t vchnl_wq; + DECLARE_BITMAP(vc_state, IDPF_VC_NBITS); + char vc_msg[IDPF_CTLQ_MAX_BUF_LEN]; + struct idpf_dev_ops dev_ops; + int num_vfs; + bool crc_enable; + bool req_tx_splitq; + bool req_rx_splitq; + + struct mutex vport_ctrl_lock; + struct mutex vector_lock; + struct mutex queue_lock; + struct mutex vc_buf_lock; +}; + +/** + * idpf_is_queue_model_split - check if queue model is split + * @q_model: queue model single or split + * + * Returns true if queue model is split else false + */ +static inline int idpf_is_queue_model_split(u16 q_model) +{ + return q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT; +} + +#define idpf_is_cap_ena(adapter, field, flag) \ + idpf_is_capability_ena(adapter, false, field, flag) +#define idpf_is_cap_ena_all(adapter, field, flag) \ + idpf_is_capability_ena(adapter, true, field, flag) + +bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, + enum idpf_cap_field field, u64 flag); + +#define IDPF_CAP_RSS (\ + VIRTCHNL2_CAP_RSS_IPV4_TCP |\ + VIRTCHNL2_CAP_RSS_IPV4_TCP |\ + VIRTCHNL2_CAP_RSS_IPV4_UDP |\ + VIRTCHNL2_CAP_RSS_IPV4_SCTP |\ + VIRTCHNL2_CAP_RSS_IPV4_OTHER |\ + VIRTCHNL2_CAP_RSS_IPV6_TCP |\ + VIRTCHNL2_CAP_RSS_IPV6_TCP |\ + VIRTCHNL2_CAP_RSS_IPV6_UDP |\ + VIRTCHNL2_CAP_RSS_IPV6_SCTP |\ + VIRTCHNL2_CAP_RSS_IPV6_OTHER) + +#define IDPF_CAP_RSC (\ + VIRTCHNL2_CAP_RSC_IPV4_TCP |\ + VIRTCHNL2_CAP_RSC_IPV6_TCP) + +#define IDPF_CAP_HSPLIT (\ + VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 |\ + VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6) + +#define IDPF_CAP_RX_CSUM_L4V4 (\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP) + +#define IDPF_CAP_RX_CSUM_L4V6 (\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) + +#define IDPF_CAP_RX_CSUM (\ + VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) + +#define IDPF_CAP_SCTP_CSUM (\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP) + +#define IDPF_CAP_TUNNEL_TX_CSUM (\ + VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL |\ + VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL) + +/** + * idpf_get_reserved_vecs - Get reserved vectors + * @adapter: private data struct + */ +static inline u16 idpf_get_reserved_vecs(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.num_allocated_vectors); +} + +/** + * idpf_get_default_vports - Get default number of vports + * @adapter: private data struct + */ +static inline u16 idpf_get_default_vports(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.default_num_vports); +} + +/** + * idpf_get_max_vports - Get max number of vports + * @adapter: private data struct + */ +static inline u16 idpf_get_max_vports(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.max_vports); +} + +/** + * idpf_get_max_tx_bufs - Get max scatter-gather buffers supported by the device + * @adapter: private data struct + */ +static inline unsigned int idpf_get_max_tx_bufs(struct idpf_adapter *adapter) +{ + return adapter->caps.max_sg_bufs_per_tx_pkt; +} + +/** + * idpf_get_min_tx_pkt_len - Get min packet length supported by the device + * @adapter: private data struct + */ +static inline u8 idpf_get_min_tx_pkt_len(struct idpf_adapter *adapter) +{ + u8 pkt_len = adapter->caps.min_sso_packet_len; + + return pkt_len ? pkt_len : IDPF_TX_MIN_PKT_LEN; +} + +/** + * idpf_get_reg_addr - Get BAR0 register address + * @adapter: private data struct + * @reg_offset: register offset value + * + * Based on the register offset, return the actual BAR0 register address + */ +static inline void __iomem *idpf_get_reg_addr(struct idpf_adapter *adapter, + resource_size_t reg_offset) +{ + return (void __iomem *)(adapter->hw.hw_addr + reg_offset); +} + +/** + * idpf_is_reset_detected - check if we were reset at some point + * @adapter: driver specific private structure + * + * Returns true if we are either in reset currently or were previously reset. + */ +static inline bool idpf_is_reset_detected(struct idpf_adapter *adapter) +{ + if (!adapter->hw.arq) + return true; + + return !(readl(idpf_get_reg_addr(adapter, adapter->hw.arq->reg.len)) & + adapter->hw.arq->reg.len_mask); +} + +/** + * idpf_is_reset_in_prog - check if reset is in progress + * @adapter: driver specific private structure + * + * Returns true if hard reset is in progress, false otherwise + */ +static inline bool idpf_is_reset_in_prog(struct idpf_adapter *adapter) +{ + return (test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags) || + test_bit(IDPF_HR_FUNC_RESET, adapter->flags) || + test_bit(IDPF_HR_DRV_LOAD, adapter->flags)); +} + +/** + * idpf_netdev_to_vport - get a vport handle from a netdev + * @netdev: network interface device structure + */ +static inline struct idpf_vport *idpf_netdev_to_vport(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + return np->vport; +} + +/** + * idpf_netdev_to_adapter - Get adapter handle from a netdev + * @netdev: Network interface device structure + */ +static inline struct idpf_adapter *idpf_netdev_to_adapter(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + return np->adapter; +} + +/** + * idpf_is_feature_ena - Determine if a particular feature is enabled + * @vport: Vport to check + * @feature: Netdev flag to check + * + * Returns true or false if a particular feature is enabled. + */ +static inline bool idpf_is_feature_ena(const struct idpf_vport *vport, + netdev_features_t feature) +{ + return vport->netdev->features & feature; +} + +/** + * idpf_get_max_tx_hdr_size -- get the size of tx header + * @adapter: Driver specific private structure + */ +static inline u16 idpf_get_max_tx_hdr_size(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.max_tx_hdr_size); +} + +/** + * idpf_vport_ctrl_lock - Acquire the vport control lock + * @netdev: Network interface device structure + * + * This lock should be used by non-datapath code to protect against vport + * destruction. + */ +static inline void idpf_vport_ctrl_lock(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + mutex_lock(&np->adapter->vport_ctrl_lock); +} + +/** + * idpf_vport_ctrl_unlock - Release the vport control lock + * @netdev: Network interface device structure + */ +static inline void idpf_vport_ctrl_unlock(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + mutex_unlock(&np->adapter->vport_ctrl_lock); +} + +void idpf_statistics_task(struct work_struct *work); +void idpf_init_task(struct work_struct *work); +void idpf_service_task(struct work_struct *work); +void idpf_mbx_task(struct work_struct *work); +void idpf_vc_event_task(struct work_struct *work); +void idpf_dev_ops_init(struct idpf_adapter *adapter); +void idpf_vf_dev_ops_init(struct idpf_adapter *adapter); +int idpf_vport_adjust_qs(struct idpf_vport *vport); +int idpf_init_dflt_mbx(struct idpf_adapter *adapter); +void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter); +int idpf_vc_core_init(struct idpf_adapter *adapter); +void idpf_vc_core_deinit(struct idpf_adapter *adapter); +int idpf_intr_req(struct idpf_adapter *adapter); +void idpf_intr_rel(struct idpf_adapter *adapter); +int idpf_get_reg_intr_vecs(struct idpf_vport *vport, + struct idpf_vec_regs *reg_vals); +u16 idpf_get_max_tx_hdr_size(struct idpf_adapter *adapter); +int idpf_send_delete_queues_msg(struct idpf_vport *vport); +int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q, + u16 num_complq, u16 num_rx_q, u16 num_rx_bufq); +int idpf_initiate_soft_reset(struct idpf_vport *vport, + enum idpf_vport_reset_cause reset_cause); +int idpf_send_enable_vport_msg(struct idpf_vport *vport); +int idpf_send_disable_vport_msg(struct idpf_vport *vport); +int idpf_send_destroy_vport_msg(struct idpf_vport *vport); +int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport); +int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport); +int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get); +int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get); +int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter); +int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors); +void idpf_deinit_task(struct idpf_adapter *adapter); +int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter, + u16 *q_vector_idxs, + struct idpf_vector_info *vec_info); +int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport); +int idpf_send_get_stats_msg(struct idpf_vport *vport); +int idpf_get_vec_ids(struct idpf_adapter *adapter, + u16 *vecids, int num_vecids, + struct virtchnl2_vector_chunks *chunks); +int idpf_recv_mb_msg(struct idpf_adapter *adapter, u32 op, + void *msg, int msg_size); +int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op, + u16 msg_size, u8 *msg); +void idpf_set_ethtool_ops(struct net_device *netdev); +int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q); +void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q); +int idpf_add_del_mac_filters(struct idpf_vport *vport, + struct idpf_netdev_priv *np, + bool add, bool async); +int idpf_set_promiscuous(struct idpf_adapter *adapter, + struct idpf_vport_user_config_data *config_data, + u32 vport_id); +int idpf_send_disable_queues_msg(struct idpf_vport *vport); +void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q); +u32 idpf_get_vport_id(struct idpf_vport *vport); +int idpf_vport_queue_ids_init(struct idpf_vport *vport); +int idpf_queue_reg_init(struct idpf_vport *vport); +int idpf_send_config_queues_msg(struct idpf_vport *vport); +int idpf_send_enable_queues_msg(struct idpf_vport *vport); +int idpf_send_create_vport_msg(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q); +int idpf_check_supported_desc_ids(struct idpf_vport *vport); +void idpf_vport_intr_write_itr(struct idpf_q_vector *q_vector, + u16 itr, bool tx); +int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map); +int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs); +int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs); + +#endif /* !_IDPF_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c new file mode 100644 index 000000000000..c7f43d2fcd13 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -0,0 +1,621 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf_controlq.h" + +/** + * idpf_ctlq_setup_regs - initialize control queue registers + * @cq: pointer to the specific control queue + * @q_create_info: structs containing info for each queue to be initialized + */ +static void idpf_ctlq_setup_regs(struct idpf_ctlq_info *cq, + struct idpf_ctlq_create_info *q_create_info) +{ + /* set control queue registers in our local struct */ + cq->reg.head = q_create_info->reg.head; + cq->reg.tail = q_create_info->reg.tail; + cq->reg.len = q_create_info->reg.len; + cq->reg.bah = q_create_info->reg.bah; + cq->reg.bal = q_create_info->reg.bal; + cq->reg.len_mask = q_create_info->reg.len_mask; + cq->reg.len_ena_mask = q_create_info->reg.len_ena_mask; + cq->reg.head_mask = q_create_info->reg.head_mask; +} + +/** + * idpf_ctlq_init_regs - Initialize control queue registers + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * @is_rxq: true if receive control queue, false otherwise + * + * Initialize registers. The caller is expected to have already initialized the + * descriptor ring memory and buffer memory + */ +static void idpf_ctlq_init_regs(struct idpf_hw *hw, struct idpf_ctlq_info *cq, + bool is_rxq) +{ + /* Update tail to post pre-allocated buffers for rx queues */ + if (is_rxq) + wr32(hw, cq->reg.tail, (u32)(cq->ring_size - 1)); + + /* For non-Mailbox control queues only TAIL need to be set */ + if (cq->q_id != -1) + return; + + /* Clear Head for both send or receive */ + wr32(hw, cq->reg.head, 0); + + /* set starting point */ + wr32(hw, cq->reg.bal, lower_32_bits(cq->desc_ring.pa)); + wr32(hw, cq->reg.bah, upper_32_bits(cq->desc_ring.pa)); + wr32(hw, cq->reg.len, (cq->ring_size | cq->reg.len_ena_mask)); +} + +/** + * idpf_ctlq_init_rxq_bufs - populate receive queue descriptors with buf + * @cq: pointer to the specific Control queue + * + * Record the address of the receive queue DMA buffers in the descriptors. + * The buffers must have been previously allocated. + */ +static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq) +{ + int i; + + for (i = 0; i < cq->ring_size; i++) { + struct idpf_ctlq_desc *desc = IDPF_CTLQ_DESC(cq, i); + struct idpf_dma_mem *bi = cq->bi.rx_buff[i]; + + /* No buffer to post to descriptor, continue */ + if (!bi) + continue; + + desc->flags = + cpu_to_le16(IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD); + desc->opcode = 0; + desc->datalen = cpu_to_le16(bi->size); + desc->ret_val = 0; + desc->v_opcode_dtype = 0; + desc->v_retval = 0; + desc->params.indirect.addr_high = + cpu_to_le32(upper_32_bits(bi->pa)); + desc->params.indirect.addr_low = + cpu_to_le32(lower_32_bits(bi->pa)); + desc->params.indirect.param0 = 0; + desc->params.indirect.sw_cookie = 0; + desc->params.indirect.v_flags = 0; + } +} + +/** + * idpf_ctlq_shutdown - shutdown the CQ + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * + * The main shutdown routine for any controq queue + */ +static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) +{ + mutex_lock(&cq->cq_lock); + + /* free ring buffers and the ring itself */ + idpf_ctlq_dealloc_ring_res(hw, cq); + + /* Set ring_size to 0 to indicate uninitialized queue */ + cq->ring_size = 0; + + mutex_unlock(&cq->cq_lock); + mutex_destroy(&cq->cq_lock); +} + +/** + * idpf_ctlq_add - add one control queue + * @hw: pointer to hardware struct + * @qinfo: info for queue to be created + * @cq_out: (output) double pointer to control queue to be created + * + * Allocate and initialize a control queue and add it to the control queue list. + * The cq parameter will be allocated/initialized and passed back to the caller + * if no errors occur. + * + * Note: idpf_ctlq_init must be called prior to any calls to idpf_ctlq_add + */ +int idpf_ctlq_add(struct idpf_hw *hw, + struct idpf_ctlq_create_info *qinfo, + struct idpf_ctlq_info **cq_out) +{ + struct idpf_ctlq_info *cq; + bool is_rxq = false; + int err; + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return -ENOMEM; + + cq->cq_type = qinfo->type; + cq->q_id = qinfo->id; + cq->buf_size = qinfo->buf_size; + cq->ring_size = qinfo->len; + + cq->next_to_use = 0; + cq->next_to_clean = 0; + cq->next_to_post = cq->ring_size - 1; + + switch (qinfo->type) { + case IDPF_CTLQ_TYPE_MAILBOX_RX: + is_rxq = true; + fallthrough; + case IDPF_CTLQ_TYPE_MAILBOX_TX: + err = idpf_ctlq_alloc_ring_res(hw, cq); + break; + default: + err = -EBADR; + break; + } + + if (err) + goto init_free_q; + + if (is_rxq) { + idpf_ctlq_init_rxq_bufs(cq); + } else { + /* Allocate the array of msg pointers for TX queues */ + cq->bi.tx_msg = kcalloc(qinfo->len, + sizeof(struct idpf_ctlq_msg *), + GFP_KERNEL); + if (!cq->bi.tx_msg) { + err = -ENOMEM; + goto init_dealloc_q_mem; + } + } + + idpf_ctlq_setup_regs(cq, qinfo); + + idpf_ctlq_init_regs(hw, cq, is_rxq); + + mutex_init(&cq->cq_lock); + + list_add(&cq->cq_list, &hw->cq_list_head); + + *cq_out = cq; + + return 0; + +init_dealloc_q_mem: + /* free ring buffers and the ring itself */ + idpf_ctlq_dealloc_ring_res(hw, cq); +init_free_q: + kfree(cq); + + return err; +} + +/** + * idpf_ctlq_remove - deallocate and remove specified control queue + * @hw: pointer to hardware struct + * @cq: pointer to control queue to be removed + */ +void idpf_ctlq_remove(struct idpf_hw *hw, + struct idpf_ctlq_info *cq) +{ + list_del(&cq->cq_list); + idpf_ctlq_shutdown(hw, cq); + kfree(cq); +} + +/** + * idpf_ctlq_init - main initialization routine for all control queues + * @hw: pointer to hardware struct + * @num_q: number of queues to initialize + * @q_info: array of structs containing info for each queue to be initialized + * + * This initializes any number and any type of control queues. This is an all + * or nothing routine; if one fails, all previously allocated queues will be + * destroyed. This must be called prior to using the individual add/remove + * APIs. + */ +int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q, + struct idpf_ctlq_create_info *q_info) +{ + struct idpf_ctlq_info *cq, *tmp; + int err; + int i; + + INIT_LIST_HEAD(&hw->cq_list_head); + + for (i = 0; i < num_q; i++) { + struct idpf_ctlq_create_info *qinfo = q_info + i; + + err = idpf_ctlq_add(hw, qinfo, &cq); + if (err) + goto init_destroy_qs; + } + + return 0; + +init_destroy_qs: + list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list) + idpf_ctlq_remove(hw, cq); + + return err; +} + +/** + * idpf_ctlq_deinit - destroy all control queues + * @hw: pointer to hw struct + */ +void idpf_ctlq_deinit(struct idpf_hw *hw) +{ + struct idpf_ctlq_info *cq, *tmp; + + list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list) + idpf_ctlq_remove(hw, cq); +} + +/** + * idpf_ctlq_send - send command to Control Queue (CTQ) + * @hw: pointer to hw struct + * @cq: handle to control queue struct to send on + * @num_q_msg: number of messages to send on control queue + * @q_msg: pointer to array of queue messages to be sent + * + * The caller is expected to allocate DMAable buffers and pass them to the + * send routine via the q_msg struct / control queue specific data struct. + * The control queue will hold a reference to each send message until + * the completion for that message has been cleaned. + */ +int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, + u16 num_q_msg, struct idpf_ctlq_msg q_msg[]) +{ + struct idpf_ctlq_desc *desc; + int num_desc_avail; + int err = 0; + int i; + + mutex_lock(&cq->cq_lock); + + /* Ensure there are enough descriptors to send all messages */ + num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq); + if (num_desc_avail == 0 || num_desc_avail < num_q_msg) { + err = -ENOSPC; + goto err_unlock; + } + + for (i = 0; i < num_q_msg; i++) { + struct idpf_ctlq_msg *msg = &q_msg[i]; + + desc = IDPF_CTLQ_DESC(cq, cq->next_to_use); + + desc->opcode = cpu_to_le16(msg->opcode); + desc->pfid_vfid = cpu_to_le16(msg->func_id); + + desc->v_opcode_dtype = cpu_to_le32(msg->cookie.mbx.chnl_opcode); + desc->v_retval = cpu_to_le32(msg->cookie.mbx.chnl_retval); + + desc->flags = cpu_to_le16((msg->host_id & IDPF_HOST_ID_MASK) << + IDPF_CTLQ_FLAG_HOST_ID_S); + if (msg->data_len) { + struct idpf_dma_mem *buff = msg->ctx.indirect.payload; + + desc->datalen |= cpu_to_le16(msg->data_len); + desc->flags |= cpu_to_le16(IDPF_CTLQ_FLAG_BUF); + desc->flags |= cpu_to_le16(IDPF_CTLQ_FLAG_RD); + + /* Update the address values in the desc with the pa + * value for respective buffer + */ + desc->params.indirect.addr_high = + cpu_to_le32(upper_32_bits(buff->pa)); + desc->params.indirect.addr_low = + cpu_to_le32(lower_32_bits(buff->pa)); + + memcpy(&desc->params, msg->ctx.indirect.context, + IDPF_INDIRECT_CTX_SIZE); + } else { + memcpy(&desc->params, msg->ctx.direct, + IDPF_DIRECT_CTX_SIZE); + } + + /* Store buffer info */ + cq->bi.tx_msg[cq->next_to_use] = msg; + + (cq->next_to_use)++; + if (cq->next_to_use == cq->ring_size) + cq->next_to_use = 0; + } + + /* Force memory write to complete before letting hardware + * know that there are new descriptors to fetch. + */ + dma_wmb(); + + wr32(hw, cq->reg.tail, cq->next_to_use); + +err_unlock: + mutex_unlock(&cq->cq_lock); + + return err; +} + +/** + * idpf_ctlq_clean_sq - reclaim send descriptors on HW write back for the + * requested queue + * @cq: pointer to the specific Control queue + * @clean_count: (input|output) number of descriptors to clean as input, and + * number of descriptors actually cleaned as output + * @msg_status: (output) pointer to msg pointer array to be populated; needs + * to be allocated by caller + * + * Returns an array of message pointers associated with the cleaned + * descriptors. The pointers are to the original ctlq_msgs sent on the cleaned + * descriptors. The status will be returned for each; any messages that failed + * to send will have a non-zero status. The caller is expected to free original + * ctlq_msgs and free or reuse the DMA buffers. + */ +int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, + struct idpf_ctlq_msg *msg_status[]) +{ + struct idpf_ctlq_desc *desc; + u16 i, num_to_clean; + u16 ntc, desc_err; + + if (*clean_count == 0) + return 0; + if (*clean_count > cq->ring_size) + return -EBADR; + + mutex_lock(&cq->cq_lock); + + ntc = cq->next_to_clean; + + num_to_clean = *clean_count; + + for (i = 0; i < num_to_clean; i++) { + /* Fetch next descriptor and check if marked as done */ + desc = IDPF_CTLQ_DESC(cq, ntc); + if (!(le16_to_cpu(desc->flags) & IDPF_CTLQ_FLAG_DD)) + break; + + /* strip off FW internal code */ + desc_err = le16_to_cpu(desc->ret_val) & 0xff; + + msg_status[i] = cq->bi.tx_msg[ntc]; + msg_status[i]->status = desc_err; + + cq->bi.tx_msg[ntc] = NULL; + + /* Zero out any stale data */ + memset(desc, 0, sizeof(*desc)); + + ntc++; + if (ntc == cq->ring_size) + ntc = 0; + } + + cq->next_to_clean = ntc; + + mutex_unlock(&cq->cq_lock); + + /* Return number of descriptors actually cleaned */ + *clean_count = i; + + return 0; +} + +/** + * idpf_ctlq_post_rx_buffs - post buffers to descriptor ring + * @hw: pointer to hw struct + * @cq: pointer to control queue handle + * @buff_count: (input|output) input is number of buffers caller is trying to + * return; output is number of buffers that were not posted + * @buffs: array of pointers to dma mem structs to be given to hardware + * + * Caller uses this function to return DMA buffers to the descriptor ring after + * consuming them; buff_count will be the number of buffers. + * + * Note: this function needs to be called after a receive call even + * if there are no DMA buffers to be returned, i.e. buff_count = 0, + * buffs = NULL to support direct commands + */ +int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq, + u16 *buff_count, struct idpf_dma_mem **buffs) +{ + struct idpf_ctlq_desc *desc; + u16 ntp = cq->next_to_post; + bool buffs_avail = false; + u16 tbp = ntp + 1; + int i = 0; + + if (*buff_count > cq->ring_size) + return -EBADR; + + if (*buff_count > 0) + buffs_avail = true; + + mutex_lock(&cq->cq_lock); + + if (tbp >= cq->ring_size) + tbp = 0; + + if (tbp == cq->next_to_clean) + /* Nothing to do */ + goto post_buffs_out; + + /* Post buffers for as many as provided or up until the last one used */ + while (ntp != cq->next_to_clean) { + desc = IDPF_CTLQ_DESC(cq, ntp); + + if (cq->bi.rx_buff[ntp]) + goto fill_desc; + if (!buffs_avail) { + /* If the caller hasn't given us any buffers or + * there are none left, search the ring itself + * for an available buffer to move to this + * entry starting at the next entry in the ring + */ + tbp = ntp + 1; + + /* Wrap ring if necessary */ + if (tbp >= cq->ring_size) + tbp = 0; + + while (tbp != cq->next_to_clean) { + if (cq->bi.rx_buff[tbp]) { + cq->bi.rx_buff[ntp] = + cq->bi.rx_buff[tbp]; + cq->bi.rx_buff[tbp] = NULL; + + /* Found a buffer, no need to + * search anymore + */ + break; + } + + /* Wrap ring if necessary */ + tbp++; + if (tbp >= cq->ring_size) + tbp = 0; + } + + if (tbp == cq->next_to_clean) + goto post_buffs_out; + } else { + /* Give back pointer to DMA buffer */ + cq->bi.rx_buff[ntp] = buffs[i]; + i++; + + if (i >= *buff_count) + buffs_avail = false; + } + +fill_desc: + desc->flags = + cpu_to_le16(IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD); + + /* Post buffers to descriptor */ + desc->datalen = cpu_to_le16(cq->bi.rx_buff[ntp]->size); + desc->params.indirect.addr_high = + cpu_to_le32(upper_32_bits(cq->bi.rx_buff[ntp]->pa)); + desc->params.indirect.addr_low = + cpu_to_le32(lower_32_bits(cq->bi.rx_buff[ntp]->pa)); + + ntp++; + if (ntp == cq->ring_size) + ntp = 0; + } + +post_buffs_out: + /* Only update tail if buffers were actually posted */ + if (cq->next_to_post != ntp) { + if (ntp) + /* Update next_to_post to ntp - 1 since current ntp + * will not have a buffer + */ + cq->next_to_post = ntp - 1; + else + /* Wrap to end of end ring since current ntp is 0 */ + cq->next_to_post = cq->ring_size - 1; + + wr32(hw, cq->reg.tail, cq->next_to_post); + } + + mutex_unlock(&cq->cq_lock); + + /* return the number of buffers that were not posted */ + *buff_count = *buff_count - i; + + return 0; +} + +/** + * idpf_ctlq_recv - receive control queue message call back + * @cq: pointer to control queue handle to receive on + * @num_q_msg: (input|output) input number of messages that should be received; + * output number of messages actually received + * @q_msg: (output) array of received control queue messages on this q; + * needs to be pre-allocated by caller for as many messages as requested + * + * Called by interrupt handler or polling mechanism. Caller is expected + * to free buffers + */ +int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, + struct idpf_ctlq_msg *q_msg) +{ + u16 num_to_clean, ntc, flags; + struct idpf_ctlq_desc *desc; + int err = 0; + u16 i; + + if (*num_q_msg == 0) + return 0; + else if (*num_q_msg > cq->ring_size) + return -EBADR; + + /* take the lock before we start messing with the ring */ + mutex_lock(&cq->cq_lock); + + ntc = cq->next_to_clean; + + num_to_clean = *num_q_msg; + + for (i = 0; i < num_to_clean; i++) { + /* Fetch next descriptor and check if marked as done */ + desc = IDPF_CTLQ_DESC(cq, ntc); + flags = le16_to_cpu(desc->flags); + + if (!(flags & IDPF_CTLQ_FLAG_DD)) + break; + + q_msg[i].vmvf_type = (flags & + (IDPF_CTLQ_FLAG_FTYPE_VM | + IDPF_CTLQ_FLAG_FTYPE_PF)) >> + IDPF_CTLQ_FLAG_FTYPE_S; + + if (flags & IDPF_CTLQ_FLAG_ERR) + err = -EBADMSG; + + q_msg[i].cookie.mbx.chnl_opcode = + le32_to_cpu(desc->v_opcode_dtype); + q_msg[i].cookie.mbx.chnl_retval = + le32_to_cpu(desc->v_retval); + + q_msg[i].opcode = le16_to_cpu(desc->opcode); + q_msg[i].data_len = le16_to_cpu(desc->datalen); + q_msg[i].status = le16_to_cpu(desc->ret_val); + + if (desc->datalen) { + memcpy(q_msg[i].ctx.indirect.context, + &desc->params.indirect, IDPF_INDIRECT_CTX_SIZE); + + /* Assign pointer to dma buffer to ctlq_msg array + * to be given to upper layer + */ + q_msg[i].ctx.indirect.payload = cq->bi.rx_buff[ntc]; + + /* Zero out pointer to DMA buffer info; + * will be repopulated by post buffers API + */ + cq->bi.rx_buff[ntc] = NULL; + } else { + memcpy(q_msg[i].ctx.direct, desc->params.raw, + IDPF_DIRECT_CTX_SIZE); + } + + /* Zero out stale data in descriptor */ + memset(desc, 0, sizeof(struct idpf_ctlq_desc)); + + ntc++; + if (ntc == cq->ring_size) + ntc = 0; + } + + cq->next_to_clean = ntc; + + mutex_unlock(&cq->cq_lock); + + *num_q_msg = i; + if (*num_q_msg == 0) + err = -ENOMSG; + + return err; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.h b/drivers/net/ethernet/intel/idpf/idpf_controlq.h new file mode 100644 index 000000000000..c1aba09e9856 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_CONTROLQ_H_ +#define _IDPF_CONTROLQ_H_ + +#include <linux/slab.h> + +#include "idpf_controlq_api.h" + +/* Maximum buffer length for all control queue types */ +#define IDPF_CTLQ_MAX_BUF_LEN 4096 + +#define IDPF_CTLQ_DESC(R, i) \ + (&(((struct idpf_ctlq_desc *)((R)->desc_ring.va))[i])) + +#define IDPF_CTLQ_DESC_UNUSED(R) \ + ((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->ring_size) + \ + (R)->next_to_clean - (R)->next_to_use - 1)) + +/* Control Queue default settings */ +#define IDPF_CTRL_SQ_CMD_TIMEOUT 250 /* msecs */ + +struct idpf_ctlq_desc { + /* Control queue descriptor flags */ + __le16 flags; + /* Control queue message opcode */ + __le16 opcode; + __le16 datalen; /* 0 for direct commands */ + union { + __le16 ret_val; + __le16 pfid_vfid; +#define IDPF_CTLQ_DESC_VF_ID_S 0 +#define IDPF_CTLQ_DESC_VF_ID_M (0x7FF << IDPF_CTLQ_DESC_VF_ID_S) +#define IDPF_CTLQ_DESC_PF_ID_S 11 +#define IDPF_CTLQ_DESC_PF_ID_M (0x1F << IDPF_CTLQ_DESC_PF_ID_S) + }; + + /* Virtchnl message opcode and virtchnl descriptor type + * v_opcode=[27:0], v_dtype=[31:28] + */ + __le32 v_opcode_dtype; + /* Virtchnl return value */ + __le32 v_retval; + union { + struct { + __le32 param0; + __le32 param1; + __le32 param2; + __le32 param3; + } direct; + struct { + __le32 param0; + __le16 sw_cookie; + /* Virtchnl flags */ + __le16 v_flags; + __le32 addr_high; + __le32 addr_low; + } indirect; + u8 raw[16]; + } params; +}; + +/* Flags sub-structure + * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 | + * |DD |CMP|ERR| * RSV * |FTYPE | *RSV* |RD |VFC|BUF| HOST_ID | + */ +/* command flags and offsets */ +#define IDPF_CTLQ_FLAG_DD_S 0 +#define IDPF_CTLQ_FLAG_CMP_S 1 +#define IDPF_CTLQ_FLAG_ERR_S 2 +#define IDPF_CTLQ_FLAG_FTYPE_S 6 +#define IDPF_CTLQ_FLAG_RD_S 10 +#define IDPF_CTLQ_FLAG_VFC_S 11 +#define IDPF_CTLQ_FLAG_BUF_S 12 +#define IDPF_CTLQ_FLAG_HOST_ID_S 13 + +#define IDPF_CTLQ_FLAG_DD BIT(IDPF_CTLQ_FLAG_DD_S) /* 0x1 */ +#define IDPF_CTLQ_FLAG_CMP BIT(IDPF_CTLQ_FLAG_CMP_S) /* 0x2 */ +#define IDPF_CTLQ_FLAG_ERR BIT(IDPF_CTLQ_FLAG_ERR_S) /* 0x4 */ +#define IDPF_CTLQ_FLAG_FTYPE_VM BIT(IDPF_CTLQ_FLAG_FTYPE_S) /* 0x40 */ +#define IDPF_CTLQ_FLAG_FTYPE_PF BIT(IDPF_CTLQ_FLAG_FTYPE_S + 1) /* 0x80 */ +#define IDPF_CTLQ_FLAG_RD BIT(IDPF_CTLQ_FLAG_RD_S) /* 0x400 */ +#define IDPF_CTLQ_FLAG_VFC BIT(IDPF_CTLQ_FLAG_VFC_S) /* 0x800 */ +#define IDPF_CTLQ_FLAG_BUF BIT(IDPF_CTLQ_FLAG_BUF_S) /* 0x1000 */ + +/* Host ID is a special field that has 3b and not a 1b flag */ +#define IDPF_CTLQ_FLAG_HOST_ID_M MAKE_MASK(0x7000UL, IDPF_CTLQ_FLAG_HOST_ID_S) + +struct idpf_mbxq_desc { + u8 pad[8]; /* CTLQ flags/opcode/len/retval fields */ + u32 chnl_opcode; /* avoid confusion with desc->opcode */ + u32 chnl_retval; /* ditto for desc->retval */ + u32 pf_vf_id; /* used by CP when sending to PF */ +}; + +/* Define the driver hardware struct to replace other control structs as needed + * Align to ctlq_hw_info + */ +struct idpf_hw { + void __iomem *hw_addr; + resource_size_t hw_addr_len; + + struct idpf_adapter *back; + + /* control queue - send and receive */ + struct idpf_ctlq_info *asq; + struct idpf_ctlq_info *arq; + + /* pci info */ + u16 device_id; + u16 vendor_id; + u16 subsystem_device_id; + u16 subsystem_vendor_id; + u8 revision_id; + bool adapter_stopped; + + struct list_head cq_list_head; +}; + +int idpf_ctlq_alloc_ring_res(struct idpf_hw *hw, + struct idpf_ctlq_info *cq); + +void idpf_ctlq_dealloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq); + +/* prototype for functions used for dynamic memory allocation */ +void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, + u64 size); +void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem); +#endif /* _IDPF_CONTROLQ_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h new file mode 100644 index 000000000000..8dee098bbfb0 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_CONTROLQ_API_H_ +#define _IDPF_CONTROLQ_API_H_ + +#include "idpf_mem.h" + +struct idpf_hw; + +/* Used for queue init, response and events */ +enum idpf_ctlq_type { + IDPF_CTLQ_TYPE_MAILBOX_TX = 0, + IDPF_CTLQ_TYPE_MAILBOX_RX = 1, + IDPF_CTLQ_TYPE_CONFIG_TX = 2, + IDPF_CTLQ_TYPE_CONFIG_RX = 3, + IDPF_CTLQ_TYPE_EVENT_RX = 4, + IDPF_CTLQ_TYPE_RDMA_TX = 5, + IDPF_CTLQ_TYPE_RDMA_RX = 6, + IDPF_CTLQ_TYPE_RDMA_COMPL = 7 +}; + +/* Generic Control Queue Structures */ +struct idpf_ctlq_reg { + /* used for queue tracking */ + u32 head; + u32 tail; + /* Below applies only to default mb (if present) */ + u32 len; + u32 bah; + u32 bal; + u32 len_mask; + u32 len_ena_mask; + u32 head_mask; +}; + +/* Generic queue msg structure */ +struct idpf_ctlq_msg { + u8 vmvf_type; /* represents the source of the message on recv */ +#define IDPF_VMVF_TYPE_VF 0 +#define IDPF_VMVF_TYPE_VM 1 +#define IDPF_VMVF_TYPE_PF 2 + u8 host_id; + /* 3b field used only when sending a message to CP - to be used in + * combination with target func_id to route the message + */ +#define IDPF_HOST_ID_MASK 0x7 + + u16 opcode; + u16 data_len; /* data_len = 0 when no payload is attached */ + union { + u16 func_id; /* when sending a message */ + u16 status; /* when receiving a message */ + }; + union { + struct { + u32 chnl_opcode; + u32 chnl_retval; + } mbx; + } cookie; + union { +#define IDPF_DIRECT_CTX_SIZE 16 +#define IDPF_INDIRECT_CTX_SIZE 8 + /* 16 bytes of context can be provided or 8 bytes of context + * plus the address of a DMA buffer + */ + u8 direct[IDPF_DIRECT_CTX_SIZE]; + struct { + u8 context[IDPF_INDIRECT_CTX_SIZE]; + struct idpf_dma_mem *payload; + } indirect; + } ctx; +}; + +/* Generic queue info structures */ +/* MB, CONFIG and EVENT q do not have extended info */ +struct idpf_ctlq_create_info { + enum idpf_ctlq_type type; + int id; /* absolute queue offset passed as input + * -1 for default mailbox if present + */ + u16 len; /* Queue length passed as input */ + u16 buf_size; /* buffer size passed as input */ + u64 base_address; /* output, HPA of the Queue start */ + struct idpf_ctlq_reg reg; /* registers accessed by ctlqs */ + + int ext_info_size; + void *ext_info; /* Specific to q type */ +}; + +/* Control Queue information */ +struct idpf_ctlq_info { + struct list_head cq_list; + + enum idpf_ctlq_type cq_type; + int q_id; + struct mutex cq_lock; /* control queue lock */ + /* used for interrupt processing */ + u16 next_to_use; + u16 next_to_clean; + u16 next_to_post; /* starting descriptor to post buffers + * to after recev + */ + + struct idpf_dma_mem desc_ring; /* descriptor ring memory + * idpf_dma_mem is defined in OSdep.h + */ + union { + struct idpf_dma_mem **rx_buff; + struct idpf_ctlq_msg **tx_msg; + } bi; + + u16 buf_size; /* queue buffer size */ + u16 ring_size; /* Number of descriptors */ + struct idpf_ctlq_reg reg; /* registers accessed by ctlqs */ +}; + +/** + * enum idpf_mbx_opc - PF/VF mailbox commands + * @idpf_mbq_opc_send_msg_to_cp: used by PF or VF to send a message to its CP + */ +enum idpf_mbx_opc { + idpf_mbq_opc_send_msg_to_cp = 0x0801, +}; + +/* API supported for control queue management */ +/* Will init all required q including default mb. "q_info" is an array of + * create_info structs equal to the number of control queues to be created. + */ +int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q, + struct idpf_ctlq_create_info *q_info); + +/* Allocate and initialize a single control queue, which will be added to the + * control queue list; returns a handle to the created control queue + */ +int idpf_ctlq_add(struct idpf_hw *hw, + struct idpf_ctlq_create_info *qinfo, + struct idpf_ctlq_info **cq); + +/* Deinitialize and deallocate a single control queue */ +void idpf_ctlq_remove(struct idpf_hw *hw, + struct idpf_ctlq_info *cq); + +/* Sends messages to HW and will also free the buffer*/ +int idpf_ctlq_send(struct idpf_hw *hw, + struct idpf_ctlq_info *cq, + u16 num_q_msg, + struct idpf_ctlq_msg q_msg[]); + +/* Receives messages and called by interrupt handler/polling + * initiated by app/process. Also caller is supposed to free the buffers + */ +int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, + struct idpf_ctlq_msg *q_msg); + +/* Reclaims send descriptors on HW write back */ +int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, + struct idpf_ctlq_msg *msg_status[]); + +/* Indicate RX buffers are done being processed */ +int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, + struct idpf_ctlq_info *cq, + u16 *buff_count, + struct idpf_dma_mem **buffs); + +/* Will destroy all q including the default mb */ +void idpf_ctlq_deinit(struct idpf_hw *hw); + +#endif /* _IDPF_CONTROLQ_API_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c b/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c new file mode 100644 index 000000000000..a942a6385d06 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf_controlq.h" + +/** + * idpf_ctlq_alloc_desc_ring - Allocate Control Queue (CQ) rings + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + */ +static int idpf_ctlq_alloc_desc_ring(struct idpf_hw *hw, + struct idpf_ctlq_info *cq) +{ + size_t size = cq->ring_size * sizeof(struct idpf_ctlq_desc); + + cq->desc_ring.va = idpf_alloc_dma_mem(hw, &cq->desc_ring, size); + if (!cq->desc_ring.va) + return -ENOMEM; + + return 0; +} + +/** + * idpf_ctlq_alloc_bufs - Allocate Control Queue (CQ) buffers + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * + * Allocate the buffer head for all control queues, and if it's a receive + * queue, allocate DMA buffers + */ +static int idpf_ctlq_alloc_bufs(struct idpf_hw *hw, + struct idpf_ctlq_info *cq) +{ + int i; + + /* Do not allocate DMA buffers for transmit queues */ + if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_TX) + return 0; + + /* We'll be allocating the buffer info memory first, then we can + * allocate the mapped buffers for the event processing + */ + cq->bi.rx_buff = kcalloc(cq->ring_size, sizeof(struct idpf_dma_mem *), + GFP_KERNEL); + if (!cq->bi.rx_buff) + return -ENOMEM; + + /* allocate the mapped buffers (except for the last one) */ + for (i = 0; i < cq->ring_size - 1; i++) { + struct idpf_dma_mem *bi; + int num = 1; /* number of idpf_dma_mem to be allocated */ + + cq->bi.rx_buff[i] = kcalloc(num, sizeof(struct idpf_dma_mem), + GFP_KERNEL); + if (!cq->bi.rx_buff[i]) + goto unwind_alloc_cq_bufs; + + bi = cq->bi.rx_buff[i]; + + bi->va = idpf_alloc_dma_mem(hw, bi, cq->buf_size); + if (!bi->va) { + /* unwind will not free the failed entry */ + kfree(cq->bi.rx_buff[i]); + goto unwind_alloc_cq_bufs; + } + } + + return 0; + +unwind_alloc_cq_bufs: + /* don't try to free the one that failed... */ + i--; + for (; i >= 0; i--) { + idpf_free_dma_mem(hw, cq->bi.rx_buff[i]); + kfree(cq->bi.rx_buff[i]); + } + kfree(cq->bi.rx_buff); + + return -ENOMEM; +} + +/** + * idpf_ctlq_free_desc_ring - Free Control Queue (CQ) rings + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * + * This assumes the posted send buffers have already been cleaned + * and de-allocated + */ +static void idpf_ctlq_free_desc_ring(struct idpf_hw *hw, + struct idpf_ctlq_info *cq) +{ + idpf_free_dma_mem(hw, &cq->desc_ring); +} + +/** + * idpf_ctlq_free_bufs - Free CQ buffer info elements + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * + * Free the DMA buffers for RX queues, and DMA buffer header for both RX and TX + * queues. The upper layers are expected to manage freeing of TX DMA buffers + */ +static void idpf_ctlq_free_bufs(struct idpf_hw *hw, struct idpf_ctlq_info *cq) +{ + void *bi; + + if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_RX) { + int i; + + /* free DMA buffers for rx queues*/ + for (i = 0; i < cq->ring_size; i++) { + if (cq->bi.rx_buff[i]) { + idpf_free_dma_mem(hw, cq->bi.rx_buff[i]); + kfree(cq->bi.rx_buff[i]); + } + } + + bi = (void *)cq->bi.rx_buff; + } else { + bi = (void *)cq->bi.tx_msg; + } + + /* free the buffer header */ + kfree(bi); +} + +/** + * idpf_ctlq_dealloc_ring_res - Free memory allocated for control queue + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * + * Free the memory used by the ring, buffers and other related structures + */ +void idpf_ctlq_dealloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq) +{ + /* free ring buffers and the ring itself */ + idpf_ctlq_free_bufs(hw, cq); + idpf_ctlq_free_desc_ring(hw, cq); +} + +/** + * idpf_ctlq_alloc_ring_res - allocate memory for descriptor ring and bufs + * @hw: pointer to hw struct + * @cq: pointer to control queue struct + * + * Do *NOT* hold cq_lock when calling this as the memory allocation routines + * called are not going to be atomic context safe + */ +int idpf_ctlq_alloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq) +{ + int err; + + /* allocate the ring memory */ + err = idpf_ctlq_alloc_desc_ring(hw, cq); + if (err) + return err; + + /* allocate buffers in the rings */ + err = idpf_ctlq_alloc_bufs(hw, cq); + if (err) + goto idpf_init_cq_free_ring; + + /* success! */ + return 0; + +idpf_init_cq_free_ring: + idpf_free_dma_mem(hw, &cq->desc_ring); + + return err; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c new file mode 100644 index 000000000000..34ad1ac46b78 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" +#include "idpf_lan_pf_regs.h" + +#define IDPF_PF_ITR_IDX_SPACING 0x4 + +/** + * idpf_ctlq_reg_init - initialize default mailbox registers + * @cq: pointer to the array of create control queues + */ +static void idpf_ctlq_reg_init(struct idpf_ctlq_create_info *cq) +{ + int i; + + for (i = 0; i < IDPF_NUM_DFLT_MBX_Q; i++) { + struct idpf_ctlq_create_info *ccq = cq + i; + + switch (ccq->type) { + case IDPF_CTLQ_TYPE_MAILBOX_TX: + /* set head and tail registers in our local struct */ + ccq->reg.head = PF_FW_ATQH; + ccq->reg.tail = PF_FW_ATQT; + ccq->reg.len = PF_FW_ATQLEN; + ccq->reg.bah = PF_FW_ATQBAH; + ccq->reg.bal = PF_FW_ATQBAL; + ccq->reg.len_mask = PF_FW_ATQLEN_ATQLEN_M; + ccq->reg.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M; + ccq->reg.head_mask = PF_FW_ATQH_ATQH_M; + break; + case IDPF_CTLQ_TYPE_MAILBOX_RX: + /* set head and tail registers in our local struct */ + ccq->reg.head = PF_FW_ARQH; + ccq->reg.tail = PF_FW_ARQT; + ccq->reg.len = PF_FW_ARQLEN; + ccq->reg.bah = PF_FW_ARQBAH; + ccq->reg.bal = PF_FW_ARQBAL; + ccq->reg.len_mask = PF_FW_ARQLEN_ARQLEN_M; + ccq->reg.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M; + ccq->reg.head_mask = PF_FW_ARQH_ARQH_M; + break; + default: + break; + } + } +} + +/** + * idpf_mb_intr_reg_init - Initialize mailbox interrupt register + * @adapter: adapter structure + */ +static void idpf_mb_intr_reg_init(struct idpf_adapter *adapter) +{ + struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg; + u32 dyn_ctl = le32_to_cpu(adapter->caps.mailbox_dyn_ctl); + + intr->dyn_ctl = idpf_get_reg_addr(adapter, dyn_ctl); + intr->dyn_ctl_intena_m = PF_GLINT_DYN_CTL_INTENA_M; + intr->dyn_ctl_itridx_m = PF_GLINT_DYN_CTL_ITR_INDX_M; + intr->icr_ena = idpf_get_reg_addr(adapter, PF_INT_DIR_OICR_ENA); + intr->icr_ena_ctlq_m = PF_INT_DIR_OICR_ENA_M; +} + +/** + * idpf_intr_reg_init - Initialize interrupt registers + * @vport: virtual port structure + */ +static int idpf_intr_reg_init(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + int num_vecs = vport->num_q_vectors; + struct idpf_vec_regs *reg_vals; + int num_regs, i, err = 0; + u32 rx_itr, tx_itr; + u16 total_vecs; + + total_vecs = idpf_get_reserved_vecs(vport->adapter); + reg_vals = kcalloc(total_vecs, sizeof(struct idpf_vec_regs), + GFP_KERNEL); + if (!reg_vals) + return -ENOMEM; + + num_regs = idpf_get_reg_intr_vecs(vport, reg_vals); + if (num_regs < num_vecs) { + err = -EINVAL; + goto free_reg_vals; + } + + for (i = 0; i < num_vecs; i++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[i]; + u16 vec_id = vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC; + struct idpf_intr_reg *intr = &q_vector->intr_reg; + u32 spacing; + + intr->dyn_ctl = idpf_get_reg_addr(adapter, + reg_vals[vec_id].dyn_ctl_reg); + intr->dyn_ctl_intena_m = PF_GLINT_DYN_CTL_INTENA_M; + intr->dyn_ctl_itridx_s = PF_GLINT_DYN_CTL_ITR_INDX_S; + intr->dyn_ctl_intrvl_s = PF_GLINT_DYN_CTL_INTERVAL_S; + + spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing, + IDPF_PF_ITR_IDX_SPACING); + rx_itr = PF_GLINT_ITR_ADDR(VIRTCHNL2_ITR_IDX_0, + reg_vals[vec_id].itrn_reg, + spacing); + tx_itr = PF_GLINT_ITR_ADDR(VIRTCHNL2_ITR_IDX_1, + reg_vals[vec_id].itrn_reg, + spacing); + intr->rx_itr = idpf_get_reg_addr(adapter, rx_itr); + intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr); + } + +free_reg_vals: + kfree(reg_vals); + + return err; +} + +/** + * idpf_reset_reg_init - Initialize reset registers + * @adapter: Driver specific private structure + */ +static void idpf_reset_reg_init(struct idpf_adapter *adapter) +{ + adapter->reset_reg.rstat = idpf_get_reg_addr(adapter, PFGEN_RSTAT); + adapter->reset_reg.rstat_m = PFGEN_RSTAT_PFR_STATE_M; +} + +/** + * idpf_trigger_reset - trigger reset + * @adapter: Driver specific private structure + * @trig_cause: Reason to trigger a reset + */ +static void idpf_trigger_reset(struct idpf_adapter *adapter, + enum idpf_flags __always_unused trig_cause) +{ + u32 reset_reg; + + reset_reg = readl(idpf_get_reg_addr(adapter, PFGEN_CTRL)); + writel(reset_reg | PFGEN_CTRL_PFSWR, + idpf_get_reg_addr(adapter, PFGEN_CTRL)); +} + +/** + * idpf_reg_ops_init - Initialize register API function pointers + * @adapter: Driver specific private structure + */ +static void idpf_reg_ops_init(struct idpf_adapter *adapter) +{ + adapter->dev_ops.reg_ops.ctlq_reg_init = idpf_ctlq_reg_init; + adapter->dev_ops.reg_ops.intr_reg_init = idpf_intr_reg_init; + adapter->dev_ops.reg_ops.mb_intr_reg_init = idpf_mb_intr_reg_init; + adapter->dev_ops.reg_ops.reset_reg_init = idpf_reset_reg_init; + adapter->dev_ops.reg_ops.trigger_reset = idpf_trigger_reset; +} + +/** + * idpf_dev_ops_init - Initialize device API function pointers + * @adapter: Driver specific private structure + */ +void idpf_dev_ops_init(struct idpf_adapter *adapter) +{ + idpf_reg_ops_init(adapter); +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_devids.h b/drivers/net/ethernet/intel/idpf/idpf_devids.h new file mode 100644 index 000000000000..5154a52ae61c --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_devids.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_DEVIDS_H_ +#define _IDPF_DEVIDS_H_ + +#define IDPF_DEV_ID_PF 0x1452 +#define IDPF_DEV_ID_VF 0x145C + +#endif /* _IDPF_DEVIDS_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c new file mode 100644 index 000000000000..52ea38669f85 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -0,0 +1,1369 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" + +/** + * idpf_get_rxnfc - command to get RX flow classification rules + * @netdev: network interface device structure + * @cmd: ethtool rxnfc command + * @rule_locs: pointer to store rule locations + * + * Returns Success if the command is supported. + */ +static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + u32 __always_unused *rule_locs) +{ + struct idpf_vport *vport; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = vport->num_rxq; + idpf_vport_ctrl_unlock(netdev); + + return 0; + default: + break; + } + + idpf_vport_ctrl_unlock(netdev); + + return -EOPNOTSUPP; +} + +/** + * idpf_get_rxfh_key_size - get the RSS hash key size + * @netdev: network interface device structure + * + * Returns the key size on success, error value on failure. + */ +static u32 idpf_get_rxfh_key_size(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_user_config_data *user_config; + + if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) + return -EOPNOTSUPP; + + user_config = &np->adapter->vport_config[np->vport_idx]->user_config; + + return user_config->rss_data.rss_key_size; +} + +/** + * idpf_get_rxfh_indir_size - get the rx flow hash indirection table size + * @netdev: network interface device structure + * + * Returns the table size on success, error value on failure. + */ +static u32 idpf_get_rxfh_indir_size(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_user_config_data *user_config; + + if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) + return -EOPNOTSUPP; + + user_config = &np->adapter->vport_config[np->vport_idx]->user_config; + + return user_config->rss_data.rss_lut_size; +} + +/** + * idpf_get_rxfh - get the rx flow hash indirection table + * @netdev: network interface device structure + * @indir: indirection table + * @key: hash key + * @hfunc: hash function in use + * + * Reads the indirection table directly from the hardware. Always returns 0. + */ +static int idpf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_rss_data *rss_data; + struct idpf_adapter *adapter; + int err = 0; + u16 i; + + idpf_vport_ctrl_lock(netdev); + + adapter = np->adapter; + + if (!idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) { + err = -EOPNOTSUPP; + goto unlock_mutex; + } + + rss_data = &adapter->vport_config[np->vport_idx]->user_config.rss_data; + if (np->state != __IDPF_VPORT_UP) + goto unlock_mutex; + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + if (key) + memcpy(key, rss_data->rss_key, rss_data->rss_key_size); + + if (indir) { + for (i = 0; i < rss_data->rss_lut_size; i++) + indir[i] = rss_data->rss_lut[i]; + } + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_set_rxfh - set the rx flow hash indirection table + * @netdev: network interface device structure + * @indir: indirection table + * @key: hash key + * @hfunc: hash function to use + * + * Returns -EINVAL if the table specifies an invalid queue id, otherwise + * returns 0 after programming the table. + */ +static int idpf_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_rss_data *rss_data; + struct idpf_adapter *adapter; + struct idpf_vport *vport; + int err = 0; + u16 lut; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + adapter = vport->adapter; + + if (!idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) { + err = -EOPNOTSUPP; + goto unlock_mutex; + } + + rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + if (np->state != __IDPF_VPORT_UP) + goto unlock_mutex; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) { + err = -EOPNOTSUPP; + goto unlock_mutex; + } + + if (key) + memcpy(rss_data->rss_key, key, rss_data->rss_key_size); + + if (indir) { + for (lut = 0; lut < rss_data->rss_lut_size; lut++) + rss_data->rss_lut[lut] = indir[lut]; + } + + err = idpf_config_rss(vport); + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_get_channels: get the number of channels supported by the device + * @netdev: network interface device structure + * @ch: channel information structure + * + * Report maximum of TX and RX. Report one extra channel to match our MailBox + * Queue. + */ +static void idpf_get_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_config *vport_config; + u16 num_txq, num_rxq; + u16 combined; + + vport_config = np->adapter->vport_config[np->vport_idx]; + + num_txq = vport_config->user_config.num_req_tx_qs; + num_rxq = vport_config->user_config.num_req_rx_qs; + + combined = min(num_txq, num_rxq); + + /* Report maximum channels */ + ch->max_combined = min_t(u16, vport_config->max_q.max_txq, + vport_config->max_q.max_rxq); + ch->max_rx = vport_config->max_q.max_rxq; + ch->max_tx = vport_config->max_q.max_txq; + + ch->max_other = IDPF_MAX_MBXQ; + ch->other_count = IDPF_MAX_MBXQ; + + ch->combined_count = combined; + ch->rx_count = num_rxq - combined; + ch->tx_count = num_txq - combined; +} + +/** + * idpf_set_channels: set the new channel count + * @netdev: network interface device structure + * @ch: channel information structure + * + * Negotiate a new number of channels with CP. Returns 0 on success, negative + * on failure. + */ +static int idpf_set_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct idpf_vport_config *vport_config; + u16 combined, num_txq, num_rxq; + unsigned int num_req_tx_q; + unsigned int num_req_rx_q; + struct idpf_vport *vport; + struct device *dev; + int err = 0; + u16 idx; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + idx = vport->idx; + vport_config = vport->adapter->vport_config[idx]; + + num_txq = vport_config->user_config.num_req_tx_qs; + num_rxq = vport_config->user_config.num_req_rx_qs; + + combined = min(num_txq, num_rxq); + + /* these checks are for cases where user didn't specify a particular + * value on cmd line but we get non-zero value anyway via + * get_channels(); look at ethtool.c in ethtool repository (the user + * space part), particularly, do_schannels() routine + */ + if (ch->combined_count == combined) + ch->combined_count = 0; + if (ch->combined_count && ch->rx_count == num_rxq - combined) + ch->rx_count = 0; + if (ch->combined_count && ch->tx_count == num_txq - combined) + ch->tx_count = 0; + + num_req_tx_q = ch->combined_count + ch->tx_count; + num_req_rx_q = ch->combined_count + ch->rx_count; + + dev = &vport->adapter->pdev->dev; + /* It's possible to specify number of queues that exceeds max. + * Stack checks max combined_count and max [tx|rx]_count but not the + * max combined_count + [tx|rx]_count. These checks should catch that. + */ + if (num_req_tx_q > vport_config->max_q.max_txq) { + dev_info(dev, "Maximum TX queues is %d\n", + vport_config->max_q.max_txq); + err = -EINVAL; + goto unlock_mutex; + } + if (num_req_rx_q > vport_config->max_q.max_rxq) { + dev_info(dev, "Maximum RX queues is %d\n", + vport_config->max_q.max_rxq); + err = -EINVAL; + goto unlock_mutex; + } + + if (num_req_tx_q == num_txq && num_req_rx_q == num_rxq) + goto unlock_mutex; + + vport_config->user_config.num_req_tx_qs = num_req_tx_q; + vport_config->user_config.num_req_rx_qs = num_req_rx_q; + + err = idpf_initiate_soft_reset(vport, IDPF_SR_Q_CHANGE); + if (err) { + /* roll back queue change */ + vport_config->user_config.num_req_tx_qs = num_txq; + vport_config->user_config.num_req_rx_qs = num_rxq; + } + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_get_ringparam - Get ring parameters + * @netdev: network interface device structure + * @ring: ethtool ringparam structure + * @kring: unused + * @ext_ack: unused + * + * Returns current ring parameters. TX and RX rings are reported separately, + * but the number of rings is not reported. + */ +static void idpf_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kring, + struct netlink_ext_ack *ext_ack) +{ + struct idpf_vport *vport; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + ring->rx_max_pending = IDPF_MAX_RXQ_DESC; + ring->tx_max_pending = IDPF_MAX_TXQ_DESC; + ring->rx_pending = vport->rxq_desc_count; + ring->tx_pending = vport->txq_desc_count; + + idpf_vport_ctrl_unlock(netdev); +} + +/** + * idpf_set_ringparam - Set ring parameters + * @netdev: network interface device structure + * @ring: ethtool ringparam structure + * @kring: unused + * @ext_ack: unused + * + * Sets ring parameters. TX and RX rings are controlled separately, but the + * number of rings is not specified, so all rings get the same settings. + */ +static int idpf_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kring, + struct netlink_ext_ack *ext_ack) +{ + struct idpf_vport_user_config_data *config_data; + u32 new_rx_count, new_tx_count; + struct idpf_vport *vport; + int i, err = 0; + u16 idx; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + idx = vport->idx; + + if (ring->tx_pending < IDPF_MIN_TXQ_DESC) { + netdev_err(netdev, "Descriptors requested (Tx: %u) is less than min supported (%u)\n", + ring->tx_pending, + IDPF_MIN_TXQ_DESC); + err = -EINVAL; + goto unlock_mutex; + } + + if (ring->rx_pending < IDPF_MIN_RXQ_DESC) { + netdev_err(netdev, "Descriptors requested (Rx: %u) is less than min supported (%u)\n", + ring->rx_pending, + IDPF_MIN_RXQ_DESC); + err = -EINVAL; + goto unlock_mutex; + } + + new_rx_count = ALIGN(ring->rx_pending, IDPF_REQ_RXQ_DESC_MULTIPLE); + if (new_rx_count != ring->rx_pending) + netdev_info(netdev, "Requested Rx descriptor count rounded up to %u\n", + new_rx_count); + + new_tx_count = ALIGN(ring->tx_pending, IDPF_REQ_DESC_MULTIPLE); + if (new_tx_count != ring->tx_pending) + netdev_info(netdev, "Requested Tx descriptor count rounded up to %u\n", + new_tx_count); + + if (new_tx_count == vport->txq_desc_count && + new_rx_count == vport->rxq_desc_count) + goto unlock_mutex; + + config_data = &vport->adapter->vport_config[idx]->user_config; + config_data->num_req_txq_desc = new_tx_count; + config_data->num_req_rxq_desc = new_rx_count; + + /* Since we adjusted the RX completion queue count, the RX buffer queue + * descriptor count needs to be adjusted as well + */ + for (i = 0; i < vport->num_bufqs_per_qgrp; i++) + vport->bufq_desc_count[i] = + IDPF_RX_BUFQ_DESC_COUNT(new_rx_count, + vport->num_bufqs_per_qgrp); + + err = idpf_initiate_soft_reset(vport, IDPF_SR_Q_DESC_CHANGE); + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * struct idpf_stats - definition for an ethtool statistic + * @stat_string: statistic name to display in ethtool -S output + * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64) + * @stat_offset: offsetof() the stat from a base pointer + * + * This structure defines a statistic to be added to the ethtool stats buffer. + * It defines a statistic as offset from a common base pointer. Stats should + * be defined in constant arrays using the IDPF_STAT macro, with every element + * of the array using the same _type for calculating the sizeof_stat and + * stat_offset. + * + * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or + * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from + * the idpf_add_ethtool_stat() helper function. + * + * The @stat_string is interpreted as a format string, allowing formatted + * values to be inserted while looping over multiple structures for a given + * statistics array. Thus, every statistic string in an array should have the + * same type and number of format specifiers, to be formatted by variadic + * arguments to the idpf_add_stat_string() helper function. + */ +struct idpf_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +/* Helper macro to define an idpf_stat structure with proper size and type. + * Use this when defining constant statistics arrays. Note that @_type expects + * only a type name and is used multiple times. + */ +#define IDPF_STAT(_type, _name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = sizeof_field(_type, _stat), \ + .stat_offset = offsetof(_type, _stat) \ +} + +/* Helper macro for defining some statistics related to queues */ +#define IDPF_QUEUE_STAT(_name, _stat) \ + IDPF_STAT(struct idpf_queue, _name, _stat) + +/* Stats associated with a Tx queue */ +static const struct idpf_stats idpf_gstrings_tx_queue_stats[] = { + IDPF_QUEUE_STAT("pkts", q_stats.tx.packets), + IDPF_QUEUE_STAT("bytes", q_stats.tx.bytes), + IDPF_QUEUE_STAT("lso_pkts", q_stats.tx.lso_pkts), +}; + +/* Stats associated with an Rx queue */ +static const struct idpf_stats idpf_gstrings_rx_queue_stats[] = { + IDPF_QUEUE_STAT("pkts", q_stats.rx.packets), + IDPF_QUEUE_STAT("bytes", q_stats.rx.bytes), + IDPF_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rx.rsc_pkts), +}; + +#define IDPF_TX_QUEUE_STATS_LEN ARRAY_SIZE(idpf_gstrings_tx_queue_stats) +#define IDPF_RX_QUEUE_STATS_LEN ARRAY_SIZE(idpf_gstrings_rx_queue_stats) + +#define IDPF_PORT_STAT(_name, _stat) \ + IDPF_STAT(struct idpf_vport, _name, _stat) + +static const struct idpf_stats idpf_gstrings_port_stats[] = { + IDPF_PORT_STAT("rx-csum_errors", port_stats.rx_hw_csum_err), + IDPF_PORT_STAT("rx-hsplit", port_stats.rx_hsplit), + IDPF_PORT_STAT("rx-hsplit_hbo", port_stats.rx_hsplit_hbo), + IDPF_PORT_STAT("rx-bad_descs", port_stats.rx_bad_descs), + IDPF_PORT_STAT("tx-skb_drops", port_stats.tx_drops), + IDPF_PORT_STAT("tx-dma_map_errs", port_stats.tx_dma_map_errs), + IDPF_PORT_STAT("tx-linearized_pkts", port_stats.tx_linearize), + IDPF_PORT_STAT("tx-busy_events", port_stats.tx_busy), + IDPF_PORT_STAT("rx-unicast_pkts", port_stats.vport_stats.rx_unicast), + IDPF_PORT_STAT("rx-multicast_pkts", port_stats.vport_stats.rx_multicast), + IDPF_PORT_STAT("rx-broadcast_pkts", port_stats.vport_stats.rx_broadcast), + IDPF_PORT_STAT("rx-unknown_protocol", port_stats.vport_stats.rx_unknown_protocol), + IDPF_PORT_STAT("tx-unicast_pkts", port_stats.vport_stats.tx_unicast), + IDPF_PORT_STAT("tx-multicast_pkts", port_stats.vport_stats.tx_multicast), + IDPF_PORT_STAT("tx-broadcast_pkts", port_stats.vport_stats.tx_broadcast), +}; + +#define IDPF_PORT_STATS_LEN ARRAY_SIZE(idpf_gstrings_port_stats) + +/** + * __idpf_add_qstat_strings - copy stat strings into ethtool buffer + * @p: ethtool supplied buffer + * @stats: stat definitions array + * @size: size of the stats array + * @type: stat type + * @idx: stat index + * + * Format and copy the strings described by stats into the buffer pointed at + * by p. + */ +static void __idpf_add_qstat_strings(u8 **p, const struct idpf_stats *stats, + const unsigned int size, const char *type, + unsigned int idx) +{ + unsigned int i; + + for (i = 0; i < size; i++) + ethtool_sprintf(p, "%s_q-%u_%s", + type, idx, stats[i].stat_string); +} + +/** + * idpf_add_qstat_strings - Copy queue stat strings into ethtool buffer + * @p: ethtool supplied buffer + * @stats: stat definitions array + * @type: stat type + * @idx: stat idx + * + * Format and copy the strings described by the const static stats value into + * the buffer pointed at by p. + * + * The parameter @stats is evaluated twice, so parameters with side effects + * should be avoided. Additionally, stats must be an array such that + * ARRAY_SIZE can be called on it. + */ +#define idpf_add_qstat_strings(p, stats, type, idx) \ + __idpf_add_qstat_strings(p, stats, ARRAY_SIZE(stats), type, idx) + +/** + * idpf_add_stat_strings - Copy port stat strings into ethtool buffer + * @p: ethtool buffer + * @stats: struct to copy from + * @size: size of stats array to copy from + */ +static void idpf_add_stat_strings(u8 **p, const struct idpf_stats *stats, + const unsigned int size) +{ + unsigned int i; + + for (i = 0; i < size; i++) + ethtool_sprintf(p, "%s", stats[i].stat_string); +} + +/** + * idpf_get_stat_strings - Get stat strings + * @netdev: network interface device structure + * @data: buffer for string data + * + * Builds the statistics string table + */ +static void idpf_get_stat_strings(struct net_device *netdev, u8 *data) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_config *vport_config; + unsigned int i; + + idpf_add_stat_strings(&data, idpf_gstrings_port_stats, + IDPF_PORT_STATS_LEN); + + vport_config = np->adapter->vport_config[np->vport_idx]; + /* It's critical that we always report a constant number of strings and + * that the strings are reported in the same order regardless of how + * many queues are actually in use. + */ + for (i = 0; i < vport_config->max_q.max_txq; i++) + idpf_add_qstat_strings(&data, idpf_gstrings_tx_queue_stats, + "tx", i); + + for (i = 0; i < vport_config->max_q.max_rxq; i++) + idpf_add_qstat_strings(&data, idpf_gstrings_rx_queue_stats, + "rx", i); + + page_pool_ethtool_stats_get_strings(data); +} + +/** + * idpf_get_strings - Get string set + * @netdev: network interface device structure + * @sset: id of string set + * @data: buffer for string data + * + * Builds string tables for various string sets + */ +static void idpf_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + switch (sset) { + case ETH_SS_STATS: + idpf_get_stat_strings(netdev, data); + break; + default: + break; + } +} + +/** + * idpf_get_sset_count - Get length of string set + * @netdev: network interface device structure + * @sset: id of string set + * + * Reports size of various string tables. + */ +static int idpf_get_sset_count(struct net_device *netdev, int sset) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_config *vport_config; + u16 max_txq, max_rxq; + unsigned int size; + + if (sset != ETH_SS_STATS) + return -EINVAL; + + vport_config = np->adapter->vport_config[np->vport_idx]; + /* This size reported back here *must* be constant throughout the + * lifecycle of the netdevice, i.e. we must report the maximum length + * even for queues that don't technically exist. This is due to the + * fact that this userspace API uses three separate ioctl calls to get + * stats data but has no way to communicate back to userspace when that + * size has changed, which can typically happen as a result of changing + * number of queues. If the number/order of stats change in the middle + * of this call chain it will lead to userspace crashing/accessing bad + * data through buffer under/overflow. + */ + max_txq = vport_config->max_q.max_txq; + max_rxq = vport_config->max_q.max_rxq; + + size = IDPF_PORT_STATS_LEN + (IDPF_TX_QUEUE_STATS_LEN * max_txq) + + (IDPF_RX_QUEUE_STATS_LEN * max_rxq); + size += page_pool_ethtool_stats_get_count(); + + return size; +} + +/** + * idpf_add_one_ethtool_stat - copy the stat into the supplied buffer + * @data: location to store the stat value + * @pstat: old stat pointer to copy from + * @stat: the stat definition + * + * Copies the stat data defined by the pointer and stat structure pair into + * the memory supplied as data. If the pointer is null, data will be zero'd. + */ +static void idpf_add_one_ethtool_stat(u64 *data, void *pstat, + const struct idpf_stats *stat) +{ + char *p; + + if (!pstat) { + /* Ensure that the ethtool data buffer is zero'd for any stats + * which don't have a valid pointer. + */ + *data = 0; + return; + } + + p = (char *)pstat + stat->stat_offset; + switch (stat->sizeof_stat) { + case sizeof(u64): + *data = *((u64 *)p); + break; + case sizeof(u32): + *data = *((u32 *)p); + break; + case sizeof(u16): + *data = *((u16 *)p); + break; + case sizeof(u8): + *data = *((u8 *)p); + break; + default: + WARN_ONCE(1, "unexpected stat size for %s", + stat->stat_string); + *data = 0; + } +} + +/** + * idpf_add_queue_stats - copy queue statistics into supplied buffer + * @data: ethtool stats buffer + * @q: the queue to copy + * + * Queue statistics must be copied while protected by u64_stats_fetch_begin, + * so we can't directly use idpf_add_ethtool_stats. Assumes that queue stats + * are defined in idpf_gstrings_queue_stats. If the queue pointer is null, + * zero out the queue stat values and update the data pointer. Otherwise + * safely copy the stats from the queue into the supplied buffer and update + * the data pointer when finished. + * + * This function expects to be called while under rcu_read_lock(). + */ +static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q) +{ + const struct idpf_stats *stats; + unsigned int start; + unsigned int size; + unsigned int i; + + if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) { + size = IDPF_RX_QUEUE_STATS_LEN; + stats = idpf_gstrings_rx_queue_stats; + } else { + size = IDPF_TX_QUEUE_STATS_LEN; + stats = idpf_gstrings_tx_queue_stats; + } + + /* To avoid invalid statistics values, ensure that we keep retrying + * the copy until we get a consistent value according to + * u64_stats_fetch_retry. + */ + do { + start = u64_stats_fetch_begin(&q->stats_sync); + for (i = 0; i < size; i++) + idpf_add_one_ethtool_stat(&(*data)[i], q, &stats[i]); + } while (u64_stats_fetch_retry(&q->stats_sync, start)); + + /* Once we successfully copy the stats in, update the data pointer */ + *data += size; +} + +/** + * idpf_add_empty_queue_stats - Add stats for a non-existent queue + * @data: pointer to data buffer + * @qtype: type of data queue + * + * We must report a constant length of stats back to userspace regardless of + * how many queues are actually in use because stats collection happens over + * three separate ioctls and there's no way to notify userspace the size + * changed between those calls. This adds empty to data to the stats since we + * don't have a real queue to refer to for this stats slot. + */ +static void idpf_add_empty_queue_stats(u64 **data, u16 qtype) +{ + unsigned int i; + int stats_len; + + if (qtype == VIRTCHNL2_QUEUE_TYPE_RX) + stats_len = IDPF_RX_QUEUE_STATS_LEN; + else + stats_len = IDPF_TX_QUEUE_STATS_LEN; + + for (i = 0; i < stats_len; i++) + (*data)[i] = 0; + *data += stats_len; +} + +/** + * idpf_add_port_stats - Copy port stats into ethtool buffer + * @vport: virtual port struct + * @data: ethtool buffer to copy into + */ +static void idpf_add_port_stats(struct idpf_vport *vport, u64 **data) +{ + unsigned int size = IDPF_PORT_STATS_LEN; + unsigned int start; + unsigned int i; + + do { + start = u64_stats_fetch_begin(&vport->port_stats.stats_sync); + for (i = 0; i < size; i++) + idpf_add_one_ethtool_stat(&(*data)[i], vport, + &idpf_gstrings_port_stats[i]); + } while (u64_stats_fetch_retry(&vport->port_stats.stats_sync, start)); + + *data += size; +} + +/** + * idpf_collect_queue_stats - accumulate various per queue stats + * into port level stats + * @vport: pointer to vport struct + **/ +static void idpf_collect_queue_stats(struct idpf_vport *vport) +{ + struct idpf_port_stats *pstats = &vport->port_stats; + int i, j; + + /* zero out port stats since they're actually tracked in per + * queue stats; this is only for reporting + */ + u64_stats_update_begin(&pstats->stats_sync); + u64_stats_set(&pstats->rx_hw_csum_err, 0); + u64_stats_set(&pstats->rx_hsplit, 0); + u64_stats_set(&pstats->rx_hsplit_hbo, 0); + u64_stats_set(&pstats->rx_bad_descs, 0); + u64_stats_set(&pstats->tx_linearize, 0); + u64_stats_set(&pstats->tx_busy, 0); + u64_stats_set(&pstats->tx_drops, 0); + u64_stats_set(&pstats->tx_dma_map_errs, 0); + u64_stats_update_end(&pstats->stats_sync); + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rxq_grp = &vport->rxq_grps[i]; + u16 num_rxq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rxq_grp->splitq.num_rxq_sets; + else + num_rxq = rxq_grp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { + u64 hw_csum_err, hsplit, hsplit_hbo, bad_descs; + struct idpf_rx_queue_stats *stats; + struct idpf_queue *rxq; + unsigned int start; + + if (idpf_is_queue_model_split(vport->rxq_model)) + rxq = &rxq_grp->splitq.rxq_sets[j]->rxq; + else + rxq = rxq_grp->singleq.rxqs[j]; + + if (!rxq) + continue; + + do { + start = u64_stats_fetch_begin(&rxq->stats_sync); + + stats = &rxq->q_stats.rx; + hw_csum_err = u64_stats_read(&stats->hw_csum_err); + hsplit = u64_stats_read(&stats->hsplit_pkts); + hsplit_hbo = u64_stats_read(&stats->hsplit_buf_ovf); + bad_descs = u64_stats_read(&stats->bad_descs); + } while (u64_stats_fetch_retry(&rxq->stats_sync, start)); + + u64_stats_update_begin(&pstats->stats_sync); + u64_stats_add(&pstats->rx_hw_csum_err, hw_csum_err); + u64_stats_add(&pstats->rx_hsplit, hsplit); + u64_stats_add(&pstats->rx_hsplit_hbo, hsplit_hbo); + u64_stats_add(&pstats->rx_bad_descs, bad_descs); + u64_stats_update_end(&pstats->stats_sync); + } + } + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + for (j = 0; j < txq_grp->num_txq; j++) { + u64 linearize, qbusy, skb_drops, dma_map_errs; + struct idpf_queue *txq = txq_grp->txqs[j]; + struct idpf_tx_queue_stats *stats; + unsigned int start; + + if (!txq) + continue; + + do { + start = u64_stats_fetch_begin(&txq->stats_sync); + + stats = &txq->q_stats.tx; + linearize = u64_stats_read(&stats->linearize); + qbusy = u64_stats_read(&stats->q_busy); + skb_drops = u64_stats_read(&stats->skb_drops); + dma_map_errs = u64_stats_read(&stats->dma_map_errs); + } while (u64_stats_fetch_retry(&txq->stats_sync, start)); + + u64_stats_update_begin(&pstats->stats_sync); + u64_stats_add(&pstats->tx_linearize, linearize); + u64_stats_add(&pstats->tx_busy, qbusy); + u64_stats_add(&pstats->tx_drops, skb_drops); + u64_stats_add(&pstats->tx_dma_map_errs, dma_map_errs); + u64_stats_update_end(&pstats->stats_sync); + } + } +} + +/** + * idpf_get_ethtool_stats - report device statistics + * @netdev: network interface device structure + * @stats: ethtool statistics structure + * @data: pointer to data buffer + * + * All statistics are added to the data buffer as an array of u64. + */ +static void idpf_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, + u64 *data) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_config *vport_config; + struct page_pool_stats pp_stats = { }; + struct idpf_vport *vport; + unsigned int total = 0; + unsigned int i, j; + bool is_splitq; + u16 qtype; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + if (np->state != __IDPF_VPORT_UP) { + idpf_vport_ctrl_unlock(netdev); + + return; + } + + rcu_read_lock(); + + idpf_collect_queue_stats(vport); + idpf_add_port_stats(vport, &data); + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + qtype = VIRTCHNL2_QUEUE_TYPE_TX; + + for (j = 0; j < txq_grp->num_txq; j++, total++) { + struct idpf_queue *txq = txq_grp->txqs[j]; + + if (!txq) + idpf_add_empty_queue_stats(&data, qtype); + else + idpf_add_queue_stats(&data, txq); + } + } + + vport_config = vport->adapter->vport_config[vport->idx]; + /* It is critical we provide a constant number of stats back to + * userspace regardless of how many queues are actually in use because + * there is no way to inform userspace the size has changed between + * ioctl calls. This will fill in any missing stats with zero. + */ + for (; total < vport_config->max_q.max_txq; total++) + idpf_add_empty_queue_stats(&data, VIRTCHNL2_QUEUE_TYPE_TX); + total = 0; + + is_splitq = idpf_is_queue_model_split(vport->rxq_model); + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rxq_grp = &vport->rxq_grps[i]; + u16 num_rxq; + + qtype = VIRTCHNL2_QUEUE_TYPE_RX; + + if (is_splitq) + num_rxq = rxq_grp->splitq.num_rxq_sets; + else + num_rxq = rxq_grp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, total++) { + struct idpf_queue *rxq; + + if (is_splitq) + rxq = &rxq_grp->splitq.rxq_sets[j]->rxq; + else + rxq = rxq_grp->singleq.rxqs[j]; + if (!rxq) + idpf_add_empty_queue_stats(&data, qtype); + else + idpf_add_queue_stats(&data, rxq); + + /* In splitq mode, don't get page pool stats here since + * the pools are attached to the buffer queues + */ + if (is_splitq) + continue; + + if (rxq) + page_pool_get_stats(rxq->pp, &pp_stats); + } + } + + for (i = 0; i < vport->num_rxq_grp; i++) { + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + struct idpf_queue *rxbufq = + &vport->rxq_grps[i].splitq.bufq_sets[j].bufq; + + page_pool_get_stats(rxbufq->pp, &pp_stats); + } + } + + for (; total < vport_config->max_q.max_rxq; total++) + idpf_add_empty_queue_stats(&data, VIRTCHNL2_QUEUE_TYPE_RX); + + page_pool_ethtool_stats_get(data, &pp_stats); + + rcu_read_unlock(); + + idpf_vport_ctrl_unlock(netdev); +} + +/** + * idpf_find_rxq - find rxq from q index + * @vport: virtual port associated to queue + * @q_num: q index used to find queue + * + * returns pointer to rx queue + */ +static struct idpf_queue *idpf_find_rxq(struct idpf_vport *vport, int q_num) +{ + int q_grp, q_idx; + + if (!idpf_is_queue_model_split(vport->rxq_model)) + return vport->rxq_grps->singleq.rxqs[q_num]; + + q_grp = q_num / IDPF_DFLT_SPLITQ_RXQ_PER_GROUP; + q_idx = q_num % IDPF_DFLT_SPLITQ_RXQ_PER_GROUP; + + return &vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq; +} + +/** + * idpf_find_txq - find txq from q index + * @vport: virtual port associated to queue + * @q_num: q index used to find queue + * + * returns pointer to tx queue + */ +static struct idpf_queue *idpf_find_txq(struct idpf_vport *vport, int q_num) +{ + int q_grp; + + if (!idpf_is_queue_model_split(vport->txq_model)) + return vport->txqs[q_num]; + + q_grp = q_num / IDPF_DFLT_SPLITQ_TXQ_PER_GROUP; + + return vport->txq_grps[q_grp].complq; +} + +/** + * __idpf_get_q_coalesce - get ITR values for specific queue + * @ec: ethtool structure to fill with driver's coalesce settings + * @q: quuee of Rx or Tx + */ +static void __idpf_get_q_coalesce(struct ethtool_coalesce *ec, + struct idpf_queue *q) +{ + if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) { + ec->use_adaptive_rx_coalesce = + IDPF_ITR_IS_DYNAMIC(q->q_vector->rx_intr_mode); + ec->rx_coalesce_usecs = q->q_vector->rx_itr_value; + } else { + ec->use_adaptive_tx_coalesce = + IDPF_ITR_IS_DYNAMIC(q->q_vector->tx_intr_mode); + ec->tx_coalesce_usecs = q->q_vector->tx_itr_value; + } +} + +/** + * idpf_get_q_coalesce - get ITR values for specific queue + * @netdev: pointer to the netdev associated with this query + * @ec: coalesce settings to program the device with + * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index + * + * Return 0 on success, and negative on failure + */ +static int idpf_get_q_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + u32 q_num) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport *vport; + int err = 0; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + if (np->state != __IDPF_VPORT_UP) + goto unlock_mutex; + + if (q_num >= vport->num_rxq && q_num >= vport->num_txq) { + err = -EINVAL; + goto unlock_mutex; + } + + if (q_num < vport->num_rxq) + __idpf_get_q_coalesce(ec, idpf_find_rxq(vport, q_num)); + + if (q_num < vport->num_txq) + __idpf_get_q_coalesce(ec, idpf_find_txq(vport, q_num)); + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_get_coalesce - get ITR values as requested by user + * @netdev: pointer to the netdev associated with this query + * @ec: coalesce settings to be filled + * @kec: unused + * @extack: unused + * + * Return 0 on success, and negative on failure + */ +static int idpf_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kec, + struct netlink_ext_ack *extack) +{ + /* Return coalesce based on queue number zero */ + return idpf_get_q_coalesce(netdev, ec, 0); +} + +/** + * idpf_get_per_q_coalesce - get ITR values as requested by user + * @netdev: pointer to the netdev associated with this query + * @q_num: queue for which the itr values has to retrieved + * @ec: coalesce settings to be filled + * + * Return 0 on success, and negative on failure + */ + +static int idpf_get_per_q_coalesce(struct net_device *netdev, u32 q_num, + struct ethtool_coalesce *ec) +{ + return idpf_get_q_coalesce(netdev, ec, q_num); +} + +/** + * __idpf_set_q_coalesce - set ITR values for specific queue + * @ec: ethtool structure from user to update ITR settings + * @q: queue for which itr values has to be set + * @is_rxq: is queue type rx + * + * Returns 0 on success, negative otherwise. + */ +static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, + struct idpf_queue *q, bool is_rxq) +{ + u32 use_adaptive_coalesce, coalesce_usecs; + struct idpf_q_vector *qv = q->q_vector; + bool is_dim_ena = false; + u16 itr_val; + + if (is_rxq) { + is_dim_ena = IDPF_ITR_IS_DYNAMIC(qv->rx_intr_mode); + use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; + coalesce_usecs = ec->rx_coalesce_usecs; + itr_val = qv->rx_itr_value; + } else { + is_dim_ena = IDPF_ITR_IS_DYNAMIC(qv->tx_intr_mode); + use_adaptive_coalesce = ec->use_adaptive_tx_coalesce; + coalesce_usecs = ec->tx_coalesce_usecs; + itr_val = qv->tx_itr_value; + } + if (coalesce_usecs != itr_val && use_adaptive_coalesce) { + netdev_err(q->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n"); + + return -EINVAL; + } + + if (is_dim_ena && use_adaptive_coalesce) + return 0; + + if (coalesce_usecs > IDPF_ITR_MAX) { + netdev_err(q->vport->netdev, + "Invalid value, %d-usecs range is 0-%d\n", + coalesce_usecs, IDPF_ITR_MAX); + + return -EINVAL; + } + + if (coalesce_usecs % 2) { + coalesce_usecs--; + netdev_info(q->vport->netdev, + "HW only supports even ITR values, ITR rounded to %d\n", + coalesce_usecs); + } + + if (is_rxq) { + qv->rx_itr_value = coalesce_usecs; + if (use_adaptive_coalesce) { + qv->rx_intr_mode = IDPF_ITR_DYNAMIC; + } else { + qv->rx_intr_mode = !IDPF_ITR_DYNAMIC; + idpf_vport_intr_write_itr(qv, qv->rx_itr_value, + false); + } + } else { + qv->tx_itr_value = coalesce_usecs; + if (use_adaptive_coalesce) { + qv->tx_intr_mode = IDPF_ITR_DYNAMIC; + } else { + qv->tx_intr_mode = !IDPF_ITR_DYNAMIC; + idpf_vport_intr_write_itr(qv, qv->tx_itr_value, true); + } + } + + /* Update of static/dynamic itr will be taken care when interrupt is + * fired + */ + return 0; +} + +/** + * idpf_set_q_coalesce - set ITR values for specific queue + * @vport: vport associated to the queue that need updating + * @ec: coalesce settings to program the device with + * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index + * @is_rxq: is queue type rx + * + * Return 0 on success, and negative on failure + */ +static int idpf_set_q_coalesce(struct idpf_vport *vport, + struct ethtool_coalesce *ec, + int q_num, bool is_rxq) +{ + struct idpf_queue *q; + + q = is_rxq ? idpf_find_rxq(vport, q_num) : idpf_find_txq(vport, q_num); + + if (q && __idpf_set_q_coalesce(ec, q, is_rxq)) + return -EINVAL; + + return 0; +} + +/** + * idpf_set_coalesce - set ITR values as requested by user + * @netdev: pointer to the netdev associated with this query + * @ec: coalesce settings to program the device with + * @kec: unused + * @extack: unused + * + * Return 0 on success, and negative on failure + */ +static int idpf_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kec, + struct netlink_ext_ack *extack) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport *vport; + int i, err = 0; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + if (np->state != __IDPF_VPORT_UP) + goto unlock_mutex; + + for (i = 0; i < vport->num_txq; i++) { + err = idpf_set_q_coalesce(vport, ec, i, false); + if (err) + goto unlock_mutex; + } + + for (i = 0; i < vport->num_rxq; i++) { + err = idpf_set_q_coalesce(vport, ec, i, true); + if (err) + goto unlock_mutex; + } + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_set_per_q_coalesce - set ITR values as requested by user + * @netdev: pointer to the netdev associated with this query + * @q_num: queue for which the itr values has to be set + * @ec: coalesce settings to program the device with + * + * Return 0 on success, and negative on failure + */ +static int idpf_set_per_q_coalesce(struct net_device *netdev, u32 q_num, + struct ethtool_coalesce *ec) +{ + struct idpf_vport *vport; + int err; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + err = idpf_set_q_coalesce(vport, ec, q_num, false); + if (err) { + idpf_vport_ctrl_unlock(netdev); + + return err; + } + + err = idpf_set_q_coalesce(vport, ec, q_num, true); + + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_get_msglevel - Get debug message level + * @netdev: network interface device structure + * + * Returns current debug message level. + */ +static u32 idpf_get_msglevel(struct net_device *netdev) +{ + struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev); + + return adapter->msg_enable; +} + +/** + * idpf_set_msglevel - Set debug message level + * @netdev: network interface device structure + * @data: message level + * + * Set current debug message level. Higher values cause the driver to + * be noisier. + */ +static void idpf_set_msglevel(struct net_device *netdev, u32 data) +{ + struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev); + + adapter->msg_enable = data; +} + +/** + * idpf_get_link_ksettings - Get Link Speed and Duplex settings + * @netdev: network interface device structure + * @cmd: ethtool command + * + * Reports speed/duplex settings. + **/ +static int idpf_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct idpf_vport *vport; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + ethtool_link_ksettings_zero_link_mode(cmd, supported); + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.port = PORT_NONE; + if (vport->link_up) { + cmd->base.duplex = DUPLEX_FULL; + cmd->base.speed = vport->link_speed_mbps; + } else { + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->base.speed = SPEED_UNKNOWN; + } + + idpf_vport_ctrl_unlock(netdev); + + return 0; +} + +static const struct ethtool_ops idpf_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USE_ADAPTIVE, + .get_msglevel = idpf_get_msglevel, + .set_msglevel = idpf_set_msglevel, + .get_link = ethtool_op_get_link, + .get_coalesce = idpf_get_coalesce, + .set_coalesce = idpf_set_coalesce, + .get_per_queue_coalesce = idpf_get_per_q_coalesce, + .set_per_queue_coalesce = idpf_set_per_q_coalesce, + .get_ethtool_stats = idpf_get_ethtool_stats, + .get_strings = idpf_get_strings, + .get_sset_count = idpf_get_sset_count, + .get_channels = idpf_get_channels, + .get_rxnfc = idpf_get_rxnfc, + .get_rxfh_key_size = idpf_get_rxfh_key_size, + .get_rxfh_indir_size = idpf_get_rxfh_indir_size, + .get_rxfh = idpf_get_rxfh, + .set_rxfh = idpf_set_rxfh, + .set_channels = idpf_set_channels, + .get_ringparam = idpf_get_ringparam, + .set_ringparam = idpf_set_ringparam, + .get_link_ksettings = idpf_get_link_ksettings, +}; + +/** + * idpf_set_ethtool_ops - Initialize ethtool ops struct + * @netdev: network interface device structure + * + * Sets ethtool ops struct in our netdev so that ethtool can call + * our functions. + */ +void idpf_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &idpf_ethtool_ops; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h b/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h new file mode 100644 index 000000000000..24edb8a6ec2e --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_LAN_PF_REGS_H_ +#define _IDPF_LAN_PF_REGS_H_ + +/* Receive queues */ +#define PF_QRX_BASE 0x00000000 +#define PF_QRX_TAIL(_QRX) (PF_QRX_BASE + (((_QRX) * 0x1000))) +#define PF_QRX_BUFFQ_BASE 0x03000000 +#define PF_QRX_BUFFQ_TAIL(_QRX) (PF_QRX_BUFFQ_BASE + (((_QRX) * 0x1000))) + +/* Transmit queues */ +#define PF_QTX_BASE 0x05000000 +#define PF_QTX_COMM_DBELL(_DBQM) (PF_QTX_BASE + ((_DBQM) * 0x1000)) + +/* Control(PF Mailbox) Queue */ +#define PF_FW_BASE 0x08400000 + +#define PF_FW_ARQBAL (PF_FW_BASE) +#define PF_FW_ARQBAH (PF_FW_BASE + 0x4) +#define PF_FW_ARQLEN (PF_FW_BASE + 0x8) +#define PF_FW_ARQLEN_ARQLEN_S 0 +#define PF_FW_ARQLEN_ARQLEN_M GENMASK(12, 0) +#define PF_FW_ARQLEN_ARQVFE_S 28 +#define PF_FW_ARQLEN_ARQVFE_M BIT(PF_FW_ARQLEN_ARQVFE_S) +#define PF_FW_ARQLEN_ARQOVFL_S 29 +#define PF_FW_ARQLEN_ARQOVFL_M BIT(PF_FW_ARQLEN_ARQOVFL_S) +#define PF_FW_ARQLEN_ARQCRIT_S 30 +#define PF_FW_ARQLEN_ARQCRIT_M BIT(PF_FW_ARQLEN_ARQCRIT_S) +#define PF_FW_ARQLEN_ARQENABLE_S 31 +#define PF_FW_ARQLEN_ARQENABLE_M BIT(PF_FW_ARQLEN_ARQENABLE_S) +#define PF_FW_ARQH (PF_FW_BASE + 0xC) +#define PF_FW_ARQH_ARQH_S 0 +#define PF_FW_ARQH_ARQH_M GENMASK(12, 0) +#define PF_FW_ARQT (PF_FW_BASE + 0x10) + +#define PF_FW_ATQBAL (PF_FW_BASE + 0x14) +#define PF_FW_ATQBAH (PF_FW_BASE + 0x18) +#define PF_FW_ATQLEN (PF_FW_BASE + 0x1C) +#define PF_FW_ATQLEN_ATQLEN_S 0 +#define PF_FW_ATQLEN_ATQLEN_M GENMASK(9, 0) +#define PF_FW_ATQLEN_ATQVFE_S 28 +#define PF_FW_ATQLEN_ATQVFE_M BIT(PF_FW_ATQLEN_ATQVFE_S) +#define PF_FW_ATQLEN_ATQOVFL_S 29 +#define PF_FW_ATQLEN_ATQOVFL_M BIT(PF_FW_ATQLEN_ATQOVFL_S) +#define PF_FW_ATQLEN_ATQCRIT_S 30 +#define PF_FW_ATQLEN_ATQCRIT_M BIT(PF_FW_ATQLEN_ATQCRIT_S) +#define PF_FW_ATQLEN_ATQENABLE_S 31 +#define PF_FW_ATQLEN_ATQENABLE_M BIT(PF_FW_ATQLEN_ATQENABLE_S) +#define PF_FW_ATQH (PF_FW_BASE + 0x20) +#define PF_FW_ATQH_ATQH_S 0 +#define PF_FW_ATQH_ATQH_M GENMASK(9, 0) +#define PF_FW_ATQT (PF_FW_BASE + 0x24) + +/* Interrupts */ +#define PF_GLINT_BASE 0x08900000 +#define PF_GLINT_DYN_CTL(_INT) (PF_GLINT_BASE + ((_INT) * 0x1000)) +#define PF_GLINT_DYN_CTL_INTENA_S 0 +#define PF_GLINT_DYN_CTL_INTENA_M BIT(PF_GLINT_DYN_CTL_INTENA_S) +#define PF_GLINT_DYN_CTL_CLEARPBA_S 1 +#define PF_GLINT_DYN_CTL_CLEARPBA_M BIT(PF_GLINT_DYN_CTL_CLEARPBA_S) +#define PF_GLINT_DYN_CTL_SWINT_TRIG_S 2 +#define PF_GLINT_DYN_CTL_SWINT_TRIG_M BIT(PF_GLINT_DYN_CTL_SWINT_TRIG_S) +#define PF_GLINT_DYN_CTL_ITR_INDX_S 3 +#define PF_GLINT_DYN_CTL_ITR_INDX_M GENMASK(4, 3) +#define PF_GLINT_DYN_CTL_INTERVAL_S 5 +#define PF_GLINT_DYN_CTL_INTERVAL_M BIT(PF_GLINT_DYN_CTL_INTERVAL_S) +#define PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_S 24 +#define PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_S) +#define PF_GLINT_DYN_CTL_SW_ITR_INDX_S 25 +#define PF_GLINT_DYN_CTL_SW_ITR_INDX_M BIT(PF_GLINT_DYN_CTL_SW_ITR_INDX_S) +#define PF_GLINT_DYN_CTL_WB_ON_ITR_S 30 +#define PF_GLINT_DYN_CTL_WB_ON_ITR_M BIT(PF_GLINT_DYN_CTL_WB_ON_ITR_S) +#define PF_GLINT_DYN_CTL_INTENA_MSK_S 31 +#define PF_GLINT_DYN_CTL_INTENA_MSK_M BIT(PF_GLINT_DYN_CTL_INTENA_MSK_S) +/* _ITR is ITR index, _INT is interrupt index, _itrn_indx_spacing is + * spacing b/w itrn registers of the same vector. + */ +#define PF_GLINT_ITR_ADDR(_ITR, _reg_start, _itrn_indx_spacing) \ + ((_reg_start) + ((_ITR) * (_itrn_indx_spacing))) +/* For PF, itrn_indx_spacing is 4 and itrn_reg_spacing is 0x1000 */ +#define PF_GLINT_ITR(_ITR, _INT) \ + (PF_GLINT_BASE + (((_ITR) + 1) * 4) + ((_INT) * 0x1000)) +#define PF_GLINT_ITR_MAX_INDEX 2 +#define PF_GLINT_ITR_INTERVAL_S 0 +#define PF_GLINT_ITR_INTERVAL_M GENMASK(11, 0) + +/* Generic registers */ +#define PF_INT_DIR_OICR_ENA 0x08406000 +#define PF_INT_DIR_OICR_ENA_S 0 +#define PF_INT_DIR_OICR_ENA_M GENMASK(31, 0) +#define PF_INT_DIR_OICR 0x08406004 +#define PF_INT_DIR_OICR_TSYN_EVNT 0 +#define PF_INT_DIR_OICR_PHY_TS_0 BIT(1) +#define PF_INT_DIR_OICR_PHY_TS_1 BIT(2) +#define PF_INT_DIR_OICR_CAUSE 0x08406008 +#define PF_INT_DIR_OICR_CAUSE_CAUSE_S 0 +#define PF_INT_DIR_OICR_CAUSE_CAUSE_M GENMASK(31, 0) +#define PF_INT_PBA_CLEAR 0x0840600C + +#define PF_FUNC_RID 0x08406010 +#define PF_FUNC_RID_FUNCTION_NUMBER_S 0 +#define PF_FUNC_RID_FUNCTION_NUMBER_M GENMASK(2, 0) +#define PF_FUNC_RID_DEVICE_NUMBER_S 3 +#define PF_FUNC_RID_DEVICE_NUMBER_M GENMASK(7, 3) +#define PF_FUNC_RID_BUS_NUMBER_S 8 +#define PF_FUNC_RID_BUS_NUMBER_M GENMASK(15, 8) + +/* Reset registers */ +#define PFGEN_RTRIG 0x08407000 +#define PFGEN_RTRIG_CORER_S 0 +#define PFGEN_RTRIG_CORER_M BIT(0) +#define PFGEN_RTRIG_LINKR_S 1 +#define PFGEN_RTRIG_LINKR_M BIT(1) +#define PFGEN_RTRIG_IMCR_S 2 +#define PFGEN_RTRIG_IMCR_M BIT(2) +#define PFGEN_RSTAT 0x08407008 /* PFR Status */ +#define PFGEN_RSTAT_PFR_STATE_S 0 +#define PFGEN_RSTAT_PFR_STATE_M GENMASK(1, 0) +#define PFGEN_CTRL 0x0840700C +#define PFGEN_CTRL_PFSWR BIT(0) + +#endif diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h new file mode 100644 index 000000000000..a5752dcab888 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h @@ -0,0 +1,293 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_LAN_TXRX_H_ +#define _IDPF_LAN_TXRX_H_ + +enum idpf_rss_hash { + IDPF_HASH_INVALID = 0, + /* Values 1 - 28 are reserved for future use */ + IDPF_HASH_NONF_UNICAST_IPV4_UDP = 29, + IDPF_HASH_NONF_MULTICAST_IPV4_UDP, + IDPF_HASH_NONF_IPV4_UDP, + IDPF_HASH_NONF_IPV4_TCP_SYN_NO_ACK, + IDPF_HASH_NONF_IPV4_TCP, + IDPF_HASH_NONF_IPV4_SCTP, + IDPF_HASH_NONF_IPV4_OTHER, + IDPF_HASH_FRAG_IPV4, + /* Values 37-38 are reserved */ + IDPF_HASH_NONF_UNICAST_IPV6_UDP = 39, + IDPF_HASH_NONF_MULTICAST_IPV6_UDP, + IDPF_HASH_NONF_IPV6_UDP, + IDPF_HASH_NONF_IPV6_TCP_SYN_NO_ACK, + IDPF_HASH_NONF_IPV6_TCP, + IDPF_HASH_NONF_IPV6_SCTP, + IDPF_HASH_NONF_IPV6_OTHER, + IDPF_HASH_FRAG_IPV6, + IDPF_HASH_NONF_RSVD47, + IDPF_HASH_NONF_FCOE_OX, + IDPF_HASH_NONF_FCOE_RX, + IDPF_HASH_NONF_FCOE_OTHER, + /* Values 51-62 are reserved */ + IDPF_HASH_L2_PAYLOAD = 63, + + IDPF_HASH_MAX +}; + +/* Supported RSS offloads */ +#define IDPF_DEFAULT_RSS_HASH \ + (BIT_ULL(IDPF_HASH_NONF_IPV4_UDP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV4_SCTP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV4_TCP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV4_OTHER) | \ + BIT_ULL(IDPF_HASH_FRAG_IPV4) | \ + BIT_ULL(IDPF_HASH_NONF_IPV6_UDP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV6_TCP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV6_SCTP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV6_OTHER) | \ + BIT_ULL(IDPF_HASH_FRAG_IPV6) | \ + BIT_ULL(IDPF_HASH_L2_PAYLOAD)) + +#define IDPF_DEFAULT_RSS_HASH_EXPANDED (IDPF_DEFAULT_RSS_HASH | \ + BIT_ULL(IDPF_HASH_NONF_IPV4_TCP_SYN_NO_ACK) | \ + BIT_ULL(IDPF_HASH_NONF_UNICAST_IPV4_UDP) | \ + BIT_ULL(IDPF_HASH_NONF_MULTICAST_IPV4_UDP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV6_TCP_SYN_NO_ACK) | \ + BIT_ULL(IDPF_HASH_NONF_UNICAST_IPV6_UDP) | \ + BIT_ULL(IDPF_HASH_NONF_MULTICAST_IPV6_UDP)) + +/* For idpf_splitq_base_tx_compl_desc */ +#define IDPF_TXD_COMPLQ_GEN_S 15 +#define IDPF_TXD_COMPLQ_GEN_M BIT_ULL(IDPF_TXD_COMPLQ_GEN_S) +#define IDPF_TXD_COMPLQ_COMPL_TYPE_S 11 +#define IDPF_TXD_COMPLQ_COMPL_TYPE_M GENMASK_ULL(13, 11) +#define IDPF_TXD_COMPLQ_QID_S 0 +#define IDPF_TXD_COMPLQ_QID_M GENMASK_ULL(9, 0) + +/* For base mode TX descriptors */ + +#define IDPF_TXD_CTX_QW0_TUNN_L4T_CS_S 23 +#define IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_L4T_CS_S) +#define IDPF_TXD_CTX_QW0_TUNN_DECTTL_S 19 +#define IDPF_TXD_CTX_QW0_TUNN_DECTTL_M \ + (0xFULL << IDPF_TXD_CTX_QW0_TUNN_DECTTL_S) +#define IDPF_TXD_CTX_QW0_TUNN_NATLEN_S 12 +#define IDPF_TXD_CTX_QW0_TUNN_NATLEN_M \ + (0X7FULL << IDPF_TXD_CTX_QW0_TUNN_NATLEN_S) +#define IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_S 11 +#define IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_M \ + BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_S) +#define IDPF_TXD_CTX_EIP_NOINC_IPID_CONST \ + IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_M +#define IDPF_TXD_CTX_QW0_TUNN_NATT_S 9 +#define IDPF_TXD_CTX_QW0_TUNN_NATT_M (0x3ULL << IDPF_TXD_CTX_QW0_TUNN_NATT_S) +#define IDPF_TXD_CTX_UDP_TUNNELING BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_NATT_S) +#define IDPF_TXD_CTX_GRE_TUNNELING (0x2ULL << IDPF_TXD_CTX_QW0_TUNN_NATT_S) +#define IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_S 2 +#define IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M \ + (0x3FULL << IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_S) +#define IDPF_TXD_CTX_QW0_TUNN_EXT_IP_S 0 +#define IDPF_TXD_CTX_QW0_TUNN_EXT_IP_M \ + (0x3ULL << IDPF_TXD_CTX_QW0_TUNN_EXT_IP_S) + +#define IDPF_TXD_CTX_QW1_MSS_S 50 +#define IDPF_TXD_CTX_QW1_MSS_M GENMASK_ULL(63, 50) +#define IDPF_TXD_CTX_QW1_TSO_LEN_S 30 +#define IDPF_TXD_CTX_QW1_TSO_LEN_M GENMASK_ULL(47, 30) +#define IDPF_TXD_CTX_QW1_CMD_S 4 +#define IDPF_TXD_CTX_QW1_CMD_M GENMASK_ULL(15, 4) +#define IDPF_TXD_CTX_QW1_DTYPE_S 0 +#define IDPF_TXD_CTX_QW1_DTYPE_M GENMASK_ULL(3, 0) +#define IDPF_TXD_QW1_L2TAG1_S 48 +#define IDPF_TXD_QW1_L2TAG1_M GENMASK_ULL(63, 48) +#define IDPF_TXD_QW1_TX_BUF_SZ_S 34 +#define IDPF_TXD_QW1_TX_BUF_SZ_M GENMASK_ULL(47, 34) +#define IDPF_TXD_QW1_OFFSET_S 16 +#define IDPF_TXD_QW1_OFFSET_M GENMASK_ULL(33, 16) +#define IDPF_TXD_QW1_CMD_S 4 +#define IDPF_TXD_QW1_CMD_M GENMASK_ULL(15, 4) +#define IDPF_TXD_QW1_DTYPE_S 0 +#define IDPF_TXD_QW1_DTYPE_M GENMASK_ULL(3, 0) + +/* TX Completion Descriptor Completion Types */ +#define IDPF_TXD_COMPLT_ITR_FLUSH 0 +/* Descriptor completion type 1 is reserved */ +#define IDPF_TXD_COMPLT_RS 2 +/* Descriptor completion type 3 is reserved */ +#define IDPF_TXD_COMPLT_RE 4 +#define IDPF_TXD_COMPLT_SW_MARKER 5 + +enum idpf_tx_desc_dtype_value { + IDPF_TX_DESC_DTYPE_DATA = 0, + IDPF_TX_DESC_DTYPE_CTX = 1, + /* DTYPE 2 is reserved + * DTYPE 3 is free for future use + * DTYPE 4 is reserved + */ + IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX = 5, + /* DTYPE 6 is reserved */ + IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2 = 7, + /* DTYPE 8, 9 are free for future use + * DTYPE 10 is reserved + * DTYPE 11 is free for future use + */ + IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE = 12, + /* DTYPE 13, 14 are free for future use */ + + /* DESC_DONE - HW has completed write-back of descriptor */ + IDPF_TX_DESC_DTYPE_DESC_DONE = 15, +}; + +enum idpf_tx_ctx_desc_cmd_bits { + IDPF_TX_CTX_DESC_TSO = 0x01, + IDPF_TX_CTX_DESC_TSYN = 0x02, + IDPF_TX_CTX_DESC_IL2TAG2 = 0x04, + IDPF_TX_CTX_DESC_RSVD = 0x08, + IDPF_TX_CTX_DESC_SWTCH_NOTAG = 0x00, + IDPF_TX_CTX_DESC_SWTCH_UPLINK = 0x10, + IDPF_TX_CTX_DESC_SWTCH_LOCAL = 0x20, + IDPF_TX_CTX_DESC_SWTCH_VSI = 0x30, + IDPF_TX_CTX_DESC_FILT_AU_EN = 0x40, + IDPF_TX_CTX_DESC_FILT_AU_EVICT = 0x80, + IDPF_TX_CTX_DESC_RSVD1 = 0xF00 +}; + +enum idpf_tx_desc_len_fields { + /* Note: These are predefined bit offsets */ + IDPF_TX_DESC_LEN_MACLEN_S = 0, /* 7 BITS */ + IDPF_TX_DESC_LEN_IPLEN_S = 7, /* 7 BITS */ + IDPF_TX_DESC_LEN_L4_LEN_S = 14 /* 4 BITS */ +}; + +enum idpf_tx_base_desc_cmd_bits { + IDPF_TX_DESC_CMD_EOP = BIT(0), + IDPF_TX_DESC_CMD_RS = BIT(1), + /* only on VFs else RSVD */ + IDPF_TX_DESC_CMD_ICRC = BIT(2), + IDPF_TX_DESC_CMD_IL2TAG1 = BIT(3), + IDPF_TX_DESC_CMD_RSVD1 = BIT(4), + IDPF_TX_DESC_CMD_IIPT_IPV6 = BIT(5), + IDPF_TX_DESC_CMD_IIPT_IPV4 = BIT(6), + IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM = GENMASK(6, 5), + IDPF_TX_DESC_CMD_RSVD2 = BIT(7), + IDPF_TX_DESC_CMD_L4T_EOFT_TCP = BIT(8), + IDPF_TX_DESC_CMD_L4T_EOFT_SCTP = BIT(9), + IDPF_TX_DESC_CMD_L4T_EOFT_UDP = GENMASK(9, 8), + IDPF_TX_DESC_CMD_RSVD3 = BIT(10), + IDPF_TX_DESC_CMD_RSVD4 = BIT(11), +}; + +/* Transmit descriptors */ +/* splitq tx buf, singleq tx buf and singleq compl desc */ +struct idpf_base_tx_desc { + __le64 buf_addr; /* Address of descriptor's data buf */ + __le64 qw1; /* type_cmd_offset_bsz_l2tag1 */ +}; /* read used with buffer queues */ + +struct idpf_splitq_tx_compl_desc { + /* qid=[10:0] comptype=[13:11] rsvd=[14] gen=[15] */ + __le16 qid_comptype_gen; + union { + __le16 q_head; /* Queue head */ + __le16 compl_tag; /* Completion tag */ + } q_head_compl_tag; + u8 ts[3]; + u8 rsvd; /* Reserved */ +}; /* writeback used with completion queues */ + +/* Context descriptors */ +struct idpf_base_tx_ctx_desc { + struct { + __le32 tunneling_params; + __le16 l2tag2; + __le16 rsvd1; + } qw0; + __le64 qw1; /* type_cmd_tlen_mss/rt_hint */ +}; + +/* Common cmd field defines for all desc except Flex Flow Scheduler (0x0C) */ +enum idpf_tx_flex_desc_cmd_bits { + IDPF_TX_FLEX_DESC_CMD_EOP = BIT(0), + IDPF_TX_FLEX_DESC_CMD_RS = BIT(1), + IDPF_TX_FLEX_DESC_CMD_RE = BIT(2), + IDPF_TX_FLEX_DESC_CMD_IL2TAG1 = BIT(3), + IDPF_TX_FLEX_DESC_CMD_DUMMY = BIT(4), + IDPF_TX_FLEX_DESC_CMD_CS_EN = BIT(5), + IDPF_TX_FLEX_DESC_CMD_FILT_AU_EN = BIT(6), + IDPF_TX_FLEX_DESC_CMD_FILT_AU_EVICT = BIT(7), +}; + +struct idpf_flex_tx_desc { + __le64 buf_addr; /* Packet buffer address */ + struct { +#define IDPF_FLEX_TXD_QW1_DTYPE_S 0 +#define IDPF_FLEX_TXD_QW1_DTYPE_M GENMASK(4, 0) +#define IDPF_FLEX_TXD_QW1_CMD_S 5 +#define IDPF_FLEX_TXD_QW1_CMD_M GENMASK(15, 5) + __le16 cmd_dtype; + /* DTYPE=IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2 (0x07) */ + struct { + __le16 l2tag1; + __le16 l2tag2; + } l2tags; + __le16 buf_size; + } qw1; +}; + +struct idpf_flex_tx_sched_desc { + __le64 buf_addr; /* Packet buffer address */ + + /* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE_16B (0x0C) */ + struct { + u8 cmd_dtype; +#define IDPF_TXD_FLEX_FLOW_DTYPE_M GENMASK(4, 0) +#define IDPF_TXD_FLEX_FLOW_CMD_EOP BIT(5) +#define IDPF_TXD_FLEX_FLOW_CMD_CS_EN BIT(6) +#define IDPF_TXD_FLEX_FLOW_CMD_RE BIT(7) + + /* [23:23] Horizon Overflow bit, [22:0] timestamp */ + u8 ts[3]; +#define IDPF_TXD_FLOW_SCH_HORIZON_OVERFLOW_M BIT(7) + + __le16 compl_tag; + __le16 rxr_bufsize; +#define IDPF_TXD_FLEX_FLOW_RXR BIT(14) +#define IDPF_TXD_FLEX_FLOW_BUFSIZE_M GENMASK(13, 0) + } qw1; +}; + +/* Common cmd fields for all flex context descriptors + * Note: these defines already account for the 5 bit dtype in the cmd_dtype + * field + */ +enum idpf_tx_flex_ctx_desc_cmd_bits { + IDPF_TX_FLEX_CTX_DESC_CMD_TSO = BIT(5), + IDPF_TX_FLEX_CTX_DESC_CMD_TSYN_EN = BIT(6), + IDPF_TX_FLEX_CTX_DESC_CMD_L2TAG2 = BIT(7), + IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_UPLNK = BIT(9), + IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_LOCAL = BIT(10), + IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_TARGETVSI = GENMASK(10, 9), +}; + +/* Standard flex descriptor TSO context quad word */ +struct idpf_flex_tx_tso_ctx_qw { + __le32 flex_tlen; +#define IDPF_TXD_FLEX_CTX_TLEN_M GENMASK(17, 0) +#define IDPF_TXD_FLEX_TSO_CTX_FLEX_S 24 + __le16 mss_rt; +#define IDPF_TXD_FLEX_CTX_MSS_RT_M GENMASK(13, 0) + u8 hdr_len; + u8 flex; +}; + +struct idpf_flex_tx_ctx_desc { + /* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX (0x05) */ + struct { + struct idpf_flex_tx_tso_ctx_qw qw0; + struct { + __le16 cmd_dtype; + u8 flex[6]; + } qw1; + } tso; +}; +#endif /* _IDPF_LAN_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h b/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h new file mode 100644 index 000000000000..3d73b6c76863 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_LAN_VF_REGS_H_ +#define _IDPF_LAN_VF_REGS_H_ + +/* Reset */ +#define VFGEN_RSTAT 0x00008800 +#define VFGEN_RSTAT_VFR_STATE_S 0 +#define VFGEN_RSTAT_VFR_STATE_M GENMASK(1, 0) + +/* Control(VF Mailbox) Queue */ +#define VF_BASE 0x00006000 + +#define VF_ATQBAL (VF_BASE + 0x1C00) +#define VF_ATQBAH (VF_BASE + 0x1800) +#define VF_ATQLEN (VF_BASE + 0x0800) +#define VF_ATQLEN_ATQLEN_S 0 +#define VF_ATQLEN_ATQLEN_M GENMASK(9, 0) +#define VF_ATQLEN_ATQVFE_S 28 +#define VF_ATQLEN_ATQVFE_M BIT(VF_ATQLEN_ATQVFE_S) +#define VF_ATQLEN_ATQOVFL_S 29 +#define VF_ATQLEN_ATQOVFL_M BIT(VF_ATQLEN_ATQOVFL_S) +#define VF_ATQLEN_ATQCRIT_S 30 +#define VF_ATQLEN_ATQCRIT_M BIT(VF_ATQLEN_ATQCRIT_S) +#define VF_ATQLEN_ATQENABLE_S 31 +#define VF_ATQLEN_ATQENABLE_M BIT(VF_ATQLEN_ATQENABLE_S) +#define VF_ATQH (VF_BASE + 0x0400) +#define VF_ATQH_ATQH_S 0 +#define VF_ATQH_ATQH_M GENMASK(9, 0) +#define VF_ATQT (VF_BASE + 0x2400) + +#define VF_ARQBAL (VF_BASE + 0x0C00) +#define VF_ARQBAH (VF_BASE) +#define VF_ARQLEN (VF_BASE + 0x2000) +#define VF_ARQLEN_ARQLEN_S 0 +#define VF_ARQLEN_ARQLEN_M GENMASK(9, 0) +#define VF_ARQLEN_ARQVFE_S 28 +#define VF_ARQLEN_ARQVFE_M BIT(VF_ARQLEN_ARQVFE_S) +#define VF_ARQLEN_ARQOVFL_S 29 +#define VF_ARQLEN_ARQOVFL_M BIT(VF_ARQLEN_ARQOVFL_S) +#define VF_ARQLEN_ARQCRIT_S 30 +#define VF_ARQLEN_ARQCRIT_M BIT(VF_ARQLEN_ARQCRIT_S) +#define VF_ARQLEN_ARQENABLE_S 31 +#define VF_ARQLEN_ARQENABLE_M BIT(VF_ARQLEN_ARQENABLE_S) +#define VF_ARQH (VF_BASE + 0x1400) +#define VF_ARQH_ARQH_S 0 +#define VF_ARQH_ARQH_M GENMASK(12, 0) +#define VF_ARQT (VF_BASE + 0x1000) + +/* Transmit queues */ +#define VF_QTX_TAIL_BASE 0x00000000 +#define VF_QTX_TAIL(_QTX) (VF_QTX_TAIL_BASE + (_QTX) * 0x4) +#define VF_QTX_TAIL_EXT_BASE 0x00040000 +#define VF_QTX_TAIL_EXT(_QTX) (VF_QTX_TAIL_EXT_BASE + ((_QTX) * 4)) + +/* Receive queues */ +#define VF_QRX_TAIL_BASE 0x00002000 +#define VF_QRX_TAIL(_QRX) (VF_QRX_TAIL_BASE + ((_QRX) * 4)) +#define VF_QRX_TAIL_EXT_BASE 0x00050000 +#define VF_QRX_TAIL_EXT(_QRX) (VF_QRX_TAIL_EXT_BASE + ((_QRX) * 4)) +#define VF_QRXB_TAIL_BASE 0x00060000 +#define VF_QRXB_TAIL(_QRX) (VF_QRXB_TAIL_BASE + ((_QRX) * 4)) + +/* Interrupts */ +#define VF_INT_DYN_CTL0 0x00005C00 +#define VF_INT_DYN_CTL0_INTENA_S 0 +#define VF_INT_DYN_CTL0_INTENA_M BIT(VF_INT_DYN_CTL0_INTENA_S) +#define VF_INT_DYN_CTL0_ITR_INDX_S 3 +#define VF_INT_DYN_CTL0_ITR_INDX_M GENMASK(4, 3) +#define VF_INT_DYN_CTLN(_INT) (0x00003800 + ((_INT) * 4)) +#define VF_INT_DYN_CTLN_EXT(_INT) (0x00070000 + ((_INT) * 4)) +#define VF_INT_DYN_CTLN_INTENA_S 0 +#define VF_INT_DYN_CTLN_INTENA_M BIT(VF_INT_DYN_CTLN_INTENA_S) +#define VF_INT_DYN_CTLN_CLEARPBA_S 1 +#define VF_INT_DYN_CTLN_CLEARPBA_M BIT(VF_INT_DYN_CTLN_CLEARPBA_S) +#define VF_INT_DYN_CTLN_SWINT_TRIG_S 2 +#define VF_INT_DYN_CTLN_SWINT_TRIG_M BIT(VF_INT_DYN_CTLN_SWINT_TRIG_S) +#define VF_INT_DYN_CTLN_ITR_INDX_S 3 +#define VF_INT_DYN_CTLN_ITR_INDX_M GENMASK(4, 3) +#define VF_INT_DYN_CTLN_INTERVAL_S 5 +#define VF_INT_DYN_CTLN_INTERVAL_M BIT(VF_INT_DYN_CTLN_INTERVAL_S) +#define VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_S 24 +#define VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_M BIT(VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_S) +#define VF_INT_DYN_CTLN_SW_ITR_INDX_S 25 +#define VF_INT_DYN_CTLN_SW_ITR_INDX_M BIT(VF_INT_DYN_CTLN_SW_ITR_INDX_S) +#define VF_INT_DYN_CTLN_WB_ON_ITR_S 30 +#define VF_INT_DYN_CTLN_WB_ON_ITR_M BIT(VF_INT_DYN_CTLN_WB_ON_ITR_S) +#define VF_INT_DYN_CTLN_INTENA_MSK_S 31 +#define VF_INT_DYN_CTLN_INTENA_MSK_M BIT(VF_INT_DYN_CTLN_INTENA_MSK_S) +/* _ITR is ITR index, _INT is interrupt index, _itrn_indx_spacing is spacing + * b/w itrn registers of the same vector + */ +#define VF_INT_ITR0(_ITR) (0x00004C00 + ((_ITR) * 4)) +#define VF_INT_ITRN_ADDR(_ITR, _reg_start, _itrn_indx_spacing) \ + ((_reg_start) + ((_ITR) * (_itrn_indx_spacing))) +/* For VF with 16 vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing + * is 0x40 and base register offset is 0x00002800 + */ +#define VF_INT_ITRN(_INT, _ITR) \ + (0x00002800 + ((_INT) * 4) + ((_ITR) * 0x40)) +/* For VF with 64 vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing + * is 0x100 and base register offset is 0x00002C00 + */ +#define VF_INT_ITRN_64(_INT, _ITR) \ + (0x00002C00 + ((_INT) * 4) + ((_ITR) * 0x100)) +/* For VF with 2k vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing + * is 0x2000 and base register offset is 0x00072000 + */ +#define VF_INT_ITRN_2K(_INT, _ITR) \ + (0x00072000 + ((_INT) * 4) + ((_ITR) * 0x2000)) +#define VF_INT_ITRN_MAX_INDEX 2 +#define VF_INT_ITRN_INTERVAL_S 0 +#define VF_INT_ITRN_INTERVAL_M GENMASK(11, 0) +#define VF_INT_PBA_CLEAR 0x00008900 + +#define VF_INT_ICR0_ENA1 0x00005000 +#define VF_INT_ICR0_ENA1_ADMINQ_S 30 +#define VF_INT_ICR0_ENA1_ADMINQ_M BIT(VF_INT_ICR0_ENA1_ADMINQ_S) +#define VF_INT_ICR0_ENA1_RSVD_S 31 +#define VF_INT_ICR01 0x00004800 +#define VF_QF_HENA(_i) (0x0000C400 + ((_i) * 4)) +#define VF_QF_HENA_MAX_INDX 1 +#define VF_QF_HKEY(_i) (0x0000CC00 + ((_i) * 4)) +#define VF_QF_HKEY_MAX_INDX 12 +#define VF_QF_HLUT(_i) (0x0000D000 + ((_i) * 4)) +#define VF_QF_HLUT_MAX_INDX 15 +#endif diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c new file mode 100644 index 000000000000..19809b0ddcd9 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -0,0 +1,2379 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" + +static const struct net_device_ops idpf_netdev_ops_splitq; +static const struct net_device_ops idpf_netdev_ops_singleq; + +const char * const idpf_vport_vc_state_str[] = { + IDPF_FOREACH_VPORT_VC_STATE(IDPF_GEN_STRING) +}; + +/** + * idpf_init_vector_stack - Fill the MSIX vector stack with vector index + * @adapter: private data struct + * + * Return 0 on success, error on failure + */ +static int idpf_init_vector_stack(struct idpf_adapter *adapter) +{ + struct idpf_vector_lifo *stack; + u16 min_vec; + u32 i; + + mutex_lock(&adapter->vector_lock); + min_vec = adapter->num_msix_entries - adapter->num_avail_msix; + stack = &adapter->vector_stack; + stack->size = adapter->num_msix_entries; + /* set the base and top to point at start of the 'free pool' to + * distribute the unused vectors on-demand basis + */ + stack->base = min_vec; + stack->top = min_vec; + + stack->vec_idx = kcalloc(stack->size, sizeof(u16), GFP_KERNEL); + if (!stack->vec_idx) { + mutex_unlock(&adapter->vector_lock); + + return -ENOMEM; + } + + for (i = 0; i < stack->size; i++) + stack->vec_idx[i] = i; + + mutex_unlock(&adapter->vector_lock); + + return 0; +} + +/** + * idpf_deinit_vector_stack - zero out the MSIX vector stack + * @adapter: private data struct + */ +static void idpf_deinit_vector_stack(struct idpf_adapter *adapter) +{ + struct idpf_vector_lifo *stack; + + mutex_lock(&adapter->vector_lock); + stack = &adapter->vector_stack; + kfree(stack->vec_idx); + stack->vec_idx = NULL; + mutex_unlock(&adapter->vector_lock); +} + +/** + * idpf_mb_intr_rel_irq - Free the IRQ association with the OS + * @adapter: adapter structure + * + * This will also disable interrupt mode and queue up mailbox task. Mailbox + * task will reschedule itself if not in interrupt mode. + */ +static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter) +{ + clear_bit(IDPF_MB_INTR_MODE, adapter->flags); + free_irq(adapter->msix_entries[0].vector, adapter); + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); +} + +/** + * idpf_intr_rel - Release interrupt capabilities and free memory + * @adapter: adapter to disable interrupts on + */ +void idpf_intr_rel(struct idpf_adapter *adapter) +{ + int err; + + if (!adapter->msix_entries) + return; + + idpf_mb_intr_rel_irq(adapter); + pci_free_irq_vectors(adapter->pdev); + + err = idpf_send_dealloc_vectors_msg(adapter); + if (err) + dev_err(&adapter->pdev->dev, + "Failed to deallocate vectors: %d\n", err); + + idpf_deinit_vector_stack(adapter); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; +} + +/** + * idpf_mb_intr_clean - Interrupt handler for the mailbox + * @irq: interrupt number + * @data: pointer to the adapter structure + */ +static irqreturn_t idpf_mb_intr_clean(int __always_unused irq, void *data) +{ + struct idpf_adapter *adapter = (struct idpf_adapter *)data; + + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); + + return IRQ_HANDLED; +} + +/** + * idpf_mb_irq_enable - Enable MSIX interrupt for the mailbox + * @adapter: adapter to get the hardware address for register write + */ +static void idpf_mb_irq_enable(struct idpf_adapter *adapter) +{ + struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg; + u32 val; + + val = intr->dyn_ctl_intena_m | intr->dyn_ctl_itridx_m; + writel(val, intr->dyn_ctl); + writel(intr->icr_ena_ctlq_m, intr->icr_ena); +} + +/** + * idpf_mb_intr_req_irq - Request irq for the mailbox interrupt + * @adapter: adapter structure to pass to the mailbox irq handler + */ +static int idpf_mb_intr_req_irq(struct idpf_adapter *adapter) +{ + struct idpf_q_vector *mb_vector = &adapter->mb_vector; + int irq_num, mb_vidx = 0, err; + + irq_num = adapter->msix_entries[mb_vidx].vector; + mb_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d", + dev_driver_string(&adapter->pdev->dev), + "Mailbox", mb_vidx); + err = request_irq(irq_num, adapter->irq_mb_handler, 0, + mb_vector->name, adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "IRQ request for mailbox failed, error: %d\n", err); + + return err; + } + + set_bit(IDPF_MB_INTR_MODE, adapter->flags); + + return 0; +} + +/** + * idpf_set_mb_vec_id - Set vector index for mailbox + * @adapter: adapter structure to access the vector chunks + * + * The first vector id in the requested vector chunks from the CP is for + * the mailbox + */ +static void idpf_set_mb_vec_id(struct idpf_adapter *adapter) +{ + if (adapter->req_vec_chunks) + adapter->mb_vector.v_idx = + le16_to_cpu(adapter->caps.mailbox_vector_id); + else + adapter->mb_vector.v_idx = 0; +} + +/** + * idpf_mb_intr_init - Initialize the mailbox interrupt + * @adapter: adapter structure to store the mailbox vector + */ +static int idpf_mb_intr_init(struct idpf_adapter *adapter) +{ + adapter->dev_ops.reg_ops.mb_intr_reg_init(adapter); + adapter->irq_mb_handler = idpf_mb_intr_clean; + + return idpf_mb_intr_req_irq(adapter); +} + +/** + * idpf_vector_lifo_push - push MSIX vector index onto stack + * @adapter: private data struct + * @vec_idx: vector index to store + */ +static int idpf_vector_lifo_push(struct idpf_adapter *adapter, u16 vec_idx) +{ + struct idpf_vector_lifo *stack = &adapter->vector_stack; + + lockdep_assert_held(&adapter->vector_lock); + + if (stack->top == stack->base) { + dev_err(&adapter->pdev->dev, "Exceeded the vector stack limit: %d\n", + stack->top); + return -EINVAL; + } + + stack->vec_idx[--stack->top] = vec_idx; + + return 0; +} + +/** + * idpf_vector_lifo_pop - pop MSIX vector index from stack + * @adapter: private data struct + */ +static int idpf_vector_lifo_pop(struct idpf_adapter *adapter) +{ + struct idpf_vector_lifo *stack = &adapter->vector_stack; + + lockdep_assert_held(&adapter->vector_lock); + + if (stack->top == stack->size) { + dev_err(&adapter->pdev->dev, "No interrupt vectors are available to distribute!\n"); + + return -EINVAL; + } + + return stack->vec_idx[stack->top++]; +} + +/** + * idpf_vector_stash - Store the vector indexes onto the stack + * @adapter: private data struct + * @q_vector_idxs: vector index array + * @vec_info: info related to the number of vectors + * + * This function is a no-op if there are no vectors indexes to be stashed + */ +static void idpf_vector_stash(struct idpf_adapter *adapter, u16 *q_vector_idxs, + struct idpf_vector_info *vec_info) +{ + int i, base = 0; + u16 vec_idx; + + lockdep_assert_held(&adapter->vector_lock); + + if (!vec_info->num_curr_vecs) + return; + + /* For default vports, no need to stash vector allocated from the + * default pool onto the stack + */ + if (vec_info->default_vport) + base = IDPF_MIN_Q_VEC; + + for (i = vec_info->num_curr_vecs - 1; i >= base ; i--) { + vec_idx = q_vector_idxs[i]; + idpf_vector_lifo_push(adapter, vec_idx); + adapter->num_avail_msix++; + } +} + +/** + * idpf_req_rel_vector_indexes - Request or release MSIX vector indexes + * @adapter: driver specific private structure + * @q_vector_idxs: vector index array + * @vec_info: info related to the number of vectors + * + * This is the core function to distribute the MSIX vectors acquired from the + * OS. It expects the caller to pass the number of vectors required and + * also previously allocated. First, it stashes previously allocated vector + * indexes on to the stack and then figures out if it can allocate requested + * vectors. It can wait on acquiring the mutex lock. If the caller passes 0 as + * requested vectors, then this function just stashes the already allocated + * vectors and returns 0. + * + * Returns actual number of vectors allocated on success, error value on failure + * If 0 is returned, implies the stack has no vectors to allocate which is also + * a failure case for the caller + */ +int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter, + u16 *q_vector_idxs, + struct idpf_vector_info *vec_info) +{ + u16 num_req_vecs, num_alloc_vecs = 0, max_vecs; + struct idpf_vector_lifo *stack; + int i, j, vecid; + + mutex_lock(&adapter->vector_lock); + stack = &adapter->vector_stack; + num_req_vecs = vec_info->num_req_vecs; + + /* Stash interrupt vector indexes onto the stack if required */ + idpf_vector_stash(adapter, q_vector_idxs, vec_info); + + if (!num_req_vecs) + goto rel_lock; + + if (vec_info->default_vport) { + /* As IDPF_MIN_Q_VEC per default vport is put aside in the + * default pool of the stack, use them for default vports + */ + j = vec_info->index * IDPF_MIN_Q_VEC + IDPF_MBX_Q_VEC; + for (i = 0; i < IDPF_MIN_Q_VEC; i++) { + q_vector_idxs[num_alloc_vecs++] = stack->vec_idx[j++]; + num_req_vecs--; + } + } + + /* Find if stack has enough vector to allocate */ + max_vecs = min(adapter->num_avail_msix, num_req_vecs); + + for (j = 0; j < max_vecs; j++) { + vecid = idpf_vector_lifo_pop(adapter); + q_vector_idxs[num_alloc_vecs++] = vecid; + } + adapter->num_avail_msix -= max_vecs; + +rel_lock: + mutex_unlock(&adapter->vector_lock); + + return num_alloc_vecs; +} + +/** + * idpf_intr_req - Request interrupt capabilities + * @adapter: adapter to enable interrupts on + * + * Returns 0 on success, negative on failure + */ +int idpf_intr_req(struct idpf_adapter *adapter) +{ + u16 default_vports = idpf_get_default_vports(adapter); + int num_q_vecs, total_vecs, num_vec_ids; + int min_vectors, v_actual, err; + unsigned int vector; + u16 *vecids; + + total_vecs = idpf_get_reserved_vecs(adapter); + num_q_vecs = total_vecs - IDPF_MBX_Q_VEC; + + err = idpf_send_alloc_vectors_msg(adapter, num_q_vecs); + if (err) { + dev_err(&adapter->pdev->dev, + "Failed to allocate %d vectors: %d\n", num_q_vecs, err); + + return -EAGAIN; + } + + min_vectors = IDPF_MBX_Q_VEC + IDPF_MIN_Q_VEC * default_vports; + v_actual = pci_alloc_irq_vectors(adapter->pdev, min_vectors, + total_vecs, PCI_IRQ_MSIX); + if (v_actual < min_vectors) { + dev_err(&adapter->pdev->dev, "Failed to allocate MSIX vectors: %d\n", + v_actual); + err = -EAGAIN; + goto send_dealloc_vecs; + } + + adapter->msix_entries = kcalloc(v_actual, sizeof(struct msix_entry), + GFP_KERNEL); + + if (!adapter->msix_entries) { + err = -ENOMEM; + goto free_irq; + } + + idpf_set_mb_vec_id(adapter); + + vecids = kcalloc(total_vecs, sizeof(u16), GFP_KERNEL); + if (!vecids) { + err = -ENOMEM; + goto free_msix; + } + + if (adapter->req_vec_chunks) { + struct virtchnl2_vector_chunks *vchunks; + struct virtchnl2_alloc_vectors *ac; + + ac = adapter->req_vec_chunks; + vchunks = &ac->vchunks; + + num_vec_ids = idpf_get_vec_ids(adapter, vecids, total_vecs, + vchunks); + if (num_vec_ids < v_actual) { + err = -EINVAL; + goto free_vecids; + } + } else { + int i; + + for (i = 0; i < v_actual; i++) + vecids[i] = i; + } + + for (vector = 0; vector < v_actual; vector++) { + adapter->msix_entries[vector].entry = vecids[vector]; + adapter->msix_entries[vector].vector = + pci_irq_vector(adapter->pdev, vector); + } + + adapter->num_req_msix = total_vecs; + adapter->num_msix_entries = v_actual; + /* 'num_avail_msix' is used to distribute excess vectors to the vports + * after considering the minimum vectors required per each default + * vport + */ + adapter->num_avail_msix = v_actual - min_vectors; + + /* Fill MSIX vector lifo stack with vector indexes */ + err = idpf_init_vector_stack(adapter); + if (err) + goto free_vecids; + + err = idpf_mb_intr_init(adapter); + if (err) + goto deinit_vec_stack; + idpf_mb_irq_enable(adapter); + kfree(vecids); + + return 0; + +deinit_vec_stack: + idpf_deinit_vector_stack(adapter); +free_vecids: + kfree(vecids); +free_msix: + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; +free_irq: + pci_free_irq_vectors(adapter->pdev); +send_dealloc_vecs: + idpf_send_dealloc_vectors_msg(adapter); + + return err; +} + +/** + * idpf_find_mac_filter - Search filter list for specific mac filter + * @vconfig: Vport config structure + * @macaddr: The MAC address + * + * Returns ptr to the filter object or NULL. Must be called while holding the + * mac_filter_list_lock. + **/ +static struct idpf_mac_filter *idpf_find_mac_filter(struct idpf_vport_config *vconfig, + const u8 *macaddr) +{ + struct idpf_mac_filter *f; + + if (!macaddr) + return NULL; + + list_for_each_entry(f, &vconfig->user_config.mac_filter_list, list) { + if (ether_addr_equal(macaddr, f->macaddr)) + return f; + } + + return NULL; +} + +/** + * __idpf_del_mac_filter - Delete a MAC filter from the filter list + * @vport_config: Vport config structure + * @macaddr: The MAC address + * + * Returns 0 on success, error value on failure + **/ +static int __idpf_del_mac_filter(struct idpf_vport_config *vport_config, + const u8 *macaddr) +{ + struct idpf_mac_filter *f; + + spin_lock_bh(&vport_config->mac_filter_list_lock); + f = idpf_find_mac_filter(vport_config, macaddr); + if (f) { + list_del(&f->list); + kfree(f); + } + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return 0; +} + +/** + * idpf_del_mac_filter - Delete a MAC filter from the filter list + * @vport: Main vport structure + * @np: Netdev private structure + * @macaddr: The MAC address + * @async: Don't wait for return message + * + * Removes filter from list and if interface is up, tells hardware about the + * removed filter. + **/ +static int idpf_del_mac_filter(struct idpf_vport *vport, + struct idpf_netdev_priv *np, + const u8 *macaddr, bool async) +{ + struct idpf_vport_config *vport_config; + struct idpf_mac_filter *f; + + vport_config = np->adapter->vport_config[np->vport_idx]; + + spin_lock_bh(&vport_config->mac_filter_list_lock); + f = idpf_find_mac_filter(vport_config, macaddr); + if (f) { + f->remove = true; + } else { + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return -EINVAL; + } + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + if (np->state == __IDPF_VPORT_UP) { + int err; + + err = idpf_add_del_mac_filters(vport, np, false, async); + if (err) + return err; + } + + return __idpf_del_mac_filter(vport_config, macaddr); +} + +/** + * __idpf_add_mac_filter - Add mac filter helper function + * @vport_config: Vport config structure + * @macaddr: Address to add + * + * Takes mac_filter_list_lock spinlock to add new filter to list. + */ +static int __idpf_add_mac_filter(struct idpf_vport_config *vport_config, + const u8 *macaddr) +{ + struct idpf_mac_filter *f; + + spin_lock_bh(&vport_config->mac_filter_list_lock); + + f = idpf_find_mac_filter(vport_config, macaddr); + if (f) { + f->remove = false; + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return 0; + } + + f = kzalloc(sizeof(*f), GFP_ATOMIC); + if (!f) { + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return -ENOMEM; + } + + ether_addr_copy(f->macaddr, macaddr); + list_add_tail(&f->list, &vport_config->user_config.mac_filter_list); + f->add = true; + + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return 0; +} + +/** + * idpf_add_mac_filter - Add a mac filter to the filter list + * @vport: Main vport structure + * @np: Netdev private structure + * @macaddr: The MAC address + * @async: Don't wait for return message + * + * Returns 0 on success or error on failure. If interface is up, we'll also + * send the virtchnl message to tell hardware about the filter. + **/ +static int idpf_add_mac_filter(struct idpf_vport *vport, + struct idpf_netdev_priv *np, + const u8 *macaddr, bool async) +{ + struct idpf_vport_config *vport_config; + int err; + + vport_config = np->adapter->vport_config[np->vport_idx]; + err = __idpf_add_mac_filter(vport_config, macaddr); + if (err) + return err; + + if (np->state == __IDPF_VPORT_UP) + err = idpf_add_del_mac_filters(vport, np, true, async); + + return err; +} + +/** + * idpf_del_all_mac_filters - Delete all MAC filters in list + * @vport: main vport struct + * + * Takes mac_filter_list_lock spinlock. Deletes all filters + */ +static void idpf_del_all_mac_filters(struct idpf_vport *vport) +{ + struct idpf_vport_config *vport_config; + struct idpf_mac_filter *f, *ftmp; + + vport_config = vport->adapter->vport_config[vport->idx]; + spin_lock_bh(&vport_config->mac_filter_list_lock); + + list_for_each_entry_safe(f, ftmp, &vport_config->user_config.mac_filter_list, + list) { + list_del(&f->list); + kfree(f); + } + + spin_unlock_bh(&vport_config->mac_filter_list_lock); +} + +/** + * idpf_restore_mac_filters - Re-add all MAC filters in list + * @vport: main vport struct + * + * Takes mac_filter_list_lock spinlock. Sets add field to true for filters to + * resync filters back to HW. + */ +static void idpf_restore_mac_filters(struct idpf_vport *vport) +{ + struct idpf_vport_config *vport_config; + struct idpf_mac_filter *f; + + vport_config = vport->adapter->vport_config[vport->idx]; + spin_lock_bh(&vport_config->mac_filter_list_lock); + + list_for_each_entry(f, &vport_config->user_config.mac_filter_list, list) + f->add = true; + + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + idpf_add_del_mac_filters(vport, netdev_priv(vport->netdev), + true, false); +} + +/** + * idpf_remove_mac_filters - Remove all MAC filters in list + * @vport: main vport struct + * + * Takes mac_filter_list_lock spinlock. Sets remove field to true for filters + * to remove filters in HW. + */ +static void idpf_remove_mac_filters(struct idpf_vport *vport) +{ + struct idpf_vport_config *vport_config; + struct idpf_mac_filter *f; + + vport_config = vport->adapter->vport_config[vport->idx]; + spin_lock_bh(&vport_config->mac_filter_list_lock); + + list_for_each_entry(f, &vport_config->user_config.mac_filter_list, list) + f->remove = true; + + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + idpf_add_del_mac_filters(vport, netdev_priv(vport->netdev), + false, false); +} + +/** + * idpf_deinit_mac_addr - deinitialize mac address for vport + * @vport: main vport structure + */ +static void idpf_deinit_mac_addr(struct idpf_vport *vport) +{ + struct idpf_vport_config *vport_config; + struct idpf_mac_filter *f; + + vport_config = vport->adapter->vport_config[vport->idx]; + + spin_lock_bh(&vport_config->mac_filter_list_lock); + + f = idpf_find_mac_filter(vport_config, vport->default_mac_addr); + if (f) { + list_del(&f->list); + kfree(f); + } + + spin_unlock_bh(&vport_config->mac_filter_list_lock); +} + +/** + * idpf_init_mac_addr - initialize mac address for vport + * @vport: main vport structure + * @netdev: pointer to netdev struct associated with this vport + */ +static int idpf_init_mac_addr(struct idpf_vport *vport, + struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_adapter *adapter = vport->adapter; + int err; + + if (is_valid_ether_addr(vport->default_mac_addr)) { + eth_hw_addr_set(netdev, vport->default_mac_addr); + ether_addr_copy(netdev->perm_addr, vport->default_mac_addr); + + return idpf_add_mac_filter(vport, np, vport->default_mac_addr, + false); + } + + if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, + VIRTCHNL2_CAP_MACFILTER)) { + dev_err(&adapter->pdev->dev, + "MAC address is not provided and capability is not set\n"); + + return -EINVAL; + } + + eth_hw_addr_random(netdev); + err = idpf_add_mac_filter(vport, np, netdev->dev_addr, false); + if (err) + return err; + + dev_info(&adapter->pdev->dev, "Invalid MAC address %pM, using random %pM\n", + vport->default_mac_addr, netdev->dev_addr); + ether_addr_copy(vport->default_mac_addr, netdev->dev_addr); + + return 0; +} + +/** + * idpf_cfg_netdev - Allocate, configure and register a netdev + * @vport: main vport structure + * + * Returns 0 on success, negative value on failure. + */ +static int idpf_cfg_netdev(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport_config *vport_config; + netdev_features_t dflt_features; + netdev_features_t offloads = 0; + struct idpf_netdev_priv *np; + struct net_device *netdev; + u16 idx = vport->idx; + int err; + + vport_config = adapter->vport_config[idx]; + + /* It's possible we already have a netdev allocated and registered for + * this vport + */ + if (test_bit(IDPF_VPORT_REG_NETDEV, vport_config->flags)) { + netdev = adapter->netdevs[idx]; + np = netdev_priv(netdev); + np->vport = vport; + np->vport_idx = vport->idx; + np->vport_id = vport->vport_id; + vport->netdev = netdev; + + return idpf_init_mac_addr(vport, netdev); + } + + netdev = alloc_etherdev_mqs(sizeof(struct idpf_netdev_priv), + vport_config->max_q.max_txq, + vport_config->max_q.max_rxq); + if (!netdev) + return -ENOMEM; + + vport->netdev = netdev; + np = netdev_priv(netdev); + np->vport = vport; + np->adapter = adapter; + np->vport_idx = vport->idx; + np->vport_id = vport->vport_id; + + spin_lock_init(&np->stats_lock); + + err = idpf_init_mac_addr(vport, netdev); + if (err) { + free_netdev(vport->netdev); + vport->netdev = NULL; + + return err; + } + + /* assign netdev_ops */ + if (idpf_is_queue_model_split(vport->txq_model)) + netdev->netdev_ops = &idpf_netdev_ops_splitq; + else + netdev->netdev_ops = &idpf_netdev_ops_singleq; + + /* setup watchdog timeout value to be 5 second */ + netdev->watchdog_timeo = 5 * HZ; + + /* configure default MTU size */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = vport->max_mtu; + + dflt_features = NETIF_F_SG | + NETIF_F_HIGHDMA; + + if (idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) + dflt_features |= NETIF_F_RXHASH; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V4)) + dflt_features |= NETIF_F_IP_CSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V6)) + dflt_features |= NETIF_F_IPV6_CSUM; + if (idpf_is_cap_ena(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM)) + dflt_features |= NETIF_F_RXCSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_SCTP_CSUM)) + dflt_features |= NETIF_F_SCTP_CRC; + + if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_TCP)) + dflt_features |= NETIF_F_TSO; + if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV6_TCP)) + dflt_features |= NETIF_F_TSO6; + if (idpf_is_cap_ena_all(adapter, IDPF_SEG_CAPS, + VIRTCHNL2_CAP_SEG_IPV4_UDP | + VIRTCHNL2_CAP_SEG_IPV6_UDP)) + dflt_features |= NETIF_F_GSO_UDP_L4; + if (idpf_is_cap_ena_all(adapter, IDPF_RSC_CAPS, IDPF_CAP_RSC)) + offloads |= NETIF_F_GRO_HW; + /* advertise to stack only if offloads for encapsulated packets is + * supported + */ + if (idpf_is_cap_ena(vport->adapter, IDPF_SEG_CAPS, + VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL)) { + offloads |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6 | + 0; + + if (!idpf_is_cap_ena_all(vport->adapter, IDPF_CSUM_CAPS, + IDPF_CAP_TUNNEL_TX_CSUM)) + netdev->gso_partial_features |= + NETIF_F_GSO_UDP_TUNNEL_CSUM; + + netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; + offloads |= NETIF_F_TSO_MANGLEID; + } + if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_LOOPBACK)) + offloads |= NETIF_F_LOOPBACK; + + netdev->features |= dflt_features; + netdev->hw_features |= dflt_features | offloads; + netdev->hw_enc_features |= dflt_features | offloads; + idpf_set_ethtool_ops(netdev); + SET_NETDEV_DEV(netdev, &adapter->pdev->dev); + + /* carrier off on init to avoid Tx hangs */ + netif_carrier_off(netdev); + + /* make sure transmit queues start off as stopped */ + netif_tx_stop_all_queues(netdev); + + /* The vport can be arbitrarily released so we need to also track + * netdevs in the adapter struct + */ + adapter->netdevs[idx] = netdev; + + return 0; +} + +/** + * idpf_get_free_slot - get the next non-NULL location index in array + * @adapter: adapter in which to look for a free vport slot + */ +static int idpf_get_free_slot(struct idpf_adapter *adapter) +{ + unsigned int i; + + for (i = 0; i < adapter->max_vports; i++) { + if (!adapter->vports[i]) + return i; + } + + return IDPF_NO_FREE_SLOT; +} + +/** + * idpf_remove_features - Turn off feature configs + * @vport: virtual port structure + */ +static void idpf_remove_features(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + + if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER)) + idpf_remove_mac_filters(vport); +} + +/** + * idpf_vport_stop - Disable a vport + * @vport: vport to disable + */ +static void idpf_vport_stop(struct idpf_vport *vport) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + + if (np->state <= __IDPF_VPORT_DOWN) + return; + + netif_carrier_off(vport->netdev); + netif_tx_disable(vport->netdev); + + idpf_send_disable_vport_msg(vport); + idpf_send_disable_queues_msg(vport); + idpf_send_map_unmap_queue_vector_msg(vport, false); + /* Normally we ask for queues in create_vport, but if the number of + * initially requested queues have changed, for example via ethtool + * set channels, we do delete queues and then add the queues back + * instead of deleting and reallocating the vport. + */ + if (test_and_clear_bit(IDPF_VPORT_DEL_QUEUES, vport->flags)) + idpf_send_delete_queues_msg(vport); + + idpf_remove_features(vport); + + vport->link_up = false; + idpf_vport_intr_deinit(vport); + idpf_vport_intr_rel(vport); + idpf_vport_queues_rel(vport); + np->state = __IDPF_VPORT_DOWN; +} + +/** + * idpf_stop - Disables a network interface + * @netdev: network interface device structure + * + * The stop entry point is called when an interface is de-activated by the OS, + * and the netdevice enters the DOWN state. The hardware is still under the + * driver's control, but the netdev interface is disabled. + * + * Returns success only - not allowed to fail + */ +static int idpf_stop(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport *vport; + + if (test_bit(IDPF_REMOVE_IN_PROG, np->adapter->flags)) + return 0; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + idpf_vport_stop(vport); + + idpf_vport_ctrl_unlock(netdev); + + return 0; +} + +/** + * idpf_decfg_netdev - Unregister the netdev + * @vport: vport for which netdev to be unregistered + */ +static void idpf_decfg_netdev(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + + unregister_netdev(vport->netdev); + free_netdev(vport->netdev); + vport->netdev = NULL; + + adapter->netdevs[vport->idx] = NULL; +} + +/** + * idpf_vport_rel - Delete a vport and free its resources + * @vport: the vport being removed + */ +static void idpf_vport_rel(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport_config *vport_config; + struct idpf_vector_info vec_info; + struct idpf_rss_data *rss_data; + struct idpf_vport_max_q max_q; + u16 idx = vport->idx; + int i; + + vport_config = adapter->vport_config[vport->idx]; + idpf_deinit_rss(vport); + rss_data = &vport_config->user_config.rss_data; + kfree(rss_data->rss_key); + rss_data->rss_key = NULL; + + idpf_send_destroy_vport_msg(vport); + + /* Set all bits as we dont know on which vc_state the vport vhnl_wq + * is waiting on and wakeup the virtchnl workqueue even if it is + * waiting for the response as we are going down + */ + for (i = 0; i < IDPF_VC_NBITS; i++) + set_bit(i, vport->vc_state); + wake_up(&vport->vchnl_wq); + + mutex_destroy(&vport->vc_buf_lock); + + /* Clear all the bits */ + for (i = 0; i < IDPF_VC_NBITS; i++) + clear_bit(i, vport->vc_state); + + /* Release all max queues allocated to the adapter's pool */ + max_q.max_rxq = vport_config->max_q.max_rxq; + max_q.max_txq = vport_config->max_q.max_txq; + max_q.max_bufq = vport_config->max_q.max_bufq; + max_q.max_complq = vport_config->max_q.max_complq; + idpf_vport_dealloc_max_qs(adapter, &max_q); + + /* Release all the allocated vectors on the stack */ + vec_info.num_req_vecs = 0; + vec_info.num_curr_vecs = vport->num_q_vectors; + vec_info.default_vport = vport->default_vport; + + idpf_req_rel_vector_indexes(adapter, vport->q_vector_idxs, &vec_info); + + kfree(vport->q_vector_idxs); + vport->q_vector_idxs = NULL; + + kfree(adapter->vport_params_recvd[idx]); + adapter->vport_params_recvd[idx] = NULL; + kfree(adapter->vport_params_reqd[idx]); + adapter->vport_params_reqd[idx] = NULL; + if (adapter->vport_config[idx]) { + kfree(adapter->vport_config[idx]->req_qs_chunks); + adapter->vport_config[idx]->req_qs_chunks = NULL; + } + kfree(vport); + adapter->num_alloc_vports--; +} + +/** + * idpf_vport_dealloc - cleanup and release a given vport + * @vport: pointer to idpf vport structure + * + * returns nothing + */ +static void idpf_vport_dealloc(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + unsigned int i = vport->idx; + + idpf_deinit_mac_addr(vport); + idpf_vport_stop(vport); + + if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) + idpf_decfg_netdev(vport); + if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) + idpf_del_all_mac_filters(vport); + + if (adapter->netdevs[i]) { + struct idpf_netdev_priv *np = netdev_priv(adapter->netdevs[i]); + + np->vport = NULL; + } + + idpf_vport_rel(vport); + + adapter->vports[i] = NULL; + adapter->next_vport = idpf_get_free_slot(adapter); +} + +/** + * idpf_vport_alloc - Allocates the next available struct vport in the adapter + * @adapter: board private structure + * @max_q: vport max queue info + * + * returns a pointer to a vport on success, NULL on failure. + */ +static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q) +{ + struct idpf_rss_data *rss_data; + u16 idx = adapter->next_vport; + struct idpf_vport *vport; + u16 num_max_q; + + if (idx == IDPF_NO_FREE_SLOT) + return NULL; + + vport = kzalloc(sizeof(*vport), GFP_KERNEL); + if (!vport) + return vport; + + if (!adapter->vport_config[idx]) { + struct idpf_vport_config *vport_config; + + vport_config = kzalloc(sizeof(*vport_config), GFP_KERNEL); + if (!vport_config) { + kfree(vport); + + return NULL; + } + + adapter->vport_config[idx] = vport_config; + } + + vport->idx = idx; + vport->adapter = adapter; + vport->compln_clean_budget = IDPF_TX_COMPLQ_CLEAN_BUDGET; + vport->default_vport = adapter->num_alloc_vports < + idpf_get_default_vports(adapter); + + num_max_q = max(max_q->max_txq, max_q->max_rxq); + vport->q_vector_idxs = kcalloc(num_max_q, sizeof(u16), GFP_KERNEL); + if (!vport->q_vector_idxs) { + kfree(vport); + + return NULL; + } + idpf_vport_init(vport, max_q); + + /* This alloc is done separate from the LUT because it's not strictly + * dependent on how many queues we have. If we change number of queues + * and soft reset we'll need a new LUT but the key can remain the same + * for as long as the vport exists. + */ + rss_data = &adapter->vport_config[idx]->user_config.rss_data; + rss_data->rss_key = kzalloc(rss_data->rss_key_size, GFP_KERNEL); + if (!rss_data->rss_key) { + kfree(vport); + + return NULL; + } + /* Initialize default rss key */ + netdev_rss_key_fill((void *)rss_data->rss_key, rss_data->rss_key_size); + + /* fill vport slot in the adapter struct */ + adapter->vports[idx] = vport; + adapter->vport_ids[idx] = idpf_get_vport_id(vport); + + adapter->num_alloc_vports++; + /* prepare adapter->next_vport for next use */ + adapter->next_vport = idpf_get_free_slot(adapter); + + return vport; +} + +/** + * idpf_get_stats64 - get statistics for network device structure + * @netdev: network interface device structure + * @stats: main device statistics structure + */ +static void idpf_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + spin_lock_bh(&np->stats_lock); + *stats = np->netstats; + spin_unlock_bh(&np->stats_lock); +} + +/** + * idpf_statistics_task - Delayed task to get statistics over mailbox + * @work: work_struct handle to our data + */ +void idpf_statistics_task(struct work_struct *work) +{ + struct idpf_adapter *adapter; + int i; + + adapter = container_of(work, struct idpf_adapter, stats_task.work); + + for (i = 0; i < adapter->max_vports; i++) { + struct idpf_vport *vport = adapter->vports[i]; + + if (vport && !test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) + idpf_send_get_stats_msg(vport); + } + + queue_delayed_work(adapter->stats_wq, &adapter->stats_task, + msecs_to_jiffies(10000)); +} + +/** + * idpf_mbx_task - Delayed task to handle mailbox responses + * @work: work_struct handle + */ +void idpf_mbx_task(struct work_struct *work) +{ + struct idpf_adapter *adapter; + + adapter = container_of(work, struct idpf_adapter, mbx_task.work); + + if (test_bit(IDPF_MB_INTR_MODE, adapter->flags)) + idpf_mb_irq_enable(adapter); + else + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, + msecs_to_jiffies(300)); + + idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_UNKNOWN, NULL, 0); +} + +/** + * idpf_service_task - Delayed task for handling mailbox responses + * @work: work_struct handle to our data + * + */ +void idpf_service_task(struct work_struct *work) +{ + struct idpf_adapter *adapter; + + adapter = container_of(work, struct idpf_adapter, serv_task.work); + + if (idpf_is_reset_detected(adapter) && + !idpf_is_reset_in_prog(adapter) && + !test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) { + dev_info(&adapter->pdev->dev, "HW reset detected\n"); + set_bit(IDPF_HR_FUNC_RESET, adapter->flags); + queue_delayed_work(adapter->vc_event_wq, + &adapter->vc_event_task, + msecs_to_jiffies(10)); + } + + queue_delayed_work(adapter->serv_wq, &adapter->serv_task, + msecs_to_jiffies(300)); +} + +/** + * idpf_restore_features - Restore feature configs + * @vport: virtual port structure + */ +static void idpf_restore_features(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + + if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER)) + idpf_restore_mac_filters(vport); +} + +/** + * idpf_set_real_num_queues - set number of queues for netdev + * @vport: virtual port structure + * + * Returns 0 on success, negative on failure. + */ +static int idpf_set_real_num_queues(struct idpf_vport *vport) +{ + int err; + + err = netif_set_real_num_rx_queues(vport->netdev, vport->num_rxq); + if (err) + return err; + + return netif_set_real_num_tx_queues(vport->netdev, vport->num_txq); +} + +/** + * idpf_up_complete - Complete interface up sequence + * @vport: virtual port structure + * + * Returns 0 on success, negative on failure. + */ +static int idpf_up_complete(struct idpf_vport *vport) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + + if (vport->link_up && !netif_carrier_ok(vport->netdev)) { + netif_carrier_on(vport->netdev); + netif_tx_start_all_queues(vport->netdev); + } + + np->state = __IDPF_VPORT_UP; + + return 0; +} + +/** + * idpf_rx_init_buf_tail - Write initial buffer ring tail value + * @vport: virtual port struct + */ +static void idpf_rx_init_buf_tail(struct idpf_vport *vport) +{ + int i, j; + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *grp = &vport->rxq_grps[i]; + + if (idpf_is_queue_model_split(vport->rxq_model)) { + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + struct idpf_queue *q = + &grp->splitq.bufq_sets[j].bufq; + + writel(q->next_to_alloc, q->tail); + } + } else { + for (j = 0; j < grp->singleq.num_rxq; j++) { + struct idpf_queue *q = + grp->singleq.rxqs[j]; + + writel(q->next_to_alloc, q->tail); + } + } + } +} + +/** + * idpf_vport_open - Bring up a vport + * @vport: vport to bring up + * @alloc_res: allocate queue resources + */ +static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport_config *vport_config; + int err; + + if (np->state != __IDPF_VPORT_DOWN) + return -EBUSY; + + /* we do not allow interface up just yet */ + netif_carrier_off(vport->netdev); + + if (alloc_res) { + err = idpf_vport_queues_alloc(vport); + if (err) + return err; + } + + err = idpf_vport_intr_alloc(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", + vport->vport_id, err); + goto queues_rel; + } + + err = idpf_vport_queue_ids_init(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n", + vport->vport_id, err); + goto intr_rel; + } + + err = idpf_vport_intr_init(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n", + vport->vport_id, err); + goto intr_rel; + } + + err = idpf_rx_bufs_init_all(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", + vport->vport_id, err); + goto intr_rel; + } + + err = idpf_queue_reg_init(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", + vport->vport_id, err); + goto intr_rel; + } + + idpf_rx_init_buf_tail(vport); + + err = idpf_send_config_queues_msg(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to configure queues for vport %u, %d\n", + vport->vport_id, err); + goto intr_deinit; + } + + err = idpf_send_map_unmap_queue_vector_msg(vport, true); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to map queue vectors for vport %u: %d\n", + vport->vport_id, err); + goto intr_deinit; + } + + err = idpf_send_enable_queues_msg(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to enable queues for vport %u: %d\n", + vport->vport_id, err); + goto unmap_queue_vectors; + } + + err = idpf_send_enable_vport_msg(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to enable vport %u: %d\n", + vport->vport_id, err); + err = -EAGAIN; + goto disable_queues; + } + + idpf_restore_features(vport); + + vport_config = adapter->vport_config[vport->idx]; + if (vport_config->user_config.rss_data.rss_lut) + err = idpf_config_rss(vport); + else + err = idpf_init_rss(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to initialize RSS for vport %u: %d\n", + vport->vport_id, err); + goto disable_vport; + } + + err = idpf_up_complete(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to complete interface up for vport %u: %d\n", + vport->vport_id, err); + goto deinit_rss; + } + + return 0; + +deinit_rss: + idpf_deinit_rss(vport); +disable_vport: + idpf_send_disable_vport_msg(vport); +disable_queues: + idpf_send_disable_queues_msg(vport); +unmap_queue_vectors: + idpf_send_map_unmap_queue_vector_msg(vport, false); +intr_deinit: + idpf_vport_intr_deinit(vport); +intr_rel: + idpf_vport_intr_rel(vport); +queues_rel: + idpf_vport_queues_rel(vport); + + return err; +} + +/** + * idpf_init_task - Delayed initialization task + * @work: work_struct handle to our data + * + * Init task finishes up pending work started in probe. Due to the asynchronous + * nature in which the device communicates with hardware, we may have to wait + * several milliseconds to get a response. Instead of busy polling in probe, + * pulling it out into a delayed work task prevents us from bogging down the + * whole system waiting for a response from hardware. + */ +void idpf_init_task(struct work_struct *work) +{ + struct idpf_vport_config *vport_config; + struct idpf_vport_max_q max_q; + struct idpf_adapter *adapter; + struct idpf_netdev_priv *np; + struct idpf_vport *vport; + u16 num_default_vports; + struct pci_dev *pdev; + bool default_vport; + int index, err; + + adapter = container_of(work, struct idpf_adapter, init_task.work); + + num_default_vports = idpf_get_default_vports(adapter); + if (adapter->num_alloc_vports < num_default_vports) + default_vport = true; + else + default_vport = false; + + err = idpf_vport_alloc_max_qs(adapter, &max_q); + if (err) + goto unwind_vports; + + err = idpf_send_create_vport_msg(adapter, &max_q); + if (err) { + idpf_vport_dealloc_max_qs(adapter, &max_q); + goto unwind_vports; + } + + pdev = adapter->pdev; + vport = idpf_vport_alloc(adapter, &max_q); + if (!vport) { + err = -EFAULT; + dev_err(&pdev->dev, "failed to allocate vport: %d\n", + err); + idpf_vport_dealloc_max_qs(adapter, &max_q); + goto unwind_vports; + } + + index = vport->idx; + vport_config = adapter->vport_config[index]; + + init_waitqueue_head(&vport->sw_marker_wq); + init_waitqueue_head(&vport->vchnl_wq); + + mutex_init(&vport->vc_buf_lock); + spin_lock_init(&vport_config->mac_filter_list_lock); + + INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list); + + err = idpf_check_supported_desc_ids(vport); + if (err) { + dev_err(&pdev->dev, "failed to get required descriptor ids\n"); + goto cfg_netdev_err; + } + + if (idpf_cfg_netdev(vport)) + goto cfg_netdev_err; + + err = idpf_send_get_rx_ptype_msg(vport); + if (err) + goto handle_err; + + /* Once state is put into DOWN, driver is ready for dev_open */ + np = netdev_priv(vport->netdev); + np->state = __IDPF_VPORT_DOWN; + if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) + idpf_vport_open(vport, true); + + /* Spawn and return 'idpf_init_task' work queue until all the + * default vports are created + */ + if (adapter->num_alloc_vports < num_default_vports) { + queue_delayed_work(adapter->init_wq, &adapter->init_task, + msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07))); + + return; + } + + for (index = 0; index < adapter->max_vports; index++) { + if (adapter->netdevs[index] && + !test_bit(IDPF_VPORT_REG_NETDEV, + adapter->vport_config[index]->flags)) { + register_netdev(adapter->netdevs[index]); + set_bit(IDPF_VPORT_REG_NETDEV, + adapter->vport_config[index]->flags); + } + } + + /* As all the required vports are created, clear the reset flag + * unconditionally here in case we were in reset and the link was down. + */ + clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); + /* Start the statistics task now */ + queue_delayed_work(adapter->stats_wq, &adapter->stats_task, + msecs_to_jiffies(10 * (pdev->devfn & 0x07))); + + return; + +handle_err: + idpf_decfg_netdev(vport); +cfg_netdev_err: + idpf_vport_rel(vport); + adapter->vports[index] = NULL; +unwind_vports: + if (default_vport) { + for (index = 0; index < adapter->max_vports; index++) { + if (adapter->vports[index]) + idpf_vport_dealloc(adapter->vports[index]); + } + } + clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); +} + +/** + * idpf_sriov_ena - Enable or change number of VFs + * @adapter: private data struct + * @num_vfs: number of VFs to allocate + */ +static int idpf_sriov_ena(struct idpf_adapter *adapter, int num_vfs) +{ + struct device *dev = &adapter->pdev->dev; + int err; + + err = idpf_send_set_sriov_vfs_msg(adapter, num_vfs); + if (err) { + dev_err(dev, "Failed to allocate VFs: %d\n", err); + + return err; + } + + err = pci_enable_sriov(adapter->pdev, num_vfs); + if (err) { + idpf_send_set_sriov_vfs_msg(adapter, 0); + dev_err(dev, "Failed to enable SR-IOV: %d\n", err); + + return err; + } + + adapter->num_vfs = num_vfs; + + return num_vfs; +} + +/** + * idpf_sriov_configure - Configure the requested VFs + * @pdev: pointer to a pci_dev structure + * @num_vfs: number of vfs to allocate + * + * Enable or change the number of VFs. Called when the user updates the number + * of VFs in sysfs. + **/ +int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct idpf_adapter *adapter = pci_get_drvdata(pdev); + + if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_SRIOV)) { + dev_info(&pdev->dev, "SR-IOV is not supported on this device\n"); + + return -EOPNOTSUPP; + } + + if (num_vfs) + return idpf_sriov_ena(adapter, num_vfs); + + if (pci_vfs_assigned(pdev)) { + dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs\n"); + + return -EBUSY; + } + + pci_disable_sriov(adapter->pdev); + idpf_send_set_sriov_vfs_msg(adapter, 0); + adapter->num_vfs = 0; + + return 0; +} + +/** + * idpf_deinit_task - Device deinit routine + * @adapter: Driver specific private structure + * + * Extended remove logic which will be used for + * hard reset as well + */ +void idpf_deinit_task(struct idpf_adapter *adapter) +{ + unsigned int i; + + /* Wait until the init_task is done else this thread might release + * the resources first and the other thread might end up in a bad state + */ + cancel_delayed_work_sync(&adapter->init_task); + + if (!adapter->vports) + return; + + cancel_delayed_work_sync(&adapter->stats_task); + + for (i = 0; i < adapter->max_vports; i++) { + if (adapter->vports[i]) + idpf_vport_dealloc(adapter->vports[i]); + } +} + +/** + * idpf_check_reset_complete - check that reset is complete + * @hw: pointer to hw struct + * @reset_reg: struct with reset registers + * + * Returns 0 if device is ready to use, or -EBUSY if it's in reset. + **/ +static int idpf_check_reset_complete(struct idpf_hw *hw, + struct idpf_reset_reg *reset_reg) +{ + struct idpf_adapter *adapter = hw->back; + int i; + + for (i = 0; i < 2000; i++) { + u32 reg_val = readl(reset_reg->rstat); + + /* 0xFFFFFFFF might be read if other side hasn't cleared the + * register for us yet and 0xFFFFFFFF is not a valid value for + * the register, so treat that as invalid. + */ + if (reg_val != 0xFFFFFFFF && (reg_val & reset_reg->rstat_m)) + return 0; + + usleep_range(5000, 10000); + } + + dev_warn(&adapter->pdev->dev, "Device reset timeout!\n"); + /* Clear the reset flag unconditionally here since the reset + * technically isn't in progress anymore from the driver's perspective + */ + clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); + + return -EBUSY; +} + +/** + * idpf_set_vport_state - Set the vport state to be after the reset + * @adapter: Driver specific private structure + */ +static void idpf_set_vport_state(struct idpf_adapter *adapter) +{ + u16 i; + + for (i = 0; i < adapter->max_vports; i++) { + struct idpf_netdev_priv *np; + + if (!adapter->netdevs[i]) + continue; + + np = netdev_priv(adapter->netdevs[i]); + if (np->state == __IDPF_VPORT_UP) + set_bit(IDPF_VPORT_UP_REQUESTED, + adapter->vport_config[i]->flags); + } +} + +/** + * idpf_init_hard_reset - Initiate a hardware reset + * @adapter: Driver specific private structure + * + * Deallocate the vports and all the resources associated with them and + * reallocate. Also reinitialize the mailbox. Return 0 on success, + * negative on failure. + */ +static int idpf_init_hard_reset(struct idpf_adapter *adapter) +{ + struct idpf_reg_ops *reg_ops = &adapter->dev_ops.reg_ops; + struct device *dev = &adapter->pdev->dev; + struct net_device *netdev; + int err; + u16 i; + + mutex_lock(&adapter->vport_ctrl_lock); + + dev_info(dev, "Device HW Reset initiated\n"); + + /* Avoid TX hangs on reset */ + for (i = 0; i < adapter->max_vports; i++) { + netdev = adapter->netdevs[i]; + if (!netdev) + continue; + + netif_carrier_off(netdev); + netif_tx_disable(netdev); + } + + /* Prepare for reset */ + if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { + reg_ops->trigger_reset(adapter, IDPF_HR_DRV_LOAD); + } else if (test_and_clear_bit(IDPF_HR_FUNC_RESET, adapter->flags)) { + bool is_reset = idpf_is_reset_detected(adapter); + + idpf_set_vport_state(adapter); + idpf_vc_core_deinit(adapter); + if (!is_reset) + reg_ops->trigger_reset(adapter, IDPF_HR_FUNC_RESET); + idpf_deinit_dflt_mbx(adapter); + } else { + dev_err(dev, "Unhandled hard reset cause\n"); + err = -EBADRQC; + goto unlock_mutex; + } + + /* Wait for reset to complete */ + err = idpf_check_reset_complete(&adapter->hw, &adapter->reset_reg); + if (err) { + dev_err(dev, "The driver was unable to contact the device's firmware. Check that the FW is running. Driver state= 0x%x\n", + adapter->state); + goto unlock_mutex; + } + + /* Reset is complete and so start building the driver resources again */ + err = idpf_init_dflt_mbx(adapter); + if (err) { + dev_err(dev, "Failed to initialize default mailbox: %d\n", err); + goto unlock_mutex; + } + + /* Initialize the state machine, also allocate memory and request + * resources + */ + err = idpf_vc_core_init(adapter); + if (err) { + idpf_deinit_dflt_mbx(adapter); + goto unlock_mutex; + } + + /* Wait till all the vports are initialized to release the reset lock, + * else user space callbacks may access uninitialized vports + */ + while (test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) + msleep(100); + +unlock_mutex: + mutex_unlock(&adapter->vport_ctrl_lock); + + return err; +} + +/** + * idpf_vc_event_task - Handle virtchannel event logic + * @work: work queue struct + */ +void idpf_vc_event_task(struct work_struct *work) +{ + struct idpf_adapter *adapter; + + adapter = container_of(work, struct idpf_adapter, vc_event_task.work); + + if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) + return; + + if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags) || + test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { + set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); + idpf_init_hard_reset(adapter); + } +} + +/** + * idpf_initiate_soft_reset - Initiate a software reset + * @vport: virtual port data struct + * @reset_cause: reason for the soft reset + * + * Soft reset only reallocs vport queue resources. Returns 0 on success, + * negative on failure. + */ +int idpf_initiate_soft_reset(struct idpf_vport *vport, + enum idpf_vport_reset_cause reset_cause) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + enum idpf_vport_state current_state = np->state; + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport *new_vport; + int err, i; + + /* If the system is low on memory, we can end up in bad state if we + * free all the memory for queue resources and try to allocate them + * again. Instead, we can pre-allocate the new resources before doing + * anything and bailing if the alloc fails. + * + * Make a clone of the existing vport to mimic its current + * configuration, then modify the new structure with any requested + * changes. Once the allocation of the new resources is done, stop the + * existing vport and copy the configuration to the main vport. If an + * error occurred, the existing vport will be untouched. + * + */ + new_vport = kzalloc(sizeof(*vport), GFP_KERNEL); + if (!new_vport) + return -ENOMEM; + + /* This purposely avoids copying the end of the struct because it + * contains wait_queues and mutexes and other stuff we don't want to + * mess with. Nothing below should use those variables from new_vport + * and should instead always refer to them in vport if they need to. + */ + memcpy(new_vport, vport, offsetof(struct idpf_vport, vc_state)); + + /* Adjust resource parameters prior to reallocating resources */ + switch (reset_cause) { + case IDPF_SR_Q_CHANGE: + err = idpf_vport_adjust_qs(new_vport); + if (err) + goto free_vport; + break; + case IDPF_SR_Q_DESC_CHANGE: + /* Update queue parameters before allocating resources */ + idpf_vport_calc_num_q_desc(new_vport); + break; + case IDPF_SR_MTU_CHANGE: + case IDPF_SR_RSC_CHANGE: + break; + default: + dev_err(&adapter->pdev->dev, "Unhandled soft reset cause\n"); + err = -EINVAL; + goto free_vport; + } + + err = idpf_vport_queues_alloc(new_vport); + if (err) + goto free_vport; + if (current_state <= __IDPF_VPORT_DOWN) { + idpf_send_delete_queues_msg(vport); + } else { + set_bit(IDPF_VPORT_DEL_QUEUES, vport->flags); + idpf_vport_stop(vport); + } + + idpf_deinit_rss(vport); + /* We're passing in vport here because we need its wait_queue + * to send a message and it should be getting all the vport + * config data out of the adapter but we need to be careful not + * to add code to add_queues to change the vport config within + * vport itself as it will be wiped with a memcpy later. + */ + err = idpf_send_add_queues_msg(vport, new_vport->num_txq, + new_vport->num_complq, + new_vport->num_rxq, + new_vport->num_bufq); + if (err) + goto err_reset; + + /* Same comment as above regarding avoiding copying the wait_queues and + * mutexes applies here. We do not want to mess with those if possible. + */ + memcpy(vport, new_vport, offsetof(struct idpf_vport, vc_state)); + + /* Since idpf_vport_queues_alloc was called with new_port, the queue + * back pointers are currently pointing to the local new_vport. Reset + * the backpointers to the original vport here + */ + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + int j; + + tx_qgrp->vport = vport; + for (j = 0; j < tx_qgrp->num_txq; j++) + tx_qgrp->txqs[j]->vport = vport; + + if (idpf_is_queue_model_split(vport->txq_model)) + tx_qgrp->complq->vport = vport; + } + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + struct idpf_queue *q; + u16 num_rxq; + int j; + + rx_qgrp->vport = vport; + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) + rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport; + + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + q->vport = vport; + } + } + + if (reset_cause == IDPF_SR_Q_CHANGE) + idpf_vport_alloc_vec_indexes(vport); + + err = idpf_set_real_num_queues(vport); + if (err) + goto err_reset; + + if (current_state == __IDPF_VPORT_UP) + err = idpf_vport_open(vport, false); + + kfree(new_vport); + + return err; + +err_reset: + idpf_vport_queues_rel(new_vport); +free_vport: + kfree(new_vport); + + return err; +} + +/** + * idpf_addr_sync - Callback for dev_(mc|uc)_sync to add address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be added. We call + * __dev_(uc|mc)_sync from .set_rx_mode. Kernel takes addr_list_lock spinlock + * meaning we cannot sleep in this context. Due to this, we have to add the + * filter and send the virtchnl message asynchronously without waiting for the + * response from the other side. We won't know whether or not the operation + * actually succeeded until we get the message back. Returns 0 on success, + * negative on failure. + */ +static int idpf_addr_sync(struct net_device *netdev, const u8 *addr) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + return idpf_add_mac_filter(np->vport, np, addr, true); +} + +/** + * idpf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be added. We call + * __dev_(uc|mc)_sync from .set_rx_mode. Kernel takes addr_list_lock spinlock + * meaning we cannot sleep in this context. Due to this we have to delete the + * filter and send the virtchnl message asynchronously without waiting for the + * return from the other side. We won't know whether or not the operation + * actually succeeded until we get the message back. Returns 0 on success, + * negative on failure. + */ +static int idpf_addr_unsync(struct net_device *netdev, const u8 *addr) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + /* Under some circumstances, we might receive a request to delete + * our own device address from our uc list. Because we store the + * device address in the VSI's MAC filter list, we need to ignore + * such requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + + idpf_del_mac_filter(np->vport, np, addr, true); + + return 0; +} + +/** + * idpf_set_rx_mode - NDO callback to set the netdev filters + * @netdev: network interface device structure + * + * Stack takes addr_list_lock spinlock before calling our .set_rx_mode. We + * cannot sleep in this context. + */ +static void idpf_set_rx_mode(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_user_config_data *config_data; + struct idpf_adapter *adapter; + bool changed = false; + struct device *dev; + int err; + + adapter = np->adapter; + dev = &adapter->pdev->dev; + + if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER)) { + __dev_uc_sync(netdev, idpf_addr_sync, idpf_addr_unsync); + __dev_mc_sync(netdev, idpf_addr_sync, idpf_addr_unsync); + } + + if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_PROMISC)) + return; + + config_data = &adapter->vport_config[np->vport_idx]->user_config; + /* IFF_PROMISC enables both unicast and multicast promiscuous, + * while IFF_ALLMULTI only enables multicast such that: + * + * promisc + allmulti = unicast | multicast + * promisc + !allmulti = unicast | multicast + * !promisc + allmulti = multicast + */ + if ((netdev->flags & IFF_PROMISC) && + !test_and_set_bit(__IDPF_PROMISC_UC, config_data->user_flags)) { + changed = true; + dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n"); + if (!test_and_set_bit(__IDPF_PROMISC_MC, adapter->flags)) + dev_info(dev, "Entering multicast promiscuous mode\n"); + } + + if (!(netdev->flags & IFF_PROMISC) && + test_and_clear_bit(__IDPF_PROMISC_UC, config_data->user_flags)) { + changed = true; + dev_info(dev, "Leaving promiscuous mode\n"); + } + + if (netdev->flags & IFF_ALLMULTI && + !test_and_set_bit(__IDPF_PROMISC_MC, config_data->user_flags)) { + changed = true; + dev_info(dev, "Entering multicast promiscuous mode\n"); + } + + if (!(netdev->flags & (IFF_ALLMULTI | IFF_PROMISC)) && + test_and_clear_bit(__IDPF_PROMISC_MC, config_data->user_flags)) { + changed = true; + dev_info(dev, "Leaving multicast promiscuous mode\n"); + } + + if (!changed) + return; + + err = idpf_set_promiscuous(adapter, config_data, np->vport_id); + if (err) + dev_err(dev, "Failed to set promiscuous mode: %d\n", err); +} + +/** + * idpf_vport_manage_rss_lut - disable/enable RSS + * @vport: the vport being changed + * + * In the event of disable request for RSS, this function will zero out RSS + * LUT, while in the event of enable request for RSS, it will reconfigure RSS + * LUT with the default LUT configuration. + */ +static int idpf_vport_manage_rss_lut(struct idpf_vport *vport) +{ + bool ena = idpf_is_feature_ena(vport, NETIF_F_RXHASH); + struct idpf_rss_data *rss_data; + u16 idx = vport->idx; + int lut_size; + + rss_data = &vport->adapter->vport_config[idx]->user_config.rss_data; + lut_size = rss_data->rss_lut_size * sizeof(u32); + + if (ena) { + /* This will contain the default or user configured LUT */ + memcpy(rss_data->rss_lut, rss_data->cached_lut, lut_size); + } else { + /* Save a copy of the current LUT to be restored later if + * requested. + */ + memcpy(rss_data->cached_lut, rss_data->rss_lut, lut_size); + + /* Zero out the current LUT to disable */ + memset(rss_data->rss_lut, 0, lut_size); + } + + return idpf_config_rss(vport); +} + +/** + * idpf_set_features - set the netdev feature flags + * @netdev: ptr to the netdev being adjusted + * @features: the feature set that the stack is suggesting + */ +static int idpf_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = netdev->features ^ features; + struct idpf_adapter *adapter; + struct idpf_vport *vport; + int err = 0; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + adapter = vport->adapter; + + if (idpf_is_reset_in_prog(adapter)) { + dev_err(&adapter->pdev->dev, "Device is resetting, changing netdev features temporarily unavailable.\n"); + err = -EBUSY; + goto unlock_mutex; + } + + if (changed & NETIF_F_RXHASH) { + netdev->features ^= NETIF_F_RXHASH; + err = idpf_vport_manage_rss_lut(vport); + if (err) + goto unlock_mutex; + } + + if (changed & NETIF_F_GRO_HW) { + netdev->features ^= NETIF_F_GRO_HW; + err = idpf_initiate_soft_reset(vport, IDPF_SR_RSC_CHANGE); + if (err) + goto unlock_mutex; + } + + if (changed & NETIF_F_LOOPBACK) { + netdev->features ^= NETIF_F_LOOPBACK; + err = idpf_send_ena_dis_loopback_msg(vport); + } + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_open - Called when a network interface becomes active + * @netdev: network interface device structure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the netdev watchdog is enabled, + * and the stack is notified that the interface is ready. + * + * Returns 0 on success, negative value on failure + */ +static int idpf_open(struct net_device *netdev) +{ + struct idpf_vport *vport; + int err; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + err = idpf_vport_open(vport, true); + + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_change_mtu - NDO callback to change the MTU + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size + * + * Returns 0 on success, negative on failure + */ +static int idpf_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct idpf_vport *vport; + int err; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + netdev->mtu = new_mtu; + + err = idpf_initiate_soft_reset(vport, IDPF_SR_MTU_CHANGE); + + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_features_check - Validate packet conforms to limits + * @skb: skb buffer + * @netdev: This port's netdev + * @features: Offload features that the stack believes apply + */ +static netdev_features_t idpf_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + struct idpf_adapter *adapter = vport->adapter; + size_t len; + + /* No point in doing any of this if neither checksum nor GSO are + * being requested for this frame. We can rule out both by just + * checking for CHECKSUM_PARTIAL + */ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return features; + + /* We cannot support GSO if the MSS is going to be less than + * 88 bytes. If it is then we need to drop support for GSO. + */ + if (skb_is_gso(skb) && + (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS)) + features &= ~NETIF_F_GSO_MASK; + + /* Ensure MACLEN is <= 126 bytes (63 words) and not an odd size */ + len = skb_network_offset(skb); + if (unlikely(len & ~(126))) + goto unsupported; + + len = skb_network_header_len(skb); + if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + goto unsupported; + + if (!skb->encapsulation) + return features; + + /* L4TUNLEN can support 127 words */ + len = skb_inner_network_header(skb) - skb_transport_header(skb); + if (unlikely(len & ~(127 * 2))) + goto unsupported; + + /* IPLEN can support at most 127 dwords */ + len = skb_inner_network_header_len(skb); + if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + goto unsupported; + + /* No need to validate L4LEN as TCP is the only protocol with a + * a flexible value and we support all possible values supported + * by TCP, which is at most 15 dwords + */ + + return features; + +unsupported: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + +/** + * idpf_set_mac - NDO callback to set port mac address + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + **/ +static int idpf_set_mac(struct net_device *netdev, void *p) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_config *vport_config; + struct sockaddr *addr = p; + struct idpf_vport *vport; + int err = 0; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + if (!idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS, + VIRTCHNL2_CAP_MACFILTER)) { + dev_info(&vport->adapter->pdev->dev, "Setting MAC address is not supported\n"); + err = -EOPNOTSUPP; + goto unlock_mutex; + } + + if (!is_valid_ether_addr(addr->sa_data)) { + dev_info(&vport->adapter->pdev->dev, "Invalid MAC address: %pM\n", + addr->sa_data); + err = -EADDRNOTAVAIL; + goto unlock_mutex; + } + + if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) + goto unlock_mutex; + + vport_config = vport->adapter->vport_config[vport->idx]; + err = idpf_add_mac_filter(vport, np, addr->sa_data, false); + if (err) { + __idpf_del_mac_filter(vport_config, addr->sa_data); + goto unlock_mutex; + } + + if (is_valid_ether_addr(vport->default_mac_addr)) + idpf_del_mac_filter(vport, np, vport->default_mac_addr, false); + + ether_addr_copy(vport->default_mac_addr, addr->sa_data); + eth_hw_addr_set(netdev, addr->sa_data); + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_alloc_dma_mem - Allocate dma memory + * @hw: pointer to hw struct + * @mem: pointer to dma_mem struct + * @size: size of the memory to allocate + */ +void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size) +{ + struct idpf_adapter *adapter = hw->back; + size_t sz = ALIGN(size, 4096); + + mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz, + &mem->pa, GFP_KERNEL); + mem->size = sz; + + return mem->va; +} + +/** + * idpf_free_dma_mem - Free the allocated dma memory + * @hw: pointer to hw struct + * @mem: pointer to dma_mem struct + */ +void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) +{ + struct idpf_adapter *adapter = hw->back; + + dma_free_coherent(&adapter->pdev->dev, mem->size, + mem->va, mem->pa); + mem->size = 0; + mem->va = NULL; + mem->pa = 0; +} + +static const struct net_device_ops idpf_netdev_ops_splitq = { + .ndo_open = idpf_open, + .ndo_stop = idpf_stop, + .ndo_start_xmit = idpf_tx_splitq_start, + .ndo_features_check = idpf_features_check, + .ndo_set_rx_mode = idpf_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = idpf_set_mac, + .ndo_change_mtu = idpf_change_mtu, + .ndo_get_stats64 = idpf_get_stats64, + .ndo_set_features = idpf_set_features, + .ndo_tx_timeout = idpf_tx_timeout, +}; + +static const struct net_device_ops idpf_netdev_ops_singleq = { + .ndo_open = idpf_open, + .ndo_stop = idpf_stop, + .ndo_start_xmit = idpf_tx_singleq_start, + .ndo_features_check = idpf_features_check, + .ndo_set_rx_mode = idpf_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = idpf_set_mac, + .ndo_change_mtu = idpf_change_mtu, + .ndo_get_stats64 = idpf_get_stats64, + .ndo_set_features = idpf_set_features, + .ndo_tx_timeout = idpf_tx_timeout, +}; diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c new file mode 100644 index 000000000000..e1febc74cefd --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" +#include "idpf_devids.h" + +#define DRV_SUMMARY "Intel(R) Infrastructure Data Path Function Linux Driver" + +MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_LICENSE("GPL"); + +/** + * idpf_remove - Device removal routine + * @pdev: PCI device information struct + */ +static void idpf_remove(struct pci_dev *pdev) +{ + struct idpf_adapter *adapter = pci_get_drvdata(pdev); + int i; + + set_bit(IDPF_REMOVE_IN_PROG, adapter->flags); + + /* Wait until vc_event_task is done to consider if any hard reset is + * in progress else we may go ahead and release the resources but the + * thread doing the hard reset might continue the init path and + * end up in bad state. + */ + cancel_delayed_work_sync(&adapter->vc_event_task); + if (adapter->num_vfs) + idpf_sriov_configure(pdev, 0); + + idpf_vc_core_deinit(adapter); + /* Be a good citizen and leave the device clean on exit */ + adapter->dev_ops.reg_ops.trigger_reset(adapter, IDPF_HR_FUNC_RESET); + idpf_deinit_dflt_mbx(adapter); + + if (!adapter->netdevs) + goto destroy_wqs; + + /* There are some cases where it's possible to still have netdevs + * registered with the stack at this point, e.g. if the driver detected + * a HW reset and rmmod is called before it fully recovers. Unregister + * any stale netdevs here. + */ + for (i = 0; i < adapter->max_vports; i++) { + if (!adapter->netdevs[i]) + continue; + if (adapter->netdevs[i]->reg_state != NETREG_UNINITIALIZED) + unregister_netdev(adapter->netdevs[i]); + free_netdev(adapter->netdevs[i]); + adapter->netdevs[i] = NULL; + } + +destroy_wqs: + destroy_workqueue(adapter->init_wq); + destroy_workqueue(adapter->serv_wq); + destroy_workqueue(adapter->mbx_wq); + destroy_workqueue(adapter->stats_wq); + destroy_workqueue(adapter->vc_event_wq); + + for (i = 0; i < adapter->max_vports; i++) { + kfree(adapter->vport_config[i]); + adapter->vport_config[i] = NULL; + } + kfree(adapter->vport_config); + adapter->vport_config = NULL; + kfree(adapter->netdevs); + adapter->netdevs = NULL; + + mutex_destroy(&adapter->vport_ctrl_lock); + mutex_destroy(&adapter->vector_lock); + mutex_destroy(&adapter->queue_lock); + mutex_destroy(&adapter->vc_buf_lock); + + pci_set_drvdata(pdev, NULL); + kfree(adapter); +} + +/** + * idpf_shutdown - PCI callback for shutting down device + * @pdev: PCI device information struct + */ +static void idpf_shutdown(struct pci_dev *pdev) +{ + idpf_remove(pdev); + + if (system_state == SYSTEM_POWER_OFF) + pci_set_power_state(pdev, PCI_D3hot); +} + +/** + * idpf_cfg_hw - Initialize HW struct + * @adapter: adapter to setup hw struct for + * + * Returns 0 on success, negative on failure + */ +static int idpf_cfg_hw(struct idpf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct idpf_hw *hw = &adapter->hw; + + hw->hw_addr = pcim_iomap_table(pdev)[0]; + if (!hw->hw_addr) { + pci_err(pdev, "failed to allocate PCI iomap table\n"); + + return -ENOMEM; + } + + hw->back = adapter; + + return 0; +} + +/** + * idpf_probe - Device initialization routine + * @pdev: PCI device information struct + * @ent: entry in idpf_pci_tbl + * + * Returns 0 on success, negative on failure + */ +static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct idpf_adapter *adapter; + int err; + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) + return -ENOMEM; + + adapter->req_tx_splitq = true; + adapter->req_rx_splitq = true; + + switch (ent->device) { + case IDPF_DEV_ID_PF: + idpf_dev_ops_init(adapter); + break; + case IDPF_DEV_ID_VF: + idpf_vf_dev_ops_init(adapter); + adapter->crc_enable = true; + break; + default: + err = -ENODEV; + dev_err(&pdev->dev, "Unexpected dev ID 0x%x in idpf probe\n", + ent->device); + goto err_free; + } + + adapter->pdev = pdev; + err = pcim_enable_device(pdev); + if (err) + goto err_free; + + err = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + if (err) { + pci_err(pdev, "pcim_iomap_regions failed %pe\n", ERR_PTR(err)); + + goto err_free; + } + + /* set up for high or low dma */ + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (err) { + pci_err(pdev, "DMA configuration failed: %pe\n", ERR_PTR(err)); + + goto err_free; + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, adapter); + + adapter->init_wq = alloc_workqueue("%s-%s-init", 0, 0, + dev_driver_string(dev), + dev_name(dev)); + if (!adapter->init_wq) { + dev_err(dev, "Failed to allocate init workqueue\n"); + err = -ENOMEM; + goto err_free; + } + + adapter->serv_wq = alloc_workqueue("%s-%s-service", 0, 0, + dev_driver_string(dev), + dev_name(dev)); + if (!adapter->serv_wq) { + dev_err(dev, "Failed to allocate service workqueue\n"); + err = -ENOMEM; + goto err_serv_wq_alloc; + } + + adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", 0, 0, + dev_driver_string(dev), + dev_name(dev)); + if (!adapter->mbx_wq) { + dev_err(dev, "Failed to allocate mailbox workqueue\n"); + err = -ENOMEM; + goto err_mbx_wq_alloc; + } + + adapter->stats_wq = alloc_workqueue("%s-%s-stats", 0, 0, + dev_driver_string(dev), + dev_name(dev)); + if (!adapter->stats_wq) { + dev_err(dev, "Failed to allocate workqueue\n"); + err = -ENOMEM; + goto err_stats_wq_alloc; + } + + adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", 0, 0, + dev_driver_string(dev), + dev_name(dev)); + if (!adapter->vc_event_wq) { + dev_err(dev, "Failed to allocate virtchnl event workqueue\n"); + err = -ENOMEM; + goto err_vc_event_wq_alloc; + } + + /* setup msglvl */ + adapter->msg_enable = netif_msg_init(-1, IDPF_AVAIL_NETIF_M); + + err = idpf_cfg_hw(adapter); + if (err) { + dev_err(dev, "Failed to configure HW structure for adapter: %d\n", + err); + goto err_cfg_hw; + } + + mutex_init(&adapter->vport_ctrl_lock); + mutex_init(&adapter->vector_lock); + mutex_init(&adapter->queue_lock); + mutex_init(&adapter->vc_buf_lock); + + init_waitqueue_head(&adapter->vchnl_wq); + + INIT_DELAYED_WORK(&adapter->init_task, idpf_init_task); + INIT_DELAYED_WORK(&adapter->serv_task, idpf_service_task); + INIT_DELAYED_WORK(&adapter->mbx_task, idpf_mbx_task); + INIT_DELAYED_WORK(&adapter->stats_task, idpf_statistics_task); + INIT_DELAYED_WORK(&adapter->vc_event_task, idpf_vc_event_task); + + adapter->dev_ops.reg_ops.reset_reg_init(adapter); + set_bit(IDPF_HR_DRV_LOAD, adapter->flags); + queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task, + msecs_to_jiffies(10 * (pdev->devfn & 0x07))); + + return 0; + +err_cfg_hw: + destroy_workqueue(adapter->vc_event_wq); +err_vc_event_wq_alloc: + destroy_workqueue(adapter->stats_wq); +err_stats_wq_alloc: + destroy_workqueue(adapter->mbx_wq); +err_mbx_wq_alloc: + destroy_workqueue(adapter->serv_wq); +err_serv_wq_alloc: + destroy_workqueue(adapter->init_wq); +err_free: + kfree(adapter); + return err; +} + +/* idpf_pci_tbl - PCI Dev idpf ID Table + */ +static const struct pci_device_id idpf_pci_tbl[] = { + { PCI_VDEVICE(INTEL, IDPF_DEV_ID_PF)}, + { PCI_VDEVICE(INTEL, IDPF_DEV_ID_VF)}, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(pci, idpf_pci_tbl); + +static struct pci_driver idpf_driver = { + .name = KBUILD_MODNAME, + .id_table = idpf_pci_tbl, + .probe = idpf_probe, + .sriov_configure = idpf_sriov_configure, + .remove = idpf_remove, + .shutdown = idpf_shutdown, +}; +module_pci_driver(idpf_driver); diff --git a/drivers/net/ethernet/intel/idpf/idpf_mem.h b/drivers/net/ethernet/intel/idpf/idpf_mem.h new file mode 100644 index 000000000000..b21a04fccf0f --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_mem.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_MEM_H_ +#define _IDPF_MEM_H_ + +#include <linux/io.h> + +struct idpf_dma_mem { + void *va; + dma_addr_t pa; + size_t size; +}; + +#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) +#define rd32(a, reg) readl((a)->hw_addr + (reg)) +#define wr64(a, reg, value) writeq((value), ((a)->hw_addr + (reg))) +#define rd64(a, reg) readq((a)->hw_addr + (reg)) + +#endif /* _IDPF_MEM_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c new file mode 100644 index 000000000000..81288a17da2a --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -0,0 +1,1183 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" + +/** + * idpf_tx_singleq_csum - Enable tx checksum offloads + * @skb: pointer to skb + * @off: pointer to struct that holds offload parameters + * + * Returns 0 or error (negative) if checksum offload cannot be executed, 1 + * otherwise. + */ +static int idpf_tx_singleq_csum(struct sk_buff *skb, + struct idpf_tx_offload_params *off) +{ + u32 l4_len, l3_len, l2_len; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + unsigned char *hdr; + } l4; + u32 offset, cmd = 0; + u8 l4_proto = 0; + __be16 frag_off; + bool is_tso; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + l2_len = ip.hdr - skb->data; + offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2); + is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO); + if (skb->encapsulation) { + u32 tunnel = 0; + + /* define outer network header type */ + if (off->tx_flags & IDPF_TX_FLAGS_IPV4) { + /* The stack computes the IP header already, the only + * time we need the hardware to recompute it is in the + * case of TSO. + */ + tunnel |= is_tso ? + IDPF_TX_CTX_EXT_IP_IPV4 : + IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) { + tunnel |= IDPF_TX_CTX_EXT_IP_IPV6; + + l4_proto = ip.v6->nexthdr; + if (ipv6_ext_hdr(l4_proto)) + ipv6_skip_exthdr(skb, skb_network_offset(skb) + + sizeof(*ip.v6), + &l4_proto, &frag_off); + } + + /* define outer transport */ + switch (l4_proto) { + case IPPROTO_UDP: + tunnel |= IDPF_TXD_CTX_UDP_TUNNELING; + break; + case IPPROTO_GRE: + tunnel |= IDPF_TXD_CTX_GRE_TUNNELING; + break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + l4.hdr = skb_inner_network_header(skb); + break; + default: + if (is_tso) + return -1; + + skb_checksum_help(skb); + + return 0; + } + off->tx_flags |= IDPF_TX_FLAGS_TUNNEL; + + /* compute outer L3 header size */ + tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M, + (l4.hdr - ip.hdr) / 4); + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* compute tunnel header size */ + tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M, + (ip.hdr - l4.hdr) / 2); + + /* indicate if we need to offload outer UDP header */ + if (is_tso && + !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M; + + /* record tunnel offload values */ + off->cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + off->tx_flags |= IDPF_TX_FLAGS_IPV4; + if (ip.v6->version == 6) + off->tx_flags |= IDPF_TX_FLAGS_IPV6; + } + + /* Enable IP checksum offloads */ + if (off->tx_flags & IDPF_TX_FLAGS_IPV4) { + l4_proto = ip.v4->protocol; + /* See comment above regarding need for HW to recompute IP + * header checksum in the case of TSO. + */ + if (is_tso) + cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM; + else + cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4; + + } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) { + cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6; + l4_proto = ip.v6->nexthdr; + if (ipv6_ext_hdr(l4_proto)) + ipv6_skip_exthdr(skb, skb_network_offset(skb) + + sizeof(*ip.v6), &l4_proto, + &frag_off); + } else { + return -1; + } + + /* compute inner L3 header size */ + l3_len = l4.hdr - ip.hdr; + offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S; + + /* Enable L4 checksum offloads */ + switch (l4_proto) { + case IPPROTO_TCP: + /* enable checksum offloads */ + cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP; + l4_len = l4.tcp->doff; + break; + case IPPROTO_UDP: + /* enable UDP checksum offload */ + cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP; + l4_len = sizeof(struct udphdr) >> 2; + break; + case IPPROTO_SCTP: + /* enable SCTP checksum offload */ + cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP; + l4_len = sizeof(struct sctphdr) >> 2; + break; + default: + if (is_tso) + return -1; + + skb_checksum_help(skb); + + return 0; + } + + offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S; + off->td_cmd |= cmd; + off->hdr_offsets |= offset; + + return 1; +} + +/** + * idpf_tx_singleq_map - Build the Tx base descriptor + * @tx_q: queue to send buffer on + * @first: first buffer info buffer to use + * @offloads: pointer to struct that holds offload parameters + * + * This function loops over the skb data pointed to by *first + * and gets a physical address for each memory location and programs + * it and the length into the transmit base mode descriptor. + */ +static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + struct idpf_tx_buf *first, + struct idpf_tx_offload_params *offloads) +{ + u32 offsets = offloads->hdr_offsets; + struct idpf_tx_buf *tx_buf = first; + struct idpf_base_tx_desc *tx_desc; + struct sk_buff *skb = first->skb; + u64 td_cmd = offloads->td_cmd; + unsigned int data_len, size; + u16 i = tx_q->next_to_use; + struct netdev_queue *nq; + skb_frag_t *frag; + dma_addr_t dma; + u64 td_tag = 0; + + data_len = skb->data_len; + size = skb_headlen(skb); + + tx_desc = IDPF_BASE_TX_DESC(tx_q, i); + + dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); + + /* write each descriptor with CRC bit */ + if (tx_q->vport->crc_enable) + td_cmd |= IDPF_TX_DESC_CMD_ICRC; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; + + if (dma_mapping_error(tx_q->dev, dma)) + return idpf_tx_dma_map_error(tx_q, skb, first, i); + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + + /* align size to end of page */ + max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1); + tx_desc->buf_addr = cpu_to_le64(dma); + + /* account for data chunks larger than the hardware + * can handle + */ + while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) { + tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, + offsets, + max_data, + td_tag); + tx_desc++; + i++; + + if (i == tx_q->desc_count) { + tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + i = 0; + } + + dma += max_data; + size -= max_data; + + max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; + tx_desc->buf_addr = cpu_to_le64(dma); + } + + if (!data_len) + break; + + tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets, + size, td_tag); + tx_desc++; + i++; + + if (i == tx_q->desc_count) { + tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + i = 0; + } + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_q->dev, frag, 0, size, + DMA_TO_DEVICE); + + tx_buf = &tx_q->tx_buf[i]; + } + + skb_tx_timestamp(first->skb); + + /* write last descriptor with RS and EOP bits */ + td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS); + + tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets, + size, td_tag); + + IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + netdev_tx_sent_queue(nq, first->bytecount); + + idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); +} + +/** + * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring + * @txq: queue to put context descriptor on + * + * Since the TX buffer rings mimics the descriptor ring, update the tx buffer + * ring entry to reflect that this index is a context descriptor + */ +static struct idpf_base_tx_ctx_desc * +idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq) +{ + struct idpf_base_tx_ctx_desc *ctx_desc; + int ntu = txq->next_to_use; + + memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf)); + txq->tx_buf[ntu].ctx_entry = true; + + ctx_desc = IDPF_BASE_TX_CTX_DESC(txq, ntu); + + IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu); + txq->next_to_use = ntu; + + return ctx_desc; +} + +/** + * idpf_tx_singleq_build_ctx_desc - populate context descriptor + * @txq: queue to send buffer on + * @offload: offload parameter structure + **/ +static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, + struct idpf_tx_offload_params *offload) +{ + struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq); + u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX; + + if (offload->tso_segs) { + qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S; + qw1 |= ((u64)offload->tso_len << IDPF_TXD_CTX_QW1_TSO_LEN_S) & + IDPF_TXD_CTX_QW1_TSO_LEN_M; + qw1 |= ((u64)offload->mss << IDPF_TXD_CTX_QW1_MSS_S) & + IDPF_TXD_CTX_QW1_MSS_M; + + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.tx.lso_pkts); + u64_stats_update_end(&txq->stats_sync); + } + + desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling); + + desc->qw0.l2tag2 = 0; + desc->qw0.rsvd1 = 0; + desc->qw1 = cpu_to_le64(qw1); +} + +/** + * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors + * @skb: send buffer + * @tx_q: queue to send buffer on + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + struct idpf_queue *tx_q) +{ + struct idpf_tx_offload_params offload = { }; + struct idpf_tx_buf *first; + unsigned int count; + __be16 protocol; + int csum, tso; + + count = idpf_tx_desc_count_required(tx_q, skb); + if (unlikely(!count)) + return idpf_tx_drop_skb(tx_q, skb); + + if (idpf_tx_maybe_stop_common(tx_q, + count + IDPF_TX_DESCS_PER_CACHE_LINE + + IDPF_TX_DESCS_FOR_CTX)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + return NETDEV_TX_BUSY; + } + + protocol = vlan_get_protocol(skb); + if (protocol == htons(ETH_P_IP)) + offload.tx_flags |= IDPF_TX_FLAGS_IPV4; + else if (protocol == htons(ETH_P_IPV6)) + offload.tx_flags |= IDPF_TX_FLAGS_IPV6; + + tso = idpf_tso(skb, &offload); + if (tso < 0) + goto out_drop; + + csum = idpf_tx_singleq_csum(skb, &offload); + if (csum < 0) + goto out_drop; + + if (tso || offload.cd_tunneling) + idpf_tx_singleq_build_ctx_desc(tx_q, &offload); + + /* record the location of the first descriptor for this packet */ + first = &tx_q->tx_buf[tx_q->next_to_use]; + first->skb = skb; + + if (tso) { + first->gso_segs = offload.tso_segs; + first->bytecount = skb->len + ((first->gso_segs - 1) * offload.tso_hdr_len); + } else { + first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); + first->gso_segs = 1; + } + idpf_tx_singleq_map(tx_q, first, &offload); + + return NETDEV_TX_OK; + +out_drop: + return idpf_tx_drop_skb(tx_q, skb); +} + +/** + * idpf_tx_singleq_start - Selects the right Tx queue to send buffer + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + struct net_device *netdev) +{ + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + struct idpf_queue *tx_q; + + tx_q = vport->txqs[skb_get_queue_mapping(skb)]; + + /* hardware can't handle really short frames, hardware padding works + * beyond this point + */ + if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + return NETDEV_TX_OK; + } + + return idpf_tx_singleq_frame(skb, tx_q); +} + +/** + * idpf_tx_singleq_clean - Reclaim resources from queue + * @tx_q: Tx queue to clean + * @napi_budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + */ +static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, + int *cleaned) +{ + unsigned int budget = tx_q->vport->compln_clean_budget; + unsigned int total_bytes = 0, total_pkts = 0; + struct idpf_base_tx_desc *tx_desc; + s16 ntc = tx_q->next_to_clean; + struct idpf_netdev_priv *np; + struct idpf_tx_buf *tx_buf; + struct idpf_vport *vport; + struct netdev_queue *nq; + bool dont_wake; + + tx_desc = IDPF_BASE_TX_DESC(tx_q, ntc); + tx_buf = &tx_q->tx_buf[ntc]; + ntc -= tx_q->desc_count; + + do { + struct idpf_base_tx_desc *eop_desc; + + /* If this entry in the ring was used as a context descriptor, + * it's corresponding entry in the buffer ring will indicate as + * such. We can skip this descriptor since there is no buffer + * to clean. + */ + if (tx_buf->ctx_entry) { + /* Clear this flag here to avoid stale flag values when + * this buffer is used for actual data in the future. + * There are cases where the tx_buf struct / the flags + * field will not be cleared before being reused. + */ + tx_buf->ctx_entry = false; + goto fetch_next_txq_desc; + } + + /* if next_to_watch is not set then no work pending */ + eop_desc = (struct idpf_base_tx_desc *)tx_buf->next_to_watch; + if (!eop_desc) + break; + + /* prevent any other reads prior to eop_desc */ + smp_rmb(); + + /* if the descriptor isn't done, no work yet to do */ + if (!(eop_desc->qw1 & + cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE))) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buf->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buf->bytecount; + total_pkts += tx_buf->gso_segs; + + napi_consume_skb(tx_buf->skb, napi_budget); + + /* unmap skb header data */ + dma_unmap_single(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + + /* clear tx_buf data */ + tx_buf->skb = NULL; + dma_unmap_len_set(tx_buf, len, 0); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buf++; + tx_desc++; + ntc++; + if (unlikely(!ntc)) { + ntc -= tx_q->desc_count; + tx_buf = tx_q->tx_buf; + tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + } + + /* update budget only if we did something */ + budget--; + +fetch_next_txq_desc: + tx_buf++; + tx_desc++; + ntc++; + if (unlikely(!ntc)) { + ntc -= tx_q->desc_count; + tx_buf = tx_q->tx_buf; + tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + } + } while (likely(budget)); + + ntc += tx_q->desc_count; + tx_q->next_to_clean = ntc; + + *cleaned += total_pkts; + + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts); + u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes); + u64_stats_update_end(&tx_q->stats_sync); + + vport = tx_q->vport; + np = netdev_priv(vport->netdev); + nq = netdev_get_tx_queue(vport->netdev, tx_q->idx); + + dont_wake = np->state != __IDPF_VPORT_UP || + !netif_carrier_ok(vport->netdev); + __netif_txq_completed_wake(nq, total_pkts, total_bytes, + IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, + dont_wake); + + return !!budget; +} + +/** + * idpf_tx_singleq_clean_all - Clean all Tx queues + * @q_vec: queue vector + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns false if clean is not complete else returns true + */ +static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, + int *cleaned) +{ + u16 num_txq = q_vec->num_txq; + bool clean_complete = true; + int i, budget_per_q; + + budget_per_q = num_txq ? max(budget / num_txq, 1) : 0; + for (i = 0; i < num_txq; i++) { + struct idpf_queue *q; + + q = q_vec->tx[i]; + clean_complete &= idpf_tx_singleq_clean(q, budget_per_q, + cleaned); + } + + return clean_complete; +} + +/** + * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor + * status and error fields + * @rx_desc: pointer to receive descriptor (in le64 format) + * @stat_err_bits: value to mask + * + * This function does some fast chicanery in order to return the + * value of the mask which is really only used for boolean tests. + * The status_error_ptype_len doesn't need to be shifted because it begins + * at offset zero. + */ +static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc, + const u64 stat_err_bits) +{ + return !!(rx_desc->base_wb.qword1.status_error_ptype_len & + cpu_to_le64(stat_err_bits)); +} + +/** + * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers + * @rxq: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: Current socket buffer containing buffer in progress + * @ntc: next to clean + */ +static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq, + union virtchnl2_rx_desc *rx_desc, + struct sk_buff *skb, u16 ntc) +{ + /* if we are the last buffer then there is nothing else to do */ + if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ))) + return false; + + return true; +} + +/** + * idpf_rx_singleq_csum - Indicate in skb if checksum is good + * @rxq: Rx ring being processed + * @skb: skb currently being received and modified + * @csum_bits: checksum bits from descriptor + * @ptype: the packet type decoded by hardware + * + * skb->protocol must be set before this function is called + */ +static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, + struct idpf_rx_csum_decoded *csum_bits, + u16 ptype) +{ + struct idpf_rx_ptype_decoded decoded; + bool ipv4, ipv6; + + /* check if Rx checksum is enabled */ + if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM))) + return; + + /* check if HW has decoded the packet and checksum */ + if (unlikely(!(csum_bits->l3l4p))) + return; + + decoded = rxq->vport->rx_ptype_lkup[ptype]; + if (unlikely(!(decoded.known && decoded.outer_ip))) + return; + + ipv4 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV4); + ipv6 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV6); + + /* Check if there were any checksum errors */ + if (unlikely(ipv4 && (csum_bits->ipe || csum_bits->eipe))) + goto checksum_fail; + + /* Device could not do any checksum offload for certain extension + * headers as indicated by setting IPV6EXADD bit + */ + if (unlikely(ipv6 && csum_bits->ipv6exadd)) + return; + + /* check for L4 errors and handle packets that were not able to be + * checksummed due to arrival speed + */ + if (unlikely(csum_bits->l4e)) + goto checksum_fail; + + if (unlikely(csum_bits->nat && csum_bits->eudpe)) + goto checksum_fail; + + /* Handle packets that were not able to be checksummed due to arrival + * speed, in this case the stack can compute the csum. + */ + if (unlikely(csum_bits->pprs)) + return; + + /* If there is an outer header present that might contain a checksum + * we need to bump the checksum level by 1 to reflect the fact that + * we are indicating we validated the inner checksum. + */ + if (decoded.tunnel_type >= IDPF_RX_PTYPE_TUNNEL_IP_GRENAT) + skb->csum_level = 1; + + /* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */ + switch (decoded.inner_prot) { + case IDPF_RX_PTYPE_INNER_PROT_ICMP: + case IDPF_RX_PTYPE_INNER_PROT_TCP: + case IDPF_RX_PTYPE_INNER_PROT_UDP: + case IDPF_RX_PTYPE_INNER_PROT_SCTP: + skb->ip_summed = CHECKSUM_UNNECESSARY; + return; + default: + return; + } + +checksum_fail: + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.hw_csum_err); + u64_stats_update_end(&rxq->stats_sync); +} + +/** + * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum + * @rx_q: Rx completion queue + * @skb: skb currently being received and modified + * @rx_desc: the receive descriptor + * @ptype: Rx packet type + * + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + **/ +static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q, + struct sk_buff *skb, + union virtchnl2_rx_desc *rx_desc, + u16 ptype) +{ + struct idpf_rx_csum_decoded csum_bits; + u32 rx_error, rx_status; + u64 qword; + + qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); + + rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword); + rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword); + + csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error); + csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M, + rx_error); + csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error); + csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M, + rx_error); + csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M, + rx_status); + csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M, + rx_status); + csum_bits.nat = 0; + csum_bits.eudpe = 0; + + idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype); +} + +/** + * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum + * @rx_q: Rx completion queue + * @skb: skb currently being received and modified + * @rx_desc: the receive descriptor + * @ptype: Rx packet type + * + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + **/ +static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q, + struct sk_buff *skb, + union virtchnl2_rx_desc *rx_desc, + u16 ptype) +{ + struct idpf_rx_csum_decoded csum_bits; + u16 rx_status0, rx_status1; + + rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0); + rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1); + + csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M, + rx_status0); + csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M, + rx_status0); + csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M, + rx_status0); + csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M, + rx_status0); + csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M, + rx_status0); + csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M, + rx_status0); + csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M, + rx_status1); + csum_bits.pprs = 0; + + idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype); +} + +/** + * idpf_rx_singleq_base_hash - set the hash value in the skb + * @rx_q: Rx completion queue + * @skb: skb currently being received and modified + * @rx_desc: specific descriptor + * @decoded: Decoded Rx packet type related fields + * + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + **/ +static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q, + struct sk_buff *skb, + union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_ptype_decoded *decoded) +{ + u64 mask, qw1; + + if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH))) + return; + + mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M; + qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); + + if (FIELD_GET(mask, qw1) == mask) { + u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss); + + skb_set_hash(skb, hash, idpf_ptype_to_htype(decoded)); + } +} + +/** + * idpf_rx_singleq_flex_hash - set the hash value in the skb + * @rx_q: Rx completion queue + * @skb: skb currently being received and modified + * @rx_desc: specific descriptor + * @decoded: Decoded Rx packet type related fields + * + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + **/ +static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q, + struct sk_buff *skb, + union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_ptype_decoded *decoded) +{ + if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH))) + return; + + if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M, + le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) + skb_set_hash(skb, le32_to_cpu(rx_desc->flex_nic_wb.rss_hash), + idpf_ptype_to_htype(decoded)); +} + +/** + * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx + * descriptor + * @rx_q: Rx ring being processed + * @skb: pointer to current skb being populated + * @rx_desc: descriptor for skb + * @ptype: packet type + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. + */ +static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, + struct sk_buff *skb, + union virtchnl2_rx_desc *rx_desc, + u16 ptype) +{ + struct idpf_rx_ptype_decoded decoded = + rx_q->vport->rx_ptype_lkup[ptype]; + + /* modifies the skb - consumes the enet header */ + skb->protocol = eth_type_trans(skb, rx_q->vport->netdev); + + /* Check if we're using base mode descriptor IDs */ + if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) { + idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, &decoded); + idpf_rx_singleq_base_csum(rx_q, skb, rx_desc, ptype); + } else { + idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, &decoded); + idpf_rx_singleq_flex_csum(rx_q, skb, rx_desc, ptype); + } +} + +/** + * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers + * @rx_q: queue for which the hw buffers are allocated + * @cleaned_count: number of buffers to replace + * + * Returns false if all allocations were successful, true if any fail + */ +bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + u16 cleaned_count) +{ + struct virtchnl2_singleq_rx_buf_desc *desc; + u16 nta = rx_q->next_to_alloc; + struct idpf_rx_buf *buf; + + if (!cleaned_count) + return false; + + desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, nta); + buf = &rx_q->rx_buf.buf[nta]; + + do { + dma_addr_t addr; + + addr = idpf_alloc_page(rx_q->pp, buf, rx_q->rx_buf_size); + if (unlikely(addr == DMA_MAPPING_ERROR)) + break; + + /* Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + desc->pkt_addr = cpu_to_le64(addr); + desc->hdr_addr = 0; + desc++; + + buf++; + nta++; + if (unlikely(nta == rx_q->desc_count)) { + desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, 0); + buf = rx_q->rx_buf.buf; + nta = 0; + } + + cleaned_count--; + } while (cleaned_count); + + if (rx_q->next_to_alloc != nta) { + idpf_rx_buf_hw_update(rx_q, nta); + rx_q->next_to_alloc = nta; + } + + return !!cleaned_count; +} + +/** + * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor + * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * + * Decode the Rx descriptor and extract relevant information including the + * size and Rx packet type. + * + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + */ +static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, + union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_extracted *fields) +{ + u64 qword; + + qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); + + fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword); + fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword); +} + +/** + * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor + * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * + * Decode the Rx descriptor and extract relevant information including the + * size and Rx packet type. + * + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + */ +static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q, + union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_extracted *fields) +{ + fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M, + le16_to_cpu(rx_desc->flex_nic_wb.pkt_len)); + fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M, + le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0)); +} + +/** + * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor + * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * + */ +static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q, + union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_extracted *fields) +{ + if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) + idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields); + else + idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields); +} + +/** + * idpf_rx_singleq_clean - Reclaim resources after receive completes + * @rx_q: rx queue to clean + * @budget: Total limit on number of packets to process + * + * Returns true if there's any budget left (e.g. the clean is finished) + */ +static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) +{ + unsigned int total_rx_bytes = 0, total_rx_pkts = 0; + struct sk_buff *skb = rx_q->skb; + u16 ntc = rx_q->next_to_clean; + u16 cleaned_count = 0; + bool failure = false; + + /* Process Rx packets bounded by budget */ + while (likely(total_rx_pkts < (unsigned int)budget)) { + struct idpf_rx_extracted fields = { }; + union virtchnl2_rx_desc *rx_desc; + struct idpf_rx_buf *rx_buf; + + /* get the Rx desc from Rx queue based on 'next_to_clean' */ + rx_desc = IDPF_RX_DESC(rx_q, ntc); + + /* status_error_ptype_len will always be zero for unused + * descriptors because it's cleared in cleanup, and overlaps + * with hdr_addr which is always zero because packet split + * isn't used, if the hardware wrote DD then the length will be + * non-zero + */ +#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M + if (!idpf_rx_singleq_test_staterr(rx_desc, + IDPF_RXD_DD)) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc + */ + dma_rmb(); + + idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields); + + rx_buf = &rx_q->rx_buf.buf[ntc]; + if (!fields.size) { + idpf_rx_put_page(rx_buf); + goto skip_data; + } + + idpf_rx_sync_for_cpu(rx_buf, fields.size); + skb = rx_q->skb; + if (skb) + idpf_rx_add_frag(rx_buf, skb, fields.size); + else + skb = idpf_rx_construct_skb(rx_q, rx_buf, fields.size); + + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; + +skip_data: + IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc); + + cleaned_count++; + + /* skip if it is non EOP desc */ + if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc)) + continue; + +#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \ + VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M) + if (unlikely(idpf_rx_singleq_test_staterr(rx_desc, + IDPF_RXD_ERR_S))) { + dev_kfree_skb_any(skb); + skb = NULL; + continue; + } + + /* pad skb if needed (to make valid ethernet frame) */ + if (eth_skb_pad(skb)) { + skb = NULL; + continue; + } + + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; + + /* protocol */ + idpf_rx_singleq_process_skb_fields(rx_q, skb, + rx_desc, fields.rx_ptype); + + /* send completed skb up the stack */ + napi_gro_receive(&rx_q->q_vector->napi, skb); + skb = NULL; + + /* update budget accounting */ + total_rx_pkts++; + } + + rx_q->skb = skb; + + rx_q->next_to_clean = ntc; + + if (cleaned_count) + failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count); + + u64_stats_update_begin(&rx_q->stats_sync); + u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts); + u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes); + u64_stats_update_end(&rx_q->stats_sync); + + /* guarantee a trip back through this routine if there was a failure */ + return failure ? budget : (int)total_rx_pkts; +} + +/** + * idpf_rx_singleq_clean_all - Clean all Rx queues + * @q_vec: queue vector + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns false if clean is not complete else returns true + */ +static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, + int *cleaned) +{ + u16 num_rxq = q_vec->num_rxq; + bool clean_complete = true; + int budget_per_q, i; + + /* We attempt to distribute budget to each Rx queue fairly, but don't + * allow the budget to go below 1 because that would exit polling early. + */ + budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0; + for (i = 0; i < num_rxq; i++) { + struct idpf_queue *rxq = q_vec->rx[i]; + int pkts_cleaned_per_q; + + pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q); + + /* if we clean as many as budgeted, we must not be done */ + if (pkts_cleaned_per_q >= budget_per_q) + clean_complete = false; + *cleaned += pkts_cleaned_per_q; + } + + return clean_complete; +} + +/** + * idpf_vport_singleq_napi_poll - NAPI handler + * @napi: struct from which you get q_vector + * @budget: budget provided by stack + */ +int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget) +{ + struct idpf_q_vector *q_vector = + container_of(napi, struct idpf_q_vector, napi); + bool clean_complete; + int work_done = 0; + + /* Handle case where we are called by netpoll with a budget of 0 */ + if (budget <= 0) { + idpf_tx_singleq_clean_all(q_vector, budget, &work_done); + + return budget; + } + + clean_complete = idpf_rx_singleq_clean_all(q_vector, budget, + &work_done); + clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget, + &work_done); + + /* If work not completed, return budget and polling will return */ + if (!clean_complete) + return budget; + + work_done = min_t(int, work_done, budget - 1); + + /* Exit the polling mode, but don't re-enable interrupts if stack might + * poll us due to busy-polling + */ + if (likely(napi_complete_done(napi, work_done))) + idpf_vport_intr_update_itr_ena_irq(q_vector); + + return work_done; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c new file mode 100644 index 000000000000..6fa79898c42c --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -0,0 +1,4289 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" + +/** + * idpf_buf_lifo_push - push a buffer pointer onto stack + * @stack: pointer to stack struct + * @buf: pointer to buf to push + * + * Returns 0 on success, negative on failure + **/ +static int idpf_buf_lifo_push(struct idpf_buf_lifo *stack, + struct idpf_tx_stash *buf) +{ + if (unlikely(stack->top == stack->size)) + return -ENOSPC; + + stack->bufs[stack->top++] = buf; + + return 0; +} + +/** + * idpf_buf_lifo_pop - pop a buffer pointer from stack + * @stack: pointer to stack struct + **/ +static struct idpf_tx_stash *idpf_buf_lifo_pop(struct idpf_buf_lifo *stack) +{ + if (unlikely(!stack->top)) + return NULL; + + return stack->bufs[--stack->top]; +} + +/** + * idpf_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure + * @txqueue: TX queue + */ +void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue) +{ + struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev); + + adapter->tx_timeout_count++; + + netdev_err(netdev, "Detected Tx timeout: Count %d, Queue %d\n", + adapter->tx_timeout_count, txqueue); + if (!idpf_is_reset_in_prog(adapter)) { + set_bit(IDPF_HR_FUNC_RESET, adapter->flags); + queue_delayed_work(adapter->vc_event_wq, + &adapter->vc_event_task, + msecs_to_jiffies(10)); + } +} + +/** + * idpf_tx_buf_rel - Release a Tx buffer + * @tx_q: the queue that owns the buffer + * @tx_buf: the buffer to free + */ +static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf) +{ + if (tx_buf->skb) { + if (dma_unmap_len(tx_buf, len)) + dma_unmap_single(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dev_kfree_skb_any(tx_buf->skb); + } else if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + } + + tx_buf->next_to_watch = NULL; + tx_buf->skb = NULL; + tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + dma_unmap_len_set(tx_buf, len, 0); +} + +/** + * idpf_tx_buf_rel_all - Free any empty Tx buffers + * @txq: queue to be cleaned + */ +static void idpf_tx_buf_rel_all(struct idpf_queue *txq) +{ + u16 i; + + /* Buffers already cleared, nothing to do */ + if (!txq->tx_buf) + return; + + /* Free all the Tx buffer sk_buffs */ + for (i = 0; i < txq->desc_count; i++) + idpf_tx_buf_rel(txq, &txq->tx_buf[i]); + + kfree(txq->tx_buf); + txq->tx_buf = NULL; + + if (!txq->buf_stack.bufs) + return; + + for (i = 0; i < txq->buf_stack.size; i++) + kfree(txq->buf_stack.bufs[i]); + + kfree(txq->buf_stack.bufs); + txq->buf_stack.bufs = NULL; +} + +/** + * idpf_tx_desc_rel - Free Tx resources per queue + * @txq: Tx descriptor ring for a specific queue + * @bufq: buffer q or completion q + * + * Free all transmit software resources + */ +static void idpf_tx_desc_rel(struct idpf_queue *txq, bool bufq) +{ + if (bufq) + idpf_tx_buf_rel_all(txq); + + if (!txq->desc_ring) + return; + + dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma); + txq->desc_ring = NULL; + txq->next_to_alloc = 0; + txq->next_to_use = 0; + txq->next_to_clean = 0; +} + +/** + * idpf_tx_desc_rel_all - Free Tx Resources for All Queues + * @vport: virtual port structure + * + * Free all transmit software resources + */ +static void idpf_tx_desc_rel_all(struct idpf_vport *vport) +{ + int i, j; + + if (!vport->txq_grps) + return; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + for (j = 0; j < txq_grp->num_txq; j++) + idpf_tx_desc_rel(txq_grp->txqs[j], true); + + if (idpf_is_queue_model_split(vport->txq_model)) + idpf_tx_desc_rel(txq_grp->complq, false); + } +} + +/** + * idpf_tx_buf_alloc_all - Allocate memory for all buffer resources + * @tx_q: queue for which the buffers are allocated + * + * Returns 0 on success, negative on failure + */ +static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q) +{ + int buf_size; + int i; + + /* Allocate book keeping buffers only. Buffers to be supplied to HW + * are allocated by kernel network stack and received as part of skb + */ + buf_size = sizeof(struct idpf_tx_buf) * tx_q->desc_count; + tx_q->tx_buf = kzalloc(buf_size, GFP_KERNEL); + if (!tx_q->tx_buf) + return -ENOMEM; + + /* Initialize tx_bufs with invalid completion tags */ + for (i = 0; i < tx_q->desc_count; i++) + tx_q->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + + /* Initialize tx buf stack for out-of-order completions if + * flow scheduling offload is enabled + */ + tx_q->buf_stack.bufs = + kcalloc(tx_q->desc_count, sizeof(struct idpf_tx_stash *), + GFP_KERNEL); + if (!tx_q->buf_stack.bufs) + return -ENOMEM; + + tx_q->buf_stack.size = tx_q->desc_count; + tx_q->buf_stack.top = tx_q->desc_count; + + for (i = 0; i < tx_q->desc_count; i++) { + tx_q->buf_stack.bufs[i] = kzalloc(sizeof(*tx_q->buf_stack.bufs[i]), + GFP_KERNEL); + if (!tx_q->buf_stack.bufs[i]) + return -ENOMEM; + } + + return 0; +} + +/** + * idpf_tx_desc_alloc - Allocate the Tx descriptors + * @tx_q: the tx ring to set up + * @bufq: buffer or completion queue + * + * Returns 0 on success, negative on failure + */ +static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq) +{ + struct device *dev = tx_q->dev; + u32 desc_sz; + int err; + + if (bufq) { + err = idpf_tx_buf_alloc_all(tx_q); + if (err) + goto err_alloc; + + desc_sz = sizeof(struct idpf_base_tx_desc); + } else { + desc_sz = sizeof(struct idpf_splitq_tx_compl_desc); + } + + tx_q->size = tx_q->desc_count * desc_sz; + + /* Allocate descriptors also round up to nearest 4K */ + tx_q->size = ALIGN(tx_q->size, 4096); + tx_q->desc_ring = dmam_alloc_coherent(dev, tx_q->size, &tx_q->dma, + GFP_KERNEL); + if (!tx_q->desc_ring) { + dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", + tx_q->size); + err = -ENOMEM; + goto err_alloc; + } + + tx_q->next_to_alloc = 0; + tx_q->next_to_use = 0; + tx_q->next_to_clean = 0; + set_bit(__IDPF_Q_GEN_CHK, tx_q->flags); + + return 0; + +err_alloc: + idpf_tx_desc_rel(tx_q, bufq); + + return err; +} + +/** + * idpf_tx_desc_alloc_all - allocate all queues Tx resources + * @vport: virtual port private structure + * + * Returns 0 on success, negative on failure + */ +static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) +{ + struct device *dev = &vport->adapter->pdev->dev; + int err = 0; + int i, j; + + /* Setup buffer queues. In single queue model buffer queues and + * completion queues will be same + */ + for (i = 0; i < vport->num_txq_grp; i++) { + for (j = 0; j < vport->txq_grps[i].num_txq; j++) { + struct idpf_queue *txq = vport->txq_grps[i].txqs[j]; + u8 gen_bits = 0; + u16 bufidx_mask; + + err = idpf_tx_desc_alloc(txq, true); + if (err) { + dev_err(dev, "Allocation for Tx Queue %u failed\n", + i); + goto err_out; + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + continue; + + txq->compl_tag_cur_gen = 0; + + /* Determine the number of bits in the bufid + * mask and add one to get the start of the + * generation bits + */ + bufidx_mask = txq->desc_count - 1; + while (bufidx_mask >> 1) { + txq->compl_tag_gen_s++; + bufidx_mask = bufidx_mask >> 1; + } + txq->compl_tag_gen_s++; + + gen_bits = IDPF_TX_SPLITQ_COMPL_TAG_WIDTH - + txq->compl_tag_gen_s; + txq->compl_tag_gen_max = GETMAXVAL(gen_bits); + + /* Set bufid mask based on location of first + * gen bit; it cannot simply be the descriptor + * ring size-1 since we can have size values + * where not all of those bits are set. + */ + txq->compl_tag_bufid_m = + GETMAXVAL(txq->compl_tag_gen_s); + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + continue; + + /* Setup completion queues */ + err = idpf_tx_desc_alloc(vport->txq_grps[i].complq, false); + if (err) { + dev_err(dev, "Allocation for Tx Completion Queue %u failed\n", + i); + goto err_out; + } + } + +err_out: + if (err) + idpf_tx_desc_rel_all(vport); + + return err; +} + +/** + * idpf_rx_page_rel - Release an rx buffer page + * @rxq: the queue that owns the buffer + * @rx_buf: the buffer to free + */ +static void idpf_rx_page_rel(struct idpf_queue *rxq, struct idpf_rx_buf *rx_buf) +{ + if (unlikely(!rx_buf->page)) + return; + + page_pool_put_full_page(rxq->pp, rx_buf->page, false); + + rx_buf->page = NULL; + rx_buf->page_offset = 0; +} + +/** + * idpf_rx_hdr_buf_rel_all - Release header buffer memory + * @rxq: queue to use + */ +static void idpf_rx_hdr_buf_rel_all(struct idpf_queue *rxq) +{ + struct idpf_adapter *adapter = rxq->vport->adapter; + + dma_free_coherent(&adapter->pdev->dev, + rxq->desc_count * IDPF_HDR_BUF_SIZE, + rxq->rx_buf.hdr_buf_va, + rxq->rx_buf.hdr_buf_pa); + rxq->rx_buf.hdr_buf_va = NULL; +} + +/** + * idpf_rx_buf_rel_all - Free all Rx buffer resources for a queue + * @rxq: queue to be cleaned + */ +static void idpf_rx_buf_rel_all(struct idpf_queue *rxq) +{ + u16 i; + + /* queue already cleared, nothing to do */ + if (!rxq->rx_buf.buf) + return; + + /* Free all the bufs allocated and given to hw on Rx queue */ + for (i = 0; i < rxq->desc_count; i++) + idpf_rx_page_rel(rxq, &rxq->rx_buf.buf[i]); + + if (rxq->rx_hsplit_en) + idpf_rx_hdr_buf_rel_all(rxq); + + page_pool_destroy(rxq->pp); + rxq->pp = NULL; + + kfree(rxq->rx_buf.buf); + rxq->rx_buf.buf = NULL; +} + +/** + * idpf_rx_desc_rel - Free a specific Rx q resources + * @rxq: queue to clean the resources from + * @bufq: buffer q or completion q + * @q_model: single or split q model + * + * Free a specific rx queue resources + */ +static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) +{ + if (!rxq) + return; + + if (!bufq && idpf_is_queue_model_split(q_model) && rxq->skb) { + dev_kfree_skb_any(rxq->skb); + rxq->skb = NULL; + } + + if (bufq || !idpf_is_queue_model_split(q_model)) + idpf_rx_buf_rel_all(rxq); + + rxq->next_to_alloc = 0; + rxq->next_to_clean = 0; + rxq->next_to_use = 0; + if (!rxq->desc_ring) + return; + + dmam_free_coherent(rxq->dev, rxq->size, rxq->desc_ring, rxq->dma); + rxq->desc_ring = NULL; +} + +/** + * idpf_rx_desc_rel_all - Free Rx Resources for All Queues + * @vport: virtual port structure + * + * Free all rx queues resources + */ +static void idpf_rx_desc_rel_all(struct idpf_vport *vport) +{ + struct idpf_rxq_group *rx_qgrp; + u16 num_rxq; + int i, j; + + if (!vport->rxq_grps) + return; + + for (i = 0; i < vport->num_rxq_grp; i++) { + rx_qgrp = &vport->rxq_grps[i]; + + if (!idpf_is_queue_model_split(vport->rxq_model)) { + for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) + idpf_rx_desc_rel(rx_qgrp->singleq.rxqs[j], + false, vport->rxq_model); + continue; + } + + num_rxq = rx_qgrp->splitq.num_rxq_sets; + for (j = 0; j < num_rxq; j++) + idpf_rx_desc_rel(&rx_qgrp->splitq.rxq_sets[j]->rxq, + false, vport->rxq_model); + + if (!rx_qgrp->splitq.bufq_sets) + continue; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + struct idpf_bufq_set *bufq_set = + &rx_qgrp->splitq.bufq_sets[j]; + + idpf_rx_desc_rel(&bufq_set->bufq, true, + vport->rxq_model); + } + } +} + +/** + * idpf_rx_buf_hw_update - Store the new tail and head values + * @rxq: queue to bump + * @val: new head index + */ +void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val) +{ + rxq->next_to_use = val; + + if (unlikely(!rxq->tail)) + return; + + /* writel has an implicit memory barrier */ + writel(val, rxq->tail); +} + +/** + * idpf_rx_hdr_buf_alloc_all - Allocate memory for header buffers + * @rxq: ring to use + * + * Returns 0 on success, negative on failure. + */ +static int idpf_rx_hdr_buf_alloc_all(struct idpf_queue *rxq) +{ + struct idpf_adapter *adapter = rxq->vport->adapter; + + rxq->rx_buf.hdr_buf_va = + dma_alloc_coherent(&adapter->pdev->dev, + IDPF_HDR_BUF_SIZE * rxq->desc_count, + &rxq->rx_buf.hdr_buf_pa, + GFP_KERNEL); + if (!rxq->rx_buf.hdr_buf_va) + return -ENOMEM; + + return 0; +} + +/** + * idpf_rx_post_buf_refill - Post buffer id to refill queue + * @refillq: refill queue to post to + * @buf_id: buffer id to post + */ +static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) +{ + u16 nta = refillq->next_to_alloc; + + /* store the buffer ID and the SW maintained GEN bit to the refillq */ + refillq->ring[nta] = + ((buf_id << IDPF_RX_BI_BUFID_S) & IDPF_RX_BI_BUFID_M) | + (!!(test_bit(__IDPF_Q_GEN_CHK, refillq->flags)) << + IDPF_RX_BI_GEN_S); + + if (unlikely(++nta == refillq->desc_count)) { + nta = 0; + change_bit(__IDPF_Q_GEN_CHK, refillq->flags); + } + refillq->next_to_alloc = nta; +} + +/** + * idpf_rx_post_buf_desc - Post buffer to bufq descriptor ring + * @bufq: buffer queue to post to + * @buf_id: buffer id to post + * + * Returns false if buffer could not be allocated, true otherwise. + */ +static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id) +{ + struct virtchnl2_splitq_rx_buf_desc *splitq_rx_desc = NULL; + u16 nta = bufq->next_to_alloc; + struct idpf_rx_buf *buf; + dma_addr_t addr; + + splitq_rx_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, nta); + buf = &bufq->rx_buf.buf[buf_id]; + + if (bufq->rx_hsplit_en) { + splitq_rx_desc->hdr_addr = + cpu_to_le64(bufq->rx_buf.hdr_buf_pa + + (u32)buf_id * IDPF_HDR_BUF_SIZE); + } + + addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size); + if (unlikely(addr == DMA_MAPPING_ERROR)) + return false; + + splitq_rx_desc->pkt_addr = cpu_to_le64(addr); + splitq_rx_desc->qword0.buf_id = cpu_to_le16(buf_id); + + nta++; + if (unlikely(nta == bufq->desc_count)) + nta = 0; + bufq->next_to_alloc = nta; + + return true; +} + +/** + * idpf_rx_post_init_bufs - Post initial buffers to bufq + * @bufq: buffer queue to post working set to + * @working_set: number of buffers to put in working set + * + * Returns true if @working_set bufs were posted successfully, false otherwise. + */ +static bool idpf_rx_post_init_bufs(struct idpf_queue *bufq, u16 working_set) +{ + int i; + + for (i = 0; i < working_set; i++) { + if (!idpf_rx_post_buf_desc(bufq, i)) + return false; + } + + idpf_rx_buf_hw_update(bufq, + bufq->next_to_alloc & ~(bufq->rx_buf_stride - 1)); + + return true; +} + +/** + * idpf_rx_create_page_pool - Create a page pool + * @rxbufq: RX queue to create page pool for + * + * Returns &page_pool on success, casted -errno on failure + */ +static struct page_pool *idpf_rx_create_page_pool(struct idpf_queue *rxbufq) +{ + struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = 0, + .pool_size = rxbufq->desc_count, + .nid = NUMA_NO_NODE, + .dev = rxbufq->vport->netdev->dev.parent, + .max_len = PAGE_SIZE, + .dma_dir = DMA_FROM_DEVICE, + .offset = 0, + }; + + if (rxbufq->rx_buf_size == IDPF_RX_BUF_2048) + pp.flags |= PP_FLAG_PAGE_FRAG; + + return page_pool_create(&pp); +} + +/** + * idpf_rx_buf_alloc_all - Allocate memory for all buffer resources + * @rxbufq: queue for which the buffers are allocated; equivalent to + * rxq when operating in singleq mode + * + * Returns 0 on success, negative on failure + */ +static int idpf_rx_buf_alloc_all(struct idpf_queue *rxbufq) +{ + int err = 0; + + /* Allocate book keeping buffers */ + rxbufq->rx_buf.buf = kcalloc(rxbufq->desc_count, + sizeof(struct idpf_rx_buf), GFP_KERNEL); + if (!rxbufq->rx_buf.buf) { + err = -ENOMEM; + goto rx_buf_alloc_all_out; + } + + if (rxbufq->rx_hsplit_en) { + err = idpf_rx_hdr_buf_alloc_all(rxbufq); + if (err) + goto rx_buf_alloc_all_out; + } + + /* Allocate buffers to be given to HW. */ + if (idpf_is_queue_model_split(rxbufq->vport->rxq_model)) { + int working_set = IDPF_RX_BUFQ_WORKING_SET(rxbufq); + + if (!idpf_rx_post_init_bufs(rxbufq, working_set)) + err = -ENOMEM; + } else { + if (idpf_rx_singleq_buf_hw_alloc_all(rxbufq, + rxbufq->desc_count - 1)) + err = -ENOMEM; + } + +rx_buf_alloc_all_out: + if (err) + idpf_rx_buf_rel_all(rxbufq); + + return err; +} + +/** + * idpf_rx_bufs_init - Initialize page pool, allocate rx bufs, and post to HW + * @rxbufq: RX queue to create page pool for + * + * Returns 0 on success, negative on failure + */ +static int idpf_rx_bufs_init(struct idpf_queue *rxbufq) +{ + struct page_pool *pool; + + pool = idpf_rx_create_page_pool(rxbufq); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + rxbufq->pp = pool; + + return idpf_rx_buf_alloc_all(rxbufq); +} + +/** + * idpf_rx_bufs_init_all - Initialize all RX bufs + * @vport: virtual port struct + * + * Returns 0 on success, negative on failure + */ +int idpf_rx_bufs_init_all(struct idpf_vport *vport) +{ + struct idpf_rxq_group *rx_qgrp; + struct idpf_queue *q; + int i, j, err; + + for (i = 0; i < vport->num_rxq_grp; i++) { + rx_qgrp = &vport->rxq_grps[i]; + + /* Allocate bufs for the rxq itself in singleq */ + if (!idpf_is_queue_model_split(vport->rxq_model)) { + int num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { + q = rx_qgrp->singleq.rxqs[j]; + err = idpf_rx_bufs_init(q); + if (err) + return err; + } + + continue; + } + + /* Otherwise, allocate bufs for the buffer queues */ + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + err = idpf_rx_bufs_init(q); + if (err) + return err; + } + } + + return 0; +} + +/** + * idpf_rx_desc_alloc - Allocate queue Rx resources + * @rxq: Rx queue for which the resources are setup + * @bufq: buffer or completion queue + * @q_model: single or split queue model + * + * Returns 0 on success, negative on failure + */ +static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model) +{ + struct device *dev = rxq->dev; + + if (bufq) + rxq->size = rxq->desc_count * + sizeof(struct virtchnl2_splitq_rx_buf_desc); + else + rxq->size = rxq->desc_count * + sizeof(union virtchnl2_rx_desc); + + /* Allocate descriptors and also round up to nearest 4K */ + rxq->size = ALIGN(rxq->size, 4096); + rxq->desc_ring = dmam_alloc_coherent(dev, rxq->size, + &rxq->dma, GFP_KERNEL); + if (!rxq->desc_ring) { + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", + rxq->size); + return -ENOMEM; + } + + rxq->next_to_alloc = 0; + rxq->next_to_clean = 0; + rxq->next_to_use = 0; + set_bit(__IDPF_Q_GEN_CHK, rxq->flags); + + return 0; +} + +/** + * idpf_rx_desc_alloc_all - allocate all RX queues resources + * @vport: virtual port structure + * + * Returns 0 on success, negative on failure + */ +static int idpf_rx_desc_alloc_all(struct idpf_vport *vport) +{ + struct device *dev = &vport->adapter->pdev->dev; + struct idpf_rxq_group *rx_qgrp; + struct idpf_queue *q; + int i, j, err; + u16 num_rxq; + + for (i = 0; i < vport->num_rxq_grp; i++) { + rx_qgrp = &vport->rxq_grps[i]; + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + err = idpf_rx_desc_alloc(q, false, vport->rxq_model); + if (err) { + dev_err(dev, "Memory allocation for Rx Queue %u failed\n", + i); + goto err_out; + } + } + + if (!idpf_is_queue_model_split(vport->rxq_model)) + continue; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + err = idpf_rx_desc_alloc(q, true, vport->rxq_model); + if (err) { + dev_err(dev, "Memory allocation for Rx Buffer Queue %u failed\n", + i); + goto err_out; + } + } + } + + return 0; + +err_out: + idpf_rx_desc_rel_all(vport); + + return err; +} + +/** + * idpf_txq_group_rel - Release all resources for txq groups + * @vport: vport to release txq groups on + */ +static void idpf_txq_group_rel(struct idpf_vport *vport) +{ + int i, j; + + if (!vport->txq_grps) + return; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + for (j = 0; j < txq_grp->num_txq; j++) { + kfree(txq_grp->txqs[j]); + txq_grp->txqs[j] = NULL; + } + kfree(txq_grp->complq); + txq_grp->complq = NULL; + } + kfree(vport->txq_grps); + vport->txq_grps = NULL; +} + +/** + * idpf_rxq_sw_queue_rel - Release software queue resources + * @rx_qgrp: rx queue group with software queues + */ +static void idpf_rxq_sw_queue_rel(struct idpf_rxq_group *rx_qgrp) +{ + int i, j; + + for (i = 0; i < rx_qgrp->vport->num_bufqs_per_qgrp; i++) { + struct idpf_bufq_set *bufq_set = &rx_qgrp->splitq.bufq_sets[i]; + + for (j = 0; j < bufq_set->num_refillqs; j++) { + kfree(bufq_set->refillqs[j].ring); + bufq_set->refillqs[j].ring = NULL; + } + kfree(bufq_set->refillqs); + bufq_set->refillqs = NULL; + } +} + +/** + * idpf_rxq_group_rel - Release all resources for rxq groups + * @vport: vport to release rxq groups on + */ +static void idpf_rxq_group_rel(struct idpf_vport *vport) +{ + int i; + + if (!vport->rxq_grps) + return; + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u16 num_rxq; + int j; + + if (idpf_is_queue_model_split(vport->rxq_model)) { + num_rxq = rx_qgrp->splitq.num_rxq_sets; + for (j = 0; j < num_rxq; j++) { + kfree(rx_qgrp->splitq.rxq_sets[j]); + rx_qgrp->splitq.rxq_sets[j] = NULL; + } + + idpf_rxq_sw_queue_rel(rx_qgrp); + kfree(rx_qgrp->splitq.bufq_sets); + rx_qgrp->splitq.bufq_sets = NULL; + } else { + num_rxq = rx_qgrp->singleq.num_rxq; + for (j = 0; j < num_rxq; j++) { + kfree(rx_qgrp->singleq.rxqs[j]); + rx_qgrp->singleq.rxqs[j] = NULL; + } + } + } + kfree(vport->rxq_grps); + vport->rxq_grps = NULL; +} + +/** + * idpf_vport_queue_grp_rel_all - Release all queue groups + * @vport: vport to release queue groups for + */ +static void idpf_vport_queue_grp_rel_all(struct idpf_vport *vport) +{ + idpf_txq_group_rel(vport); + idpf_rxq_group_rel(vport); +} + +/** + * idpf_vport_queues_rel - Free memory for all queues + * @vport: virtual port + * + * Free the memory allocated for queues associated to a vport + */ +void idpf_vport_queues_rel(struct idpf_vport *vport) +{ + idpf_tx_desc_rel_all(vport); + idpf_rx_desc_rel_all(vport); + idpf_vport_queue_grp_rel_all(vport); + + kfree(vport->txqs); + vport->txqs = NULL; +} + +/** + * idpf_vport_init_fast_path_txqs - Initialize fast path txq array + * @vport: vport to init txqs on + * + * We get a queue index from skb->queue_mapping and we need a fast way to + * dereference the queue from queue groups. This allows us to quickly pull a + * txq based on a queue index. + * + * Returns 0 on success, negative on failure + */ +static int idpf_vport_init_fast_path_txqs(struct idpf_vport *vport) +{ + int i, j, k = 0; + + vport->txqs = kcalloc(vport->num_txq, sizeof(struct idpf_queue *), + GFP_KERNEL); + + if (!vport->txqs) + return -ENOMEM; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_grp = &vport->txq_grps[i]; + + for (j = 0; j < tx_grp->num_txq; j++, k++) { + vport->txqs[k] = tx_grp->txqs[j]; + vport->txqs[k]->idx = k; + } + } + + return 0; +} + +/** + * idpf_vport_init_num_qs - Initialize number of queues + * @vport: vport to initialize queues + * @vport_msg: data to be filled into vport + */ +void idpf_vport_init_num_qs(struct idpf_vport *vport, + struct virtchnl2_create_vport *vport_msg) +{ + struct idpf_vport_user_config_data *config_data; + u16 idx = vport->idx; + + config_data = &vport->adapter->vport_config[idx]->user_config; + vport->num_txq = le16_to_cpu(vport_msg->num_tx_q); + vport->num_rxq = le16_to_cpu(vport_msg->num_rx_q); + /* number of txqs and rxqs in config data will be zeros only in the + * driver load path and we dont update them there after + */ + if (!config_data->num_req_tx_qs && !config_data->num_req_rx_qs) { + config_data->num_req_tx_qs = le16_to_cpu(vport_msg->num_tx_q); + config_data->num_req_rx_qs = le16_to_cpu(vport_msg->num_rx_q); + } + + if (idpf_is_queue_model_split(vport->txq_model)) + vport->num_complq = le16_to_cpu(vport_msg->num_tx_complq); + if (idpf_is_queue_model_split(vport->rxq_model)) + vport->num_bufq = le16_to_cpu(vport_msg->num_rx_bufq); + + /* Adjust number of buffer queues per Rx queue group. */ + if (!idpf_is_queue_model_split(vport->rxq_model)) { + vport->num_bufqs_per_qgrp = 0; + vport->bufq_size[0] = IDPF_RX_BUF_2048; + + return; + } + + vport->num_bufqs_per_qgrp = IDPF_MAX_BUFQS_PER_RXQ_GRP; + /* Bufq[0] default buffer size is 4K + * Bufq[1] default buffer size is 2K + */ + vport->bufq_size[0] = IDPF_RX_BUF_4096; + vport->bufq_size[1] = IDPF_RX_BUF_2048; +} + +/** + * idpf_vport_calc_num_q_desc - Calculate number of queue groups + * @vport: vport to calculate q groups for + */ +void idpf_vport_calc_num_q_desc(struct idpf_vport *vport) +{ + struct idpf_vport_user_config_data *config_data; + int num_bufqs = vport->num_bufqs_per_qgrp; + u32 num_req_txq_desc, num_req_rxq_desc; + u16 idx = vport->idx; + int i; + + config_data = &vport->adapter->vport_config[idx]->user_config; + num_req_txq_desc = config_data->num_req_txq_desc; + num_req_rxq_desc = config_data->num_req_rxq_desc; + + vport->complq_desc_count = 0; + if (num_req_txq_desc) { + vport->txq_desc_count = num_req_txq_desc; + if (idpf_is_queue_model_split(vport->txq_model)) { + vport->complq_desc_count = num_req_txq_desc; + if (vport->complq_desc_count < IDPF_MIN_TXQ_COMPLQ_DESC) + vport->complq_desc_count = + IDPF_MIN_TXQ_COMPLQ_DESC; + } + } else { + vport->txq_desc_count = IDPF_DFLT_TX_Q_DESC_COUNT; + if (idpf_is_queue_model_split(vport->txq_model)) + vport->complq_desc_count = + IDPF_DFLT_TX_COMPLQ_DESC_COUNT; + } + + if (num_req_rxq_desc) + vport->rxq_desc_count = num_req_rxq_desc; + else + vport->rxq_desc_count = IDPF_DFLT_RX_Q_DESC_COUNT; + + for (i = 0; i < num_bufqs; i++) { + if (!vport->bufq_desc_count[i]) + vport->bufq_desc_count[i] = + IDPF_RX_BUFQ_DESC_COUNT(vport->rxq_desc_count, + num_bufqs); + } +} + +/** + * idpf_vport_calc_total_qs - Calculate total number of queues + * @adapter: private data struct + * @vport_idx: vport idx to retrieve vport pointer + * @vport_msg: message to fill with data + * @max_q: vport max queue info + * + * Return 0 on success, error value on failure. + */ +int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_idx, + struct virtchnl2_create_vport *vport_msg, + struct idpf_vport_max_q *max_q) +{ + int dflt_splitq_txq_grps = 0, dflt_singleq_txqs = 0; + int dflt_splitq_rxq_grps = 0, dflt_singleq_rxqs = 0; + u16 num_req_tx_qs = 0, num_req_rx_qs = 0; + struct idpf_vport_config *vport_config; + u16 num_txq_grps, num_rxq_grps; + u32 num_qs; + + vport_config = adapter->vport_config[vport_idx]; + if (vport_config) { + num_req_tx_qs = vport_config->user_config.num_req_tx_qs; + num_req_rx_qs = vport_config->user_config.num_req_rx_qs; + } else { + int num_cpus; + + /* Restrict num of queues to cpus online as a default + * configuration to give best performance. User can always + * override to a max number of queues via ethtool. + */ + num_cpus = num_online_cpus(); + + dflt_splitq_txq_grps = min_t(int, max_q->max_txq, num_cpus); + dflt_singleq_txqs = min_t(int, max_q->max_txq, num_cpus); + dflt_splitq_rxq_grps = min_t(int, max_q->max_rxq, num_cpus); + dflt_singleq_rxqs = min_t(int, max_q->max_rxq, num_cpus); + } + + if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->txq_model))) { + num_txq_grps = num_req_tx_qs ? num_req_tx_qs : dflt_splitq_txq_grps; + vport_msg->num_tx_complq = cpu_to_le16(num_txq_grps * + IDPF_COMPLQ_PER_GROUP); + vport_msg->num_tx_q = cpu_to_le16(num_txq_grps * + IDPF_DFLT_SPLITQ_TXQ_PER_GROUP); + } else { + num_txq_grps = IDPF_DFLT_SINGLEQ_TX_Q_GROUPS; + num_qs = num_txq_grps * (num_req_tx_qs ? num_req_tx_qs : + dflt_singleq_txqs); + vport_msg->num_tx_q = cpu_to_le16(num_qs); + vport_msg->num_tx_complq = 0; + } + if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->rxq_model))) { + num_rxq_grps = num_req_rx_qs ? num_req_rx_qs : dflt_splitq_rxq_grps; + vport_msg->num_rx_bufq = cpu_to_le16(num_rxq_grps * + IDPF_MAX_BUFQS_PER_RXQ_GRP); + vport_msg->num_rx_q = cpu_to_le16(num_rxq_grps * + IDPF_DFLT_SPLITQ_RXQ_PER_GROUP); + } else { + num_rxq_grps = IDPF_DFLT_SINGLEQ_RX_Q_GROUPS; + num_qs = num_rxq_grps * (num_req_rx_qs ? num_req_rx_qs : + dflt_singleq_rxqs); + vport_msg->num_rx_q = cpu_to_le16(num_qs); + vport_msg->num_rx_bufq = 0; + } + + return 0; +} + +/** + * idpf_vport_calc_num_q_groups - Calculate number of queue groups + * @vport: vport to calculate q groups for + */ +void idpf_vport_calc_num_q_groups(struct idpf_vport *vport) +{ + if (idpf_is_queue_model_split(vport->txq_model)) + vport->num_txq_grp = vport->num_txq; + else + vport->num_txq_grp = IDPF_DFLT_SINGLEQ_TX_Q_GROUPS; + + if (idpf_is_queue_model_split(vport->rxq_model)) + vport->num_rxq_grp = vport->num_rxq; + else + vport->num_rxq_grp = IDPF_DFLT_SINGLEQ_RX_Q_GROUPS; +} + +/** + * idpf_vport_calc_numq_per_grp - Calculate number of queues per group + * @vport: vport to calculate queues for + * @num_txq: return parameter for number of TX queues + * @num_rxq: return parameter for number of RX queues + */ +static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport, + u16 *num_txq, u16 *num_rxq) +{ + if (idpf_is_queue_model_split(vport->txq_model)) + *num_txq = IDPF_DFLT_SPLITQ_TXQ_PER_GROUP; + else + *num_txq = vport->num_txq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + *num_rxq = IDPF_DFLT_SPLITQ_RXQ_PER_GROUP; + else + *num_rxq = vport->num_rxq; +} + +/** + * idpf_rxq_set_descids - set the descids supported by this queue + * @vport: virtual port data structure + * @q: rx queue for which descids are set + * + */ +static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q) +{ + if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M; + } else { + if (vport->base_rxd) + q->rxdids = VIRTCHNL2_RXDID_1_32B_BASE_M; + else + q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M; + } +} + +/** + * idpf_txq_group_alloc - Allocate all txq group resources + * @vport: vport to allocate txq groups for + * @num_txq: number of txqs to allocate for each group + * + * Returns 0 on success, negative on failure + */ +static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) +{ + int err, i; + + vport->txq_grps = kcalloc(vport->num_txq_grp, + sizeof(*vport->txq_grps), GFP_KERNEL); + if (!vport->txq_grps) + return -ENOMEM; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + struct idpf_adapter *adapter = vport->adapter; + int j; + + tx_qgrp->vport = vport; + tx_qgrp->num_txq = num_txq; + + for (j = 0; j < tx_qgrp->num_txq; j++) { + tx_qgrp->txqs[j] = kzalloc(sizeof(*tx_qgrp->txqs[j]), + GFP_KERNEL); + if (!tx_qgrp->txqs[j]) { + err = -ENOMEM; + goto err_alloc; + } + } + + for (j = 0; j < tx_qgrp->num_txq; j++) { + struct idpf_queue *q = tx_qgrp->txqs[j]; + + q->dev = &adapter->pdev->dev; + q->desc_count = vport->txq_desc_count; + q->tx_max_bufs = idpf_get_max_tx_bufs(adapter); + q->tx_min_pkt_len = idpf_get_min_tx_pkt_len(adapter); + q->vport = vport; + q->txq_grp = tx_qgrp; + hash_init(q->sched_buf_hash); + + if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, + VIRTCHNL2_CAP_SPLITQ_QSCHED)) + set_bit(__IDPF_Q_FLOW_SCH_EN, q->flags); + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + continue; + + tx_qgrp->complq = kcalloc(IDPF_COMPLQ_PER_GROUP, + sizeof(*tx_qgrp->complq), + GFP_KERNEL); + if (!tx_qgrp->complq) { + err = -ENOMEM; + goto err_alloc; + } + + tx_qgrp->complq->dev = &adapter->pdev->dev; + tx_qgrp->complq->desc_count = vport->complq_desc_count; + tx_qgrp->complq->vport = vport; + tx_qgrp->complq->txq_grp = tx_qgrp; + } + + return 0; + +err_alloc: + idpf_txq_group_rel(vport); + + return err; +} + +/** + * idpf_rxq_group_alloc - Allocate all rxq group resources + * @vport: vport to allocate rxq groups for + * @num_rxq: number of rxqs to allocate for each group + * + * Returns 0 on success, negative on failure + */ +static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_queue *q; + int i, k, err = 0; + + vport->rxq_grps = kcalloc(vport->num_rxq_grp, + sizeof(struct idpf_rxq_group), GFP_KERNEL); + if (!vport->rxq_grps) + return -ENOMEM; + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + int j; + + rx_qgrp->vport = vport; + if (!idpf_is_queue_model_split(vport->rxq_model)) { + rx_qgrp->singleq.num_rxq = num_rxq; + for (j = 0; j < num_rxq; j++) { + rx_qgrp->singleq.rxqs[j] = + kzalloc(sizeof(*rx_qgrp->singleq.rxqs[j]), + GFP_KERNEL); + if (!rx_qgrp->singleq.rxqs[j]) { + err = -ENOMEM; + goto err_alloc; + } + } + goto skip_splitq_rx_init; + } + rx_qgrp->splitq.num_rxq_sets = num_rxq; + + for (j = 0; j < num_rxq; j++) { + rx_qgrp->splitq.rxq_sets[j] = + kzalloc(sizeof(struct idpf_rxq_set), + GFP_KERNEL); + if (!rx_qgrp->splitq.rxq_sets[j]) { + err = -ENOMEM; + goto err_alloc; + } + } + + rx_qgrp->splitq.bufq_sets = kcalloc(vport->num_bufqs_per_qgrp, + sizeof(struct idpf_bufq_set), + GFP_KERNEL); + if (!rx_qgrp->splitq.bufq_sets) { + err = -ENOMEM; + goto err_alloc; + } + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + struct idpf_bufq_set *bufq_set = + &rx_qgrp->splitq.bufq_sets[j]; + int swq_size = sizeof(struct idpf_sw_queue); + + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + q->dev = &adapter->pdev->dev; + q->desc_count = vport->bufq_desc_count[j]; + q->vport = vport; + q->rxq_grp = rx_qgrp; + q->idx = j; + q->rx_buf_size = vport->bufq_size[j]; + q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK; + q->rx_buf_stride = IDPF_RX_BUF_STRIDE; + if (idpf_is_cap_ena_all(adapter, IDPF_HSPLIT_CAPS, + IDPF_CAP_HSPLIT) && + idpf_is_queue_model_split(vport->rxq_model)) { + q->rx_hsplit_en = true; + q->rx_hbuf_size = IDPF_HDR_BUF_SIZE; + } + + bufq_set->num_refillqs = num_rxq; + bufq_set->refillqs = kcalloc(num_rxq, swq_size, + GFP_KERNEL); + if (!bufq_set->refillqs) { + err = -ENOMEM; + goto err_alloc; + } + for (k = 0; k < bufq_set->num_refillqs; k++) { + struct idpf_sw_queue *refillq = + &bufq_set->refillqs[k]; + + refillq->dev = &vport->adapter->pdev->dev; + refillq->desc_count = + vport->bufq_desc_count[j]; + set_bit(__IDPF_Q_GEN_CHK, refillq->flags); + set_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags); + refillq->ring = kcalloc(refillq->desc_count, + sizeof(u16), + GFP_KERNEL); + if (!refillq->ring) { + err = -ENOMEM; + goto err_alloc; + } + } + } + +skip_splitq_rx_init: + for (j = 0; j < num_rxq; j++) { + if (!idpf_is_queue_model_split(vport->rxq_model)) { + q = rx_qgrp->singleq.rxqs[j]; + goto setup_rxq; + } + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + rx_qgrp->splitq.rxq_sets[j]->refillq0 = + &rx_qgrp->splitq.bufq_sets[0].refillqs[j]; + if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) + rx_qgrp->splitq.rxq_sets[j]->refillq1 = + &rx_qgrp->splitq.bufq_sets[1].refillqs[j]; + + if (idpf_is_cap_ena_all(adapter, IDPF_HSPLIT_CAPS, + IDPF_CAP_HSPLIT) && + idpf_is_queue_model_split(vport->rxq_model)) { + q->rx_hsplit_en = true; + q->rx_hbuf_size = IDPF_HDR_BUF_SIZE; + } + +setup_rxq: + q->dev = &adapter->pdev->dev; + q->desc_count = vport->rxq_desc_count; + q->vport = vport; + q->rxq_grp = rx_qgrp; + q->idx = (i * num_rxq) + j; + /* In splitq mode, RXQ buffer size should be + * set to that of the first buffer queue + * associated with this RXQ + */ + q->rx_buf_size = vport->bufq_size[0]; + q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK; + q->rx_max_pkt_size = vport->netdev->mtu + + IDPF_PACKET_HDR_PAD; + idpf_rxq_set_descids(vport, q); + } + } + +err_alloc: + if (err) + idpf_rxq_group_rel(vport); + + return err; +} + +/** + * idpf_vport_queue_grp_alloc_all - Allocate all queue groups/resources + * @vport: vport with qgrps to allocate + * + * Returns 0 on success, negative on failure + */ +static int idpf_vport_queue_grp_alloc_all(struct idpf_vport *vport) +{ + u16 num_txq, num_rxq; + int err; + + idpf_vport_calc_numq_per_grp(vport, &num_txq, &num_rxq); + + err = idpf_txq_group_alloc(vport, num_txq); + if (err) + goto err_out; + + err = idpf_rxq_group_alloc(vport, num_rxq); + if (err) + goto err_out; + + return 0; + +err_out: + idpf_vport_queue_grp_rel_all(vport); + + return err; +} + +/** + * idpf_vport_queues_alloc - Allocate memory for all queues + * @vport: virtual port + * + * Allocate memory for queues associated with a vport. Returns 0 on success, + * negative on failure. + */ +int idpf_vport_queues_alloc(struct idpf_vport *vport) +{ + int err; + + err = idpf_vport_queue_grp_alloc_all(vport); + if (err) + goto err_out; + + err = idpf_tx_desc_alloc_all(vport); + if (err) + goto err_out; + + err = idpf_rx_desc_alloc_all(vport); + if (err) + goto err_out; + + err = idpf_vport_init_fast_path_txqs(vport); + if (err) + goto err_out; + + return 0; + +err_out: + idpf_vport_queues_rel(vport); + + return err; +} + +/** + * idpf_tx_handle_sw_marker - Handle queue marker packet + * @tx_q: tx queue to handle software marker + */ +static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q) +{ + struct idpf_vport *vport = tx_q->vport; + int i; + + clear_bit(__IDPF_Q_SW_MARKER, tx_q->flags); + /* Hardware must write marker packets to all queues associated with + * completion queues. So check if all queues received marker packets + */ + for (i = 0; i < vport->num_txq; i++) + /* If we're still waiting on any other TXQ marker completions, + * just return now since we cannot wake up the marker_wq yet. + */ + if (test_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags)) + return; + + /* Drain complete */ + set_bit(IDPF_VPORT_SW_MARKER, vport->flags); + wake_up(&vport->sw_marker_wq); +} + +/** + * idpf_tx_splitq_clean_hdr - Clean TX buffer resources for header portion of + * packet + * @tx_q: tx queue to clean buffer from + * @tx_buf: buffer to be cleaned + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @napi_budget: Used to determine if we are in netpoll + */ +static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q, + struct idpf_tx_buf *tx_buf, + struct idpf_cleaned_stats *cleaned, + int napi_budget) +{ + napi_consume_skb(tx_buf->skb, napi_budget); + + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_single(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + + dma_unmap_len_set(tx_buf, len, 0); + } + + /* clear tx_buf data */ + tx_buf->skb = NULL; + + cleaned->bytes += tx_buf->bytecount; + cleaned->packets += tx_buf->gso_segs; +} + +/** + * idpf_tx_clean_stashed_bufs - clean bufs that were stored for + * out of order completions + * @txq: queue to clean + * @compl_tag: completion tag of packet to clean (from completion descriptor) + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @budget: Used to determine if we are in netpoll + */ +static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, + struct idpf_cleaned_stats *cleaned, + int budget) +{ + struct idpf_tx_stash *stash; + struct hlist_node *tmp_buf; + + /* Buffer completion */ + hash_for_each_possible_safe(txq->sched_buf_hash, stash, tmp_buf, + hlist, compl_tag) { + if (unlikely(stash->buf.compl_tag != (int)compl_tag)) + continue; + + if (stash->buf.skb) { + idpf_tx_splitq_clean_hdr(txq, &stash->buf, cleaned, + budget); + } else if (dma_unmap_len(&stash->buf, len)) { + dma_unmap_page(txq->dev, + dma_unmap_addr(&stash->buf, dma), + dma_unmap_len(&stash->buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(&stash->buf, len, 0); + } + + /* Push shadow buf back onto stack */ + idpf_buf_lifo_push(&txq->buf_stack, stash); + + hash_del(&stash->hlist); + } +} + +/** + * idpf_stash_flow_sch_buffers - store buffer parameters info to be freed at a + * later time (only relevant for flow scheduling mode) + * @txq: Tx queue to clean + * @tx_buf: buffer to store + */ +static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, + struct idpf_tx_buf *tx_buf) +{ + struct idpf_tx_stash *stash; + + if (unlikely(!dma_unmap_addr(tx_buf, dma) && + !dma_unmap_len(tx_buf, len))) + return 0; + + stash = idpf_buf_lifo_pop(&txq->buf_stack); + if (unlikely(!stash)) { + net_err_ratelimited("%s: No out-of-order TX buffers left!\n", + txq->vport->netdev->name); + + return -ENOMEM; + } + + /* Store buffer params in shadow buffer */ + stash->buf.skb = tx_buf->skb; + stash->buf.bytecount = tx_buf->bytecount; + stash->buf.gso_segs = tx_buf->gso_segs; + dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma)); + dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len)); + stash->buf.compl_tag = tx_buf->compl_tag; + + /* Add buffer to buf_hash table to be freed later */ + hash_add(txq->sched_buf_hash, &stash->hlist, stash->buf.compl_tag); + + memset(tx_buf, 0, sizeof(struct idpf_tx_buf)); + + /* Reinitialize buf_id portion of tag */ + tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + + return 0; +} + +#define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf) \ +do { \ + (ntc)++; \ + if (unlikely(!(ntc))) { \ + ntc -= (txq)->desc_count; \ + buf = (txq)->tx_buf; \ + desc = IDPF_FLEX_TX_DESC(txq, 0); \ + } else { \ + (buf)++; \ + (desc)++; \ + } \ +} while (0) + +/** + * idpf_tx_splitq_clean - Reclaim resources from buffer queue + * @tx_q: Tx queue to clean + * @end: queue index until which it should be cleaned + * @napi_budget: Used to determine if we are in netpoll + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @descs_only: true if queue is using flow-based scheduling and should + * not clean buffers at this time + * + * Cleans the queue descriptor ring. If the queue is using queue-based + * scheduling, the buffers will be cleaned as well. If the queue is using + * flow-based scheduling, only the descriptors are cleaned at this time. + * Separate packet completion events will be reported on the completion queue, + * and the buffers will be cleaned separately. The stats are not updated from + * this function when using flow-based scheduling. + */ +static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end, + int napi_budget, + struct idpf_cleaned_stats *cleaned, + bool descs_only) +{ + union idpf_tx_flex_desc *next_pending_desc = NULL; + union idpf_tx_flex_desc *tx_desc; + s16 ntc = tx_q->next_to_clean; + struct idpf_tx_buf *tx_buf; + + tx_desc = IDPF_FLEX_TX_DESC(tx_q, ntc); + next_pending_desc = IDPF_FLEX_TX_DESC(tx_q, end); + tx_buf = &tx_q->tx_buf[ntc]; + ntc -= tx_q->desc_count; + + while (tx_desc != next_pending_desc) { + union idpf_tx_flex_desc *eop_desc; + + /* If this entry in the ring was used as a context descriptor, + * it's corresponding entry in the buffer ring will have an + * invalid completion tag since no buffer was used. We can + * skip this descriptor since there is no buffer to clean. + */ + if (unlikely(tx_buf->compl_tag == IDPF_SPLITQ_TX_INVAL_COMPL_TAG)) + goto fetch_next_txq_desc; + + eop_desc = (union idpf_tx_flex_desc *)tx_buf->next_to_watch; + + /* clear next_to_watch to prevent false hangs */ + tx_buf->next_to_watch = NULL; + + if (descs_only) { + if (idpf_stash_flow_sch_buffers(tx_q, tx_buf)) + goto tx_splitq_clean_out; + + while (tx_desc != eop_desc) { + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, + tx_desc, tx_buf); + + if (dma_unmap_len(tx_buf, len)) { + if (idpf_stash_flow_sch_buffers(tx_q, + tx_buf)) + goto tx_splitq_clean_out; + } + } + } else { + idpf_tx_splitq_clean_hdr(tx_q, tx_buf, cleaned, + napi_budget); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, + tx_desc, tx_buf); + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + } + } + +fetch_next_txq_desc: + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf); + } + +tx_splitq_clean_out: + ntc += tx_q->desc_count; + tx_q->next_to_clean = ntc; +} + +#define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf) \ +do { \ + (buf)++; \ + (ntc)++; \ + if (unlikely((ntc) == (txq)->desc_count)) { \ + buf = (txq)->tx_buf; \ + ntc = 0; \ + } \ +} while (0) + +/** + * idpf_tx_clean_buf_ring - clean flow scheduling TX queue buffers + * @txq: queue to clean + * @compl_tag: completion tag of packet to clean (from completion descriptor) + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @budget: Used to determine if we are in netpoll + * + * Cleans all buffers associated with the input completion tag either from the + * TX buffer ring or from the hash table if the buffers were previously + * stashed. Returns the byte/segment count for the cleaned packet associated + * this completion tag. + */ +static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag, + struct idpf_cleaned_stats *cleaned, + int budget) +{ + u16 idx = compl_tag & txq->compl_tag_bufid_m; + struct idpf_tx_buf *tx_buf = NULL; + u16 ntc = txq->next_to_clean; + u16 num_descs_cleaned = 0; + u16 orig_idx = idx; + + tx_buf = &txq->tx_buf[idx]; + + while (tx_buf->compl_tag == (int)compl_tag) { + if (tx_buf->skb) { + idpf_tx_splitq_clean_hdr(txq, tx_buf, cleaned, budget); + } else if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(txq->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + + memset(tx_buf, 0, sizeof(struct idpf_tx_buf)); + tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + + num_descs_cleaned++; + idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + } + + /* If we didn't clean anything on the ring for this completion, there's + * nothing more to do. + */ + if (unlikely(!num_descs_cleaned)) + return false; + + /* Otherwise, if we did clean a packet on the ring directly, it's safe + * to assume that the descriptors starting from the original + * next_to_clean up until the previously cleaned packet can be reused. + * Therefore, we will go back in the ring and stash any buffers still + * in the ring into the hash table to be cleaned later. + */ + tx_buf = &txq->tx_buf[ntc]; + while (tx_buf != &txq->tx_buf[orig_idx]) { + idpf_stash_flow_sch_buffers(txq, tx_buf); + idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf); + } + + /* Finally, update next_to_clean to reflect the work that was just done + * on the ring, if any. If the packet was only cleaned from the hash + * table, the ring will not be impacted, therefore we should not touch + * next_to_clean. The updated idx is used here + */ + txq->next_to_clean = idx; + + return true; +} + +/** + * idpf_tx_handle_rs_completion - clean a single packet and all of its buffers + * whether on the buffer ring or in the hash table + * @txq: Tx ring to clean + * @desc: pointer to completion queue descriptor to extract completion + * information from + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @budget: Used to determine if we are in netpoll + * + * Returns bytes/packets cleaned + */ +static void idpf_tx_handle_rs_completion(struct idpf_queue *txq, + struct idpf_splitq_tx_compl_desc *desc, + struct idpf_cleaned_stats *cleaned, + int budget) +{ + u16 compl_tag; + + if (!test_bit(__IDPF_Q_FLOW_SCH_EN, txq->flags)) { + u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); + + return idpf_tx_splitq_clean(txq, head, budget, cleaned, false); + } + + compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); + + /* If we didn't clean anything on the ring, this packet must be + * in the hash table. Go clean it there. + */ + if (!idpf_tx_clean_buf_ring(txq, compl_tag, cleaned, budget)) + idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget); +} + +/** + * idpf_tx_clean_complq - Reclaim resources on completion queue + * @complq: Tx ring to clean + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns true if there's any budget left (e.g. the clean is finished) + */ +static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, + int *cleaned) +{ + struct idpf_splitq_tx_compl_desc *tx_desc; + struct idpf_vport *vport = complq->vport; + s16 ntc = complq->next_to_clean; + struct idpf_netdev_priv *np; + unsigned int complq_budget; + bool complq_ok = true; + int i; + + complq_budget = vport->compln_clean_budget; + tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, ntc); + ntc -= complq->desc_count; + + do { + struct idpf_cleaned_stats cleaned_stats = { }; + struct idpf_queue *tx_q; + int rel_tx_qid; + u16 hw_head; + u8 ctype; /* completion type */ + u16 gen; + + /* if the descriptor isn't done, no work yet to do */ + gen = (le16_to_cpu(tx_desc->qid_comptype_gen) & + IDPF_TXD_COMPLQ_GEN_M) >> IDPF_TXD_COMPLQ_GEN_S; + if (test_bit(__IDPF_Q_GEN_CHK, complq->flags) != gen) + break; + + /* Find necessary info of TX queue to clean buffers */ + rel_tx_qid = (le16_to_cpu(tx_desc->qid_comptype_gen) & + IDPF_TXD_COMPLQ_QID_M) >> IDPF_TXD_COMPLQ_QID_S; + if (rel_tx_qid >= complq->txq_grp->num_txq || + !complq->txq_grp->txqs[rel_tx_qid]) { + dev_err(&complq->vport->adapter->pdev->dev, + "TxQ not found\n"); + goto fetch_next_desc; + } + tx_q = complq->txq_grp->txqs[rel_tx_qid]; + + /* Determine completion type */ + ctype = (le16_to_cpu(tx_desc->qid_comptype_gen) & + IDPF_TXD_COMPLQ_COMPL_TYPE_M) >> + IDPF_TXD_COMPLQ_COMPL_TYPE_S; + switch (ctype) { + case IDPF_TXD_COMPLT_RE: + hw_head = le16_to_cpu(tx_desc->q_head_compl_tag.q_head); + + idpf_tx_splitq_clean(tx_q, hw_head, budget, + &cleaned_stats, true); + break; + case IDPF_TXD_COMPLT_RS: + idpf_tx_handle_rs_completion(tx_q, tx_desc, + &cleaned_stats, budget); + break; + case IDPF_TXD_COMPLT_SW_MARKER: + idpf_tx_handle_sw_marker(tx_q); + break; + default: + dev_err(&tx_q->vport->adapter->pdev->dev, + "Unknown TX completion type: %d\n", + ctype); + goto fetch_next_desc; + } + + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_add(&tx_q->q_stats.tx.packets, cleaned_stats.packets); + u64_stats_add(&tx_q->q_stats.tx.bytes, cleaned_stats.bytes); + tx_q->cleaned_pkts += cleaned_stats.packets; + tx_q->cleaned_bytes += cleaned_stats.bytes; + complq->num_completions++; + u64_stats_update_end(&tx_q->stats_sync); + +fetch_next_desc: + tx_desc++; + ntc++; + if (unlikely(!ntc)) { + ntc -= complq->desc_count; + tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, 0); + change_bit(__IDPF_Q_GEN_CHK, complq->flags); + } + + prefetch(tx_desc); + + /* update budget accounting */ + complq_budget--; + } while (likely(complq_budget)); + + /* Store the state of the complq to be used later in deciding if a + * TXQ can be started again + */ + if (unlikely(IDPF_TX_COMPLQ_PENDING(complq->txq_grp) > + IDPF_TX_COMPLQ_OVERFLOW_THRESH(complq))) + complq_ok = false; + + np = netdev_priv(complq->vport->netdev); + for (i = 0; i < complq->txq_grp->num_txq; ++i) { + struct idpf_queue *tx_q = complq->txq_grp->txqs[i]; + struct netdev_queue *nq; + bool dont_wake; + + /* We didn't clean anything on this queue, move along */ + if (!tx_q->cleaned_bytes) + continue; + + *cleaned += tx_q->cleaned_pkts; + + /* Update BQL */ + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + + dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) || + np->state != __IDPF_VPORT_UP || + !netif_carrier_ok(tx_q->vport->netdev); + /* Check if the TXQ needs to and can be restarted */ + __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes, + IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, + dont_wake); + + /* Reset cleaned stats for the next time this queue is + * cleaned + */ + tx_q->cleaned_bytes = 0; + tx_q->cleaned_pkts = 0; + } + + ntc += complq->desc_count; + complq->next_to_clean = ntc; + + return !!complq_budget; +} + +/** + * idpf_tx_splitq_build_ctb - populate command tag and size for queue + * based scheduling descriptors + * @desc: descriptor to populate + * @params: pointer to tx params struct + * @td_cmd: command to be filled in desc + * @size: size of buffer + */ +void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc, + struct idpf_tx_splitq_params *params, + u16 td_cmd, u16 size) +{ + desc->q.qw1.cmd_dtype = + cpu_to_le16(params->dtype & IDPF_FLEX_TXD_QW1_DTYPE_M); + desc->q.qw1.cmd_dtype |= + cpu_to_le16((td_cmd << IDPF_FLEX_TXD_QW1_CMD_S) & + IDPF_FLEX_TXD_QW1_CMD_M); + desc->q.qw1.buf_size = cpu_to_le16((u16)size); + desc->q.qw1.l2tags.l2tag1 = cpu_to_le16(params->td_tag); +} + +/** + * idpf_tx_splitq_build_flow_desc - populate command tag and size for flow + * scheduling descriptors + * @desc: descriptor to populate + * @params: pointer to tx params struct + * @td_cmd: command to be filled in desc + * @size: size of buffer + */ +void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, + struct idpf_tx_splitq_params *params, + u16 td_cmd, u16 size) +{ + desc->flow.qw1.cmd_dtype = (u16)params->dtype | td_cmd; + desc->flow.qw1.rxr_bufsize = cpu_to_le16((u16)size); + desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); +} + +/** + * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions + * @tx_q: the queue to be checked + * @size: number of descriptors we want to assure is available + * + * Returns 0 if stop is not needed + */ +int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size) +{ + struct netdev_queue *nq; + + if (likely(IDPF_DESC_UNUSED(tx_q) >= size)) + return 0; + + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.tx.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + + return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size); +} + +/** + * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions + * @tx_q: the queue to be checked + * @descs_needed: number of descriptors required for this packet + * + * Returns 0 if stop is not needed + */ +static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q, + unsigned int descs_needed) +{ + if (idpf_tx_maybe_stop_common(tx_q, descs_needed)) + goto splitq_stop; + + /* If there are too many outstanding completions expected on the + * completion queue, stop the TX queue to give the device some time to + * catch up + */ + if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > + IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq))) + goto splitq_stop; + + /* Also check for available book keeping buffers; if we are low, stop + * the queue to wait for more completions + */ + if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q))) + goto splitq_stop; + + return 0; + +splitq_stop: + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.tx.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx); + + return -EBUSY; +} + +/** + * idpf_tx_buf_hw_update - Store the new tail value + * @tx_q: queue to bump + * @val: new tail index + * @xmit_more: more skb's pending + * + * The naming here is special in that 'hw' signals that this function is about + * to do a register write to update our queue status. We know this can only + * mean tail here as HW should be owning head for TX. + */ +void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, + bool xmit_more) +{ + struct netdev_queue *nq; + + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + tx_q->next_to_use = val; + + idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED); + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + + /* notify HW of packet */ + if (netif_xmit_stopped(nq) || !xmit_more) + writel(val, tx_q->tail); +} + +/** + * idpf_tx_desc_count_required - calculate number of Tx descriptors needed + * @txq: queue to send buffer on + * @skb: send buffer + * + * Returns number of data descriptors needed for this skb. + */ +unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, + struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo; + unsigned int count = 0, i; + + count += !!skb_headlen(skb); + + if (!skb_is_nonlinear(skb)) + return count; + + shinfo = skb_shinfo(skb); + for (i = 0; i < shinfo->nr_frags; i++) { + unsigned int size; + + size = skb_frag_size(&shinfo->frags[i]); + + /* We only need to use the idpf_size_to_txd_count check if the + * fragment is going to span multiple descriptors, + * i.e. size >= 16K. + */ + if (size >= SZ_16K) + count += idpf_size_to_txd_count(size); + else + count++; + } + + if (idpf_chk_linearize(skb, txq->tx_max_bufs, count)) { + if (__skb_linearize(skb)) + return 0; + + count = idpf_size_to_txd_count(skb->len); + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.tx.linearize); + u64_stats_update_end(&txq->stats_sync); + } + + return count; +} + +/** + * idpf_tx_dma_map_error - handle TX DMA map errors + * @txq: queue to send buffer on + * @skb: send buffer + * @first: original first buffer info buffer for packet + * @idx: starting point on ring to unwind + */ +void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, + struct idpf_tx_buf *first, u16 idx) +{ + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.tx.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + /* clear dma mappings for failed tx_buf map */ + for (;;) { + struct idpf_tx_buf *tx_buf; + + tx_buf = &txq->tx_buf[idx]; + idpf_tx_buf_rel(txq, tx_buf); + if (tx_buf == first) + break; + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + if (skb_is_gso(skb)) { + union idpf_tx_flex_desc *tx_desc; + + /* If we failed a DMA mapping for a TSO packet, we will have + * used one additional descriptor for a context + * descriptor. Reset that here. + */ + tx_desc = IDPF_FLEX_TX_DESC(txq, idx); + memset(tx_desc, 0, sizeof(struct idpf_flex_tx_ctx_desc)); + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + /* Update tail in case netdev_xmit_more was previously true */ + idpf_tx_buf_hw_update(txq, idx, false); +} + +/** + * idpf_tx_splitq_bump_ntu - adjust NTU and generation + * @txq: the tx ring to wrap + * @ntu: ring index to bump + */ +static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_queue *txq, u16 ntu) +{ + ntu++; + + if (ntu == txq->desc_count) { + ntu = 0; + txq->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(txq); + } + + return ntu; +} + +/** + * idpf_tx_splitq_map - Build the Tx flex descriptor + * @tx_q: queue to send buffer on + * @params: pointer to splitq params struct + * @first: first buffer info buffer to use + * + * This function loops over the skb data pointed to by *first + * and gets a physical address for each memory location and programs + * it and the length into the transmit flex descriptor. + */ +static void idpf_tx_splitq_map(struct idpf_queue *tx_q, + struct idpf_tx_splitq_params *params, + struct idpf_tx_buf *first) +{ + union idpf_tx_flex_desc *tx_desc; + unsigned int data_len, size; + struct idpf_tx_buf *tx_buf; + u16 i = tx_q->next_to_use; + struct netdev_queue *nq; + struct sk_buff *skb; + skb_frag_t *frag; + u16 td_cmd = 0; + dma_addr_t dma; + + skb = first->skb; + + td_cmd = params->offload.td_cmd; + + data_len = skb->data_len; + size = skb_headlen(skb); + + tx_desc = IDPF_FLEX_TX_DESC(tx_q, i); + + dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); + + tx_buf = first; + + params->compl_tag = + (tx_q->compl_tag_cur_gen << tx_q->compl_tag_gen_s) | i; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; + + if (dma_mapping_error(tx_q->dev, dma)) + return idpf_tx_dma_map_error(tx_q, skb, first, i); + + tx_buf->compl_tag = params->compl_tag; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + + /* buf_addr is in same location for both desc types */ + tx_desc->q.buf_addr = cpu_to_le64(dma); + + /* The stack can send us fragments that are too large for a + * single descriptor i.e. frag size > 16K-1. We will need to + * split the fragment across multiple descriptors in this case. + * To adhere to HW alignment restrictions, the fragment needs + * to be split such that the first chunk ends on a 4K boundary + * and all subsequent chunks start on a 4K boundary. We still + * want to send as much data as possible though, so our + * intermediate descriptor chunk size will be 12K. + * + * For example, consider a 32K fragment mapped to DMA addr 2600. + * ------------------------------------------------------------ + * | frag_size = 32K | + * ------------------------------------------------------------ + * |2600 |16384 |28672 + * + * 3 descriptors will be used for this fragment. The HW expects + * the descriptors to contain the following: + * ------------------------------------------------------------ + * | size = 13784 | size = 12K | size = 6696 | + * | dma = 2600 | dma = 16384 | dma = 28672 | + * ------------------------------------------------------------ + * + * We need to first adjust the max_data for the first chunk so + * that it ends on a 4K boundary. By negating the value of the + * DMA address and taking only the low order bits, we're + * effectively calculating + * 4K - (DMA addr lower order bits) = + * bytes to next boundary. + * + * Add that to our base aligned max_data (12K) and we have + * our first chunk size. In the example above, + * 13784 = 12K + (4096-2600) + * + * After guaranteeing the first chunk ends on a 4K boundary, we + * will give the intermediate descriptors 12K chunks and + * whatever is left to the final descriptor. This ensures that + * all descriptors used for the remaining chunks of the + * fragment start on a 4K boundary and we use as few + * descriptors as possible. + */ + max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1); + while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) { + idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, + max_data); + + tx_desc++; + i++; + + if (i == tx_q->desc_count) { + tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0); + i = 0; + tx_q->compl_tag_cur_gen = + IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); + } + + /* Since this packet has a buffer that is going to span + * multiple descriptors, it's going to leave holes in + * to the TX buffer ring. To ensure these holes do not + * cause issues in the cleaning routines, we will clear + * them of any stale data and assign them the same + * completion tag as the current packet. Then when the + * packet is being cleaned, the cleaning routines will + * simply pass over these holes and finish cleaning the + * rest of the packet. + */ + memset(&tx_q->tx_buf[i], 0, sizeof(struct idpf_tx_buf)); + tx_q->tx_buf[i].compl_tag = params->compl_tag; + + /* Adjust the DMA offset and the remaining size of the + * fragment. On the first iteration of this loop, + * max_data will be >= 12K and <= 16K-1. On any + * subsequent iteration of this loop, max_data will + * always be 12K. + */ + dma += max_data; + size -= max_data; + + /* Reset max_data since remaining chunks will be 12K + * at most + */ + max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; + + /* buf_addr is in same location for both desc types */ + tx_desc->q.buf_addr = cpu_to_le64(dma); + } + + if (!data_len) + break; + + idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); + tx_desc++; + i++; + + if (i == tx_q->desc_count) { + tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0); + i = 0; + tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); + } + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_q->dev, frag, 0, size, + DMA_TO_DEVICE); + + tx_buf = &tx_q->tx_buf[i]; + } + + /* record SW timestamp if HW timestamp is not available */ + skb_tx_timestamp(skb); + + /* write last descriptor with RS and EOP bits */ + td_cmd |= params->eop_cmd; + idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); + i = idpf_tx_splitq_bump_ntu(tx_q, i); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + tx_q->txq_grp->num_completions_pending++; + + /* record bytecount for BQL */ + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + netdev_tx_sent_queue(nq, first->bytecount); + + idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); +} + +/** + * idpf_tso - computes mss and TSO length to prepare for TSO + * @skb: pointer to skb + * @off: pointer to struct that holds offload parameters + * + * Returns error (negative) if TSO was requested but cannot be applied to the + * given skb, 0 if TSO does not apply to the given skb, or 1 otherwise. + */ +int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_start; + int err; + + if (!shinfo->gso_size) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else if (ip.v6->version == 6) { + ip.v6->payload_len = 0; + } + + l4_start = skb_transport_offset(skb); + + /* remove payload length from checksum */ + paylen = skb->len - l4_start; + + switch (shinfo->gso_type & ~SKB_GSO_DODGY) { + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + off->tso_hdr_len = __tcp_hdrlen(l4.tcp) + l4_start; + break; + case SKB_GSO_UDP_L4: + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + /* compute length of segmentation header */ + off->tso_hdr_len = sizeof(struct udphdr) + l4_start; + l4.udp->len = htons(shinfo->gso_size + sizeof(struct udphdr)); + break; + default: + return -EINVAL; + } + + off->tso_len = skb->len - off->tso_hdr_len; + off->mss = shinfo->gso_size; + off->tso_segs = shinfo->gso_segs; + + off->tx_flags |= IDPF_TX_FLAGS_TSO; + + return 1; +} + +/** + * __idpf_chk_linearize - Check skb is not using too many buffers + * @skb: send buffer + * @max_bufs: maximum number of buffers + * + * For TSO we need to count the TSO header and segment payload separately. As + * such we need to check cases where we have max_bufs-1 fragments or more as we + * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1 + * for the segment payload in the first descriptor, and another max_buf-1 for + * the fragments. + */ +static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + const skb_frag_t *frag, *stale; + int nr_frags, sum; + + /* no need to check if number of frags is less than max_bufs - 1 */ + nr_frags = shinfo->nr_frags; + if (nr_frags < (max_bufs - 1)) + return false; + + /* We need to walk through the list and validate that each group + * of max_bufs-2 fragments totals at least gso_size. + */ + nr_frags -= max_bufs - 2; + frag = &shinfo->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We use + * this as the worst case scenario in which the frag ahead of us only + * provides one byte which is why we are limited to max_bufs-2 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - shinfo->gso_size; + + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + for (stale = &shinfo->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + + sum += skb_frag_size(frag++); + + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. + */ + if (stale_size > IDPF_TX_MAX_DESC_DATA) { + int align_pad = -(skb_frag_off(stale)) & + (IDPF_TX_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED; + stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED; + } while (stale_size > IDPF_TX_MAX_DESC_DATA); + } + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + if (!nr_frags--) + break; + + sum -= stale_size; + } + + return false; +} + +/** + * idpf_chk_linearize - Check if skb exceeds max descriptors per packet + * @skb: send buffer + * @max_bufs: maximum scatter gather buffers for single packet + * @count: number of buffers this packet needs + * + * Make sure we don't exceed maximum scatter gather buffers for a single + * packet. We have to do some special checking around the boundary (max_bufs-1) + * if TSO is on since we need count the TSO header and payload separately. + * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO + * header, 1 for segment payload, and then 7 for the fragments. + */ +bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + unsigned int count) +{ + if (likely(count < max_bufs)) + return false; + if (skb_is_gso(skb)) + return __idpf_chk_linearize(skb, max_bufs); + + return count > max_bufs; +} + +/** + * idpf_tx_splitq_get_ctx_desc - grab next desc and update buffer ring + * @txq: queue to put context descriptor on + * + * Since the TX buffer rings mimics the descriptor ring, update the tx buffer + * ring entry to reflect that this index is a context descriptor + */ +static struct idpf_flex_tx_ctx_desc * +idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq) +{ + struct idpf_flex_tx_ctx_desc *desc; + int i = txq->next_to_use; + + memset(&txq->tx_buf[i], 0, sizeof(struct idpf_tx_buf)); + txq->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + + /* grab the next descriptor */ + desc = IDPF_FLEX_TX_CTX_DESC(txq, i); + txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i); + + return desc; +} + +/** + * idpf_tx_drop_skb - free the SKB and bump tail if necessary + * @tx_q: queue to send buffer on + * @skb: pointer to skb + */ +netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb) +{ + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.tx.skb_drops); + u64_stats_update_end(&tx_q->stats_sync); + + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + dev_kfree_skb(skb); + + return NETDEV_TX_OK; +} + +/** + * idpf_tx_splitq_frame - Sends buffer on Tx ring using flex descriptors + * @skb: send buffer + * @tx_q: queue to send buffer on + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, + struct idpf_queue *tx_q) +{ + struct idpf_tx_splitq_params tx_params = { }; + struct idpf_tx_buf *first; + unsigned int count; + int tso; + + count = idpf_tx_desc_count_required(tx_q, skb); + if (unlikely(!count)) + return idpf_tx_drop_skb(tx_q, skb); + + tso = idpf_tso(skb, &tx_params.offload); + if (unlikely(tso < 0)) + return idpf_tx_drop_skb(tx_q, skb); + + /* Check for splitq specific TX resources */ + count += (IDPF_TX_DESCS_PER_CACHE_LINE + tso); + if (idpf_tx_maybe_stop_splitq(tx_q, count)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + return NETDEV_TX_BUSY; + } + + if (tso) { + /* If tso is needed, set up context desc */ + struct idpf_flex_tx_ctx_desc *ctx_desc = + idpf_tx_splitq_get_ctx_desc(tx_q); + + ctx_desc->tso.qw1.cmd_dtype = + cpu_to_le16(IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX | + IDPF_TX_FLEX_CTX_DESC_CMD_TSO); + ctx_desc->tso.qw0.flex_tlen = + cpu_to_le32(tx_params.offload.tso_len & + IDPF_TXD_FLEX_CTX_TLEN_M); + ctx_desc->tso.qw0.mss_rt = + cpu_to_le16(tx_params.offload.mss & + IDPF_TXD_FLEX_CTX_MSS_RT_M); + ctx_desc->tso.qw0.hdr_len = tx_params.offload.tso_hdr_len; + + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.tx.lso_pkts); + u64_stats_update_end(&tx_q->stats_sync); + } + + /* record the location of the first descriptor for this packet */ + first = &tx_q->tx_buf[tx_q->next_to_use]; + first->skb = skb; + + if (tso) { + first->gso_segs = tx_params.offload.tso_segs; + first->bytecount = skb->len + + ((first->gso_segs - 1) * tx_params.offload.tso_hdr_len); + } else { + first->gso_segs = 1; + first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); + } + + if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_q->flags)) { + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; + tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; + /* Set the RE bit to catch any packets that may have not been + * stashed during RS completion cleaning. MIN_GAP is set to + * MIN_RING size to ensure it will be set at least once each + * time around the ring. + */ + if (!(tx_q->next_to_use % IDPF_TX_SPLITQ_RE_MIN_GAP)) { + tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE; + tx_q->txq_grp->num_completions_pending++; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL) + tx_params.offload.td_cmd |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN; + + } else { + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2; + tx_params.eop_cmd = IDPF_TXD_LAST_DESC_CMD; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + tx_params.offload.td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; + } + + idpf_tx_splitq_map(tx_q, &tx_params, first); + + return NETDEV_TX_OK; +} + +/** + * idpf_tx_splitq_start - Selects the right Tx queue to send buffer + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, + struct net_device *netdev) +{ + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + struct idpf_queue *tx_q; + + if (unlikely(skb_get_queue_mapping(skb) >= vport->num_txq)) { + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; + } + + tx_q = vport->txqs[skb_get_queue_mapping(skb)]; + + /* hardware can't handle really short frames, hardware padding works + * beyond this point + */ + if (skb_put_padto(skb, tx_q->tx_min_pkt_len)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + return NETDEV_TX_OK; + } + + return idpf_tx_splitq_frame(skb, tx_q); +} + +/** + * idpf_ptype_to_htype - get a hash type + * @decoded: Decoded Rx packet type related fields + * + * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by + * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of + * Rx desc. + */ +enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded) +{ + if (!decoded->known) + return PKT_HASH_TYPE_NONE; + if (decoded->payload_layer == IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 && + decoded->inner_prot) + return PKT_HASH_TYPE_L4; + if (decoded->payload_layer == IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 && + decoded->outer_ip) + return PKT_HASH_TYPE_L3; + if (decoded->outer_ip == IDPF_RX_PTYPE_OUTER_L2) + return PKT_HASH_TYPE_L2; + + return PKT_HASH_TYPE_NONE; +} + +/** + * idpf_rx_hash - set the hash value in the skb + * @rxq: Rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being populated + * @rx_desc: Receive descriptor + * @decoded: Decoded Rx packet type related fields + */ +static void idpf_rx_hash(struct idpf_queue *rxq, struct sk_buff *skb, + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, + struct idpf_rx_ptype_decoded *decoded) +{ + u32 hash; + + if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXHASH))) + return; + + hash = le16_to_cpu(rx_desc->hash1) | + (rx_desc->ff2_mirrid_hash2.hash2 << 16) | + (rx_desc->hash3 << 24); + + skb_set_hash(skb, hash, idpf_ptype_to_htype(decoded)); +} + +/** + * idpf_rx_csum - Indicate in skb if checksum is good + * @rxq: Rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being populated + * @csum_bits: checksum fields extracted from the descriptor + * @decoded: Decoded Rx packet type related fields + * + * skb->protocol must be set before this function is called + */ +static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb, + struct idpf_rx_csum_decoded *csum_bits, + struct idpf_rx_ptype_decoded *decoded) +{ + bool ipv4, ipv6; + + /* check if Rx checksum is enabled */ + if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXCSUM))) + return; + + /* check if HW has decoded the packet and checksum */ + if (!(csum_bits->l3l4p)) + return; + + ipv4 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV4); + ipv6 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV6); + + if (ipv4 && (csum_bits->ipe || csum_bits->eipe)) + goto checksum_fail; + + if (ipv6 && csum_bits->ipv6exadd) + return; + + /* check for L4 errors and handle packets that were not able to be + * checksummed + */ + if (csum_bits->l4e) + goto checksum_fail; + + /* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */ + switch (decoded->inner_prot) { + case IDPF_RX_PTYPE_INNER_PROT_ICMP: + case IDPF_RX_PTYPE_INNER_PROT_TCP: + case IDPF_RX_PTYPE_INNER_PROT_UDP: + if (!csum_bits->raw_csum_inv) { + u16 csum = csum_bits->raw_csum; + + skb->csum = csum_unfold((__force __sum16)~swab16(csum)); + skb->ip_summed = CHECKSUM_COMPLETE; + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + break; + case IDPF_RX_PTYPE_INNER_PROT_SCTP: + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + default: + break; + } + + return; + +checksum_fail: + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.hw_csum_err); + u64_stats_update_end(&rxq->stats_sync); +} + +/** + * idpf_rx_splitq_extract_csum_bits - Extract checksum bits from descriptor + * @rx_desc: receive descriptor + * @csum: structure to extract checksum fields + * + **/ +static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, + struct idpf_rx_csum_decoded *csum) +{ + u8 qword0, qword1; + + qword0 = rx_desc->status_err0_qw0; + qword1 = rx_desc->status_err0_qw1; + + csum->ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_M, + qword1); + csum->eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_M, + qword1); + csum->l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_M, + qword1); + csum->l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_M, + qword1); + csum->ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M, + qword0); + csum->raw_csum_inv = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M, + le16_to_cpu(rx_desc->ptype_err_fflags0)); + csum->raw_csum = le16_to_cpu(rx_desc->misc.raw_cs); +} + +/** + * idpf_rx_rsc - Set the RSC fields in the skb + * @rxq : Rx descriptor ring packet is being transacted on + * @skb : pointer to current skb being populated + * @rx_desc: Receive descriptor + * @decoded: Decoded Rx packet type related fields + * + * Return 0 on success and error code on failure + * + * Populate the skb fields with the total number of RSC segments, RSC payload + * length and packet type. + */ +static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, + struct idpf_rx_ptype_decoded *decoded) +{ + u16 rsc_segments, rsc_seg_len; + bool ipv4, ipv6; + int len; + + if (unlikely(!decoded->outer_ip)) + return -EINVAL; + + rsc_seg_len = le16_to_cpu(rx_desc->misc.rscseglen); + if (unlikely(!rsc_seg_len)) + return -EINVAL; + + ipv4 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV4); + ipv6 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV6); + + if (unlikely(!(ipv4 ^ ipv6))) + return -EINVAL; + + rsc_segments = DIV_ROUND_UP(skb->data_len, rsc_seg_len); + if (unlikely(rsc_segments == 1)) + return 0; + + NAPI_GRO_CB(skb)->count = rsc_segments; + skb_shinfo(skb)->gso_size = rsc_seg_len; + + skb_reset_network_header(skb); + len = skb->len - skb_transport_offset(skb); + + if (ipv4) { + struct iphdr *ipv4h = ip_hdr(skb); + + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + /* Reset and set transport header offset in skb */ + skb_set_transport_header(skb, sizeof(struct iphdr)); + + /* Compute the TCP pseudo header checksum*/ + tcp_hdr(skb)->check = + ~tcp_v4_check(len, ipv4h->saddr, ipv4h->daddr, 0); + } else { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + tcp_hdr(skb)->check = + ~tcp_v6_check(len, &ipv6h->saddr, &ipv6h->daddr, 0); + } + + tcp_gro_complete(skb); + + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.rsc_pkts); + u64_stats_update_end(&rxq->stats_sync); + + return 0; +} + +/** + * idpf_rx_process_skb_fields - Populate skb header fields from Rx descriptor + * @rxq: Rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being populated + * @rx_desc: Receive descriptor + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, protocol, and + * other fields within the skb. + */ +static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, + struct sk_buff *skb, + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) +{ + struct idpf_rx_csum_decoded csum_bits = { }; + struct idpf_rx_ptype_decoded decoded; + u16 rx_ptype; + + rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M, + le16_to_cpu(rx_desc->ptype_err_fflags0)); + + decoded = rxq->vport->rx_ptype_lkup[rx_ptype]; + /* If we don't know the ptype we can't do anything else with it. Just + * pass it up the stack as-is. + */ + if (!decoded.known) + return 0; + + /* process RSS/hash */ + idpf_rx_hash(rxq, skb, rx_desc, &decoded); + + skb->protocol = eth_type_trans(skb, rxq->vport->netdev); + + if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M, + le16_to_cpu(rx_desc->hdrlen_flags))) + return idpf_rx_rsc(rxq, skb, rx_desc, &decoded); + + idpf_rx_splitq_extract_csum_bits(rx_desc, &csum_bits); + idpf_rx_csum(rxq, skb, &csum_bits, &decoded); + + return 0; +} + +/** + * idpf_rx_add_frag - Add contents of Rx buffer to sk_buff as a frag + * @rx_buf: buffer containing page to add + * @skb: sk_buff to place the data into + * @size: packet length from rx_desc + * + * This function will add the data contained in rx_buf->page to the skb. + * It will just attach the page as a frag to the skb. + * The function will then update the page offset. + */ +void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, + unsigned int size) +{ + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page, + rx_buf->page_offset, size, rx_buf->truesize); + + rx_buf->page = NULL; +} + +/** + * idpf_rx_construct_skb - Allocate skb and populate it + * @rxq: Rx descriptor queue + * @rx_buf: Rx buffer to pull data from + * @size: the length of the packet + * + * This function allocates an skb. It then populates it with the page + * data from the current receive descriptor, taking care to set up the + * skb correctly. + */ +struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, + struct idpf_rx_buf *rx_buf, + unsigned int size) +{ + unsigned int headlen; + struct sk_buff *skb; + void *va; + + va = page_address(rx_buf->page) + rx_buf->page_offset; + + /* prefetch first cache line of first page */ + net_prefetch(va); + /* allocate a skb to store the frags */ + skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE, + GFP_ATOMIC); + if (unlikely(!skb)) { + idpf_rx_put_page(rx_buf); + + return NULL; + } + + skb_record_rx_queue(skb, rxq->idx); + skb_mark_for_recycle(skb); + + /* Determine available headroom for copy */ + headlen = size; + if (headlen > IDPF_RX_HDR_SIZE) + headlen = eth_get_headlen(skb->dev, va, IDPF_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + + /* if we exhaust the linear part then add what is left as a frag */ + size -= headlen; + if (!size) { + idpf_rx_put_page(rx_buf); + + return skb; + } + + skb_add_rx_frag(skb, 0, rx_buf->page, rx_buf->page_offset + headlen, + size, rx_buf->truesize); + + /* Since we're giving the page to the stack, clear our reference to it. + * We'll get a new one during buffer posting. + */ + rx_buf->page = NULL; + + return skb; +} + +/** + * idpf_rx_hdr_construct_skb - Allocate skb and populate it from header buffer + * @rxq: Rx descriptor queue + * @va: Rx buffer to pull data from + * @size: the length of the packet + * + * This function allocates an skb. It then populates it with the page data from + * the current receive descriptor, taking care to set up the skb correctly. + * This specifically uses a header buffer to start building the skb. + */ +static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq, + const void *va, + unsigned int size) +{ + struct sk_buff *skb; + + /* allocate a skb to store the frags */ + skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + skb_record_rx_queue(skb, rxq->idx); + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* More than likely, a payload fragment, which will use a page from + * page_pool will be added to the SKB so mark it for recycle + * preemptively. And if not, it's inconsequential. + */ + skb_mark_for_recycle(skb); + + return skb; +} + +/** + * idpf_rx_splitq_test_staterr - tests bits in Rx descriptor + * status and error fields + * @stat_err_field: field from descriptor to test bits in + * @stat_err_bits: value to mask + * + */ +static bool idpf_rx_splitq_test_staterr(const u8 stat_err_field, + const u8 stat_err_bits) +{ + return !!(stat_err_field & stat_err_bits); +} + +/** + * idpf_rx_splitq_is_eop - process handling of EOP buffers + * @rx_desc: Rx descriptor for current buffer + * + * If the buffer is an EOP buffer, this function exits returning true, + * otherwise return false indicating that this is in fact a non-EOP buffer. + */ +static bool idpf_rx_splitq_is_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) +{ + /* if we are the last buffer then there is nothing else to do */ + return likely(idpf_rx_splitq_test_staterr(rx_desc->status_err0_qw1, + IDPF_RXD_EOF_SPLITQ)); +} + +/** + * idpf_rx_splitq_clean - Clean completed descriptors from Rx queue + * @rxq: Rx descriptor queue to retrieve receive buffer queue + * @budget: Total limit on number of packets to process + * + * This function provides a "bounce buffer" approach to Rx interrupt + * processing. The advantage to this is that on systems that have + * expensive overhead for IOMMU access this provides a means of avoiding + * it by maintaining the mapping of the page to the system. + * + * Returns amount of work completed + */ +static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) +{ + int total_rx_bytes = 0, total_rx_pkts = 0; + struct idpf_queue *rx_bufq = NULL; + struct sk_buff *skb = rxq->skb; + u16 ntc = rxq->next_to_clean; + + /* Process Rx packets bounded by budget */ + while (likely(total_rx_pkts < budget)) { + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc; + struct idpf_sw_queue *refillq = NULL; + struct idpf_rxq_set *rxq_set = NULL; + struct idpf_rx_buf *rx_buf = NULL; + union virtchnl2_rx_desc *desc; + unsigned int pkt_len = 0; + unsigned int hdr_len = 0; + u16 gen_id, buf_id = 0; + /* Header buffer overflow only valid for header split */ + bool hbo = false; + int bufq_id; + u8 rxdid; + + /* get the Rx desc from Rx queue based on 'next_to_clean' */ + desc = IDPF_RX_DESC(rxq, ntc); + rx_desc = (struct virtchnl2_rx_flex_desc_adv_nic_3 *)desc; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc + */ + dma_rmb(); + + /* if the descriptor isn't done, no work yet to do */ + gen_id = le16_to_cpu(rx_desc->pktlen_gen_bufq_id); + gen_id = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M, gen_id); + + if (test_bit(__IDPF_Q_GEN_CHK, rxq->flags) != gen_id) + break; + + rxdid = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M, + rx_desc->rxdid_ucast); + if (rxdid != VIRTCHNL2_RXDID_2_FLEX_SPLITQ) { + IDPF_RX_BUMP_NTC(rxq, ntc); + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.bad_descs); + u64_stats_update_end(&rxq->stats_sync); + continue; + } + + pkt_len = le16_to_cpu(rx_desc->pktlen_gen_bufq_id); + pkt_len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M, + pkt_len); + + hbo = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_HBO_M, + rx_desc->status_err0_qw1); + + if (unlikely(hbo)) { + /* If a header buffer overflow, occurs, i.e. header is + * too large to fit in the header split buffer, HW will + * put the entire packet, including headers, in the + * data/payload buffer. + */ + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.hsplit_buf_ovf); + u64_stats_update_end(&rxq->stats_sync); + goto bypass_hsplit; + } + + hdr_len = le16_to_cpu(rx_desc->hdrlen_flags); + hdr_len = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M, + hdr_len); + +bypass_hsplit: + bufq_id = le16_to_cpu(rx_desc->pktlen_gen_bufq_id); + bufq_id = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M, + bufq_id); + + rxq_set = container_of(rxq, struct idpf_rxq_set, rxq); + if (!bufq_id) + refillq = rxq_set->refillq0; + else + refillq = rxq_set->refillq1; + + /* retrieve buffer from the rxq */ + rx_bufq = &rxq->rxq_grp->splitq.bufq_sets[bufq_id].bufq; + + buf_id = le16_to_cpu(rx_desc->buf_id); + + rx_buf = &rx_bufq->rx_buf.buf[buf_id]; + + if (hdr_len) { + const void *va = (u8 *)rx_bufq->rx_buf.hdr_buf_va + + (u32)buf_id * IDPF_HDR_BUF_SIZE; + + skb = idpf_rx_hdr_construct_skb(rxq, va, hdr_len); + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.hsplit_pkts); + u64_stats_update_end(&rxq->stats_sync); + } + + if (pkt_len) { + idpf_rx_sync_for_cpu(rx_buf, pkt_len); + if (skb) + idpf_rx_add_frag(rx_buf, skb, pkt_len); + else + skb = idpf_rx_construct_skb(rxq, rx_buf, + pkt_len); + } else { + idpf_rx_put_page(rx_buf); + } + + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; + + idpf_rx_post_buf_refill(refillq, buf_id); + + IDPF_RX_BUMP_NTC(rxq, ntc); + /* skip if it is non EOP desc */ + if (!idpf_rx_splitq_is_eop(rx_desc)) + continue; + + /* pad skb if needed (to make valid ethernet frame) */ + if (eth_skb_pad(skb)) { + skb = NULL; + continue; + } + + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; + + /* protocol */ + if (unlikely(idpf_rx_process_skb_fields(rxq, skb, rx_desc))) { + dev_kfree_skb_any(skb); + skb = NULL; + continue; + } + + /* send completed skb up the stack */ + napi_gro_receive(&rxq->q_vector->napi, skb); + skb = NULL; + + /* update budget accounting */ + total_rx_pkts++; + } + + rxq->next_to_clean = ntc; + + rxq->skb = skb; + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_add(&rxq->q_stats.rx.packets, total_rx_pkts); + u64_stats_add(&rxq->q_stats.rx.bytes, total_rx_bytes); + u64_stats_update_end(&rxq->stats_sync); + + /* guarantee a trip back through this routine if there was a failure */ + return total_rx_pkts; +} + +/** + * idpf_rx_update_bufq_desc - Update buffer queue descriptor + * @bufq: Pointer to the buffer queue + * @refill_desc: SW Refill queue descriptor containing buffer ID + * @buf_desc: Buffer queue descriptor + * + * Return 0 on success and negative on failure. + */ +static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc, + struct virtchnl2_splitq_rx_buf_desc *buf_desc) +{ + struct idpf_rx_buf *buf; + dma_addr_t addr; + u16 buf_id; + + buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc); + + buf = &bufq->rx_buf.buf[buf_id]; + + addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size); + if (unlikely(addr == DMA_MAPPING_ERROR)) + return -ENOMEM; + + buf_desc->pkt_addr = cpu_to_le64(addr); + buf_desc->qword0.buf_id = cpu_to_le16(buf_id); + + if (!bufq->rx_hsplit_en) + return 0; + + buf_desc->hdr_addr = cpu_to_le64(bufq->rx_buf.hdr_buf_pa + + (u32)buf_id * IDPF_HDR_BUF_SIZE); + + return 0; +} + +/** + * idpf_rx_clean_refillq - Clean refill queue buffers + * @bufq: buffer queue to post buffers back to + * @refillq: refill queue to clean + * + * This function takes care of the buffer refill management + */ +static void idpf_rx_clean_refillq(struct idpf_queue *bufq, + struct idpf_sw_queue *refillq) +{ + struct virtchnl2_splitq_rx_buf_desc *buf_desc; + u16 bufq_nta = bufq->next_to_alloc; + u16 ntc = refillq->next_to_clean; + int cleaned = 0; + u16 gen; + + buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, bufq_nta); + + /* make sure we stop at ring wrap in the unlikely case ring is full */ + while (likely(cleaned < refillq->desc_count)) { + u16 refill_desc = IDPF_SPLITQ_RX_BI_DESC(refillq, ntc); + bool failure; + + gen = FIELD_GET(IDPF_RX_BI_GEN_M, refill_desc); + if (test_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags) != gen) + break; + + failure = idpf_rx_update_bufq_desc(bufq, refill_desc, + buf_desc); + if (failure) + break; + + if (unlikely(++ntc == refillq->desc_count)) { + change_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags); + ntc = 0; + } + + if (unlikely(++bufq_nta == bufq->desc_count)) { + buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, 0); + bufq_nta = 0; + } else { + buf_desc++; + } + + cleaned++; + } + + if (!cleaned) + return; + + /* We want to limit how many transactions on the bus we trigger with + * tail writes so we only do it in strides. It's also important we + * align the write to a multiple of 8 as required by HW. + */ + if (((bufq->next_to_use <= bufq_nta ? 0 : bufq->desc_count) + + bufq_nta - bufq->next_to_use) >= IDPF_RX_BUF_POST_STRIDE) + idpf_rx_buf_hw_update(bufq, ALIGN_DOWN(bufq_nta, + IDPF_RX_BUF_POST_STRIDE)); + + /* update next to alloc since we have filled the ring */ + refillq->next_to_clean = ntc; + bufq->next_to_alloc = bufq_nta; +} + +/** + * idpf_rx_clean_refillq_all - Clean all refill queues + * @bufq: buffer queue with refill queues + * + * Iterates through all refill queues assigned to the buffer queue assigned to + * this vector. Returns true if clean is complete within budget, false + * otherwise. + */ +static void idpf_rx_clean_refillq_all(struct idpf_queue *bufq) +{ + struct idpf_bufq_set *bufq_set; + int i; + + bufq_set = container_of(bufq, struct idpf_bufq_set, bufq); + for (i = 0; i < bufq_set->num_refillqs; i++) + idpf_rx_clean_refillq(bufq, &bufq_set->refillqs[i]); +} + +/** + * idpf_vport_intr_clean_queues - MSIX mode Interrupt Handler + * @irq: interrupt number + * @data: pointer to a q_vector + * + */ +static irqreturn_t idpf_vport_intr_clean_queues(int __always_unused irq, + void *data) +{ + struct idpf_q_vector *q_vector = (struct idpf_q_vector *)data; + + q_vector->total_events++; + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * idpf_vport_intr_napi_del_all - Unregister napi for all q_vectors in vport + * @vport: virtual port structure + * + */ +static void idpf_vport_intr_napi_del_all(struct idpf_vport *vport) +{ + u16 v_idx; + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) + netif_napi_del(&vport->q_vectors[v_idx].napi); +} + +/** + * idpf_vport_intr_napi_dis_all - Disable NAPI for all q_vectors in the vport + * @vport: main vport structure + */ +static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport) +{ + int v_idx; + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) + napi_disable(&vport->q_vectors[v_idx].napi); +} + +/** + * idpf_vport_intr_rel - Free memory allocated for interrupt vectors + * @vport: virtual port + * + * Free the memory allocated for interrupt vectors associated to a vport + */ +void idpf_vport_intr_rel(struct idpf_vport *vport) +{ + int i, j, v_idx; + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; + + kfree(q_vector->bufq); + q_vector->bufq = NULL; + kfree(q_vector->tx); + q_vector->tx = NULL; + kfree(q_vector->rx); + q_vector->rx = NULL; + } + + /* Clean up the mapping of queues to vectors */ + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + + if (idpf_is_queue_model_split(vport->rxq_model)) + for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++) + rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL; + else + for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) + rx_qgrp->singleq.rxqs[j]->q_vector = NULL; + } + + if (idpf_is_queue_model_split(vport->txq_model)) + for (i = 0; i < vport->num_txq_grp; i++) + vport->txq_grps[i].complq->q_vector = NULL; + else + for (i = 0; i < vport->num_txq_grp; i++) + for (j = 0; j < vport->txq_grps[i].num_txq; j++) + vport->txq_grps[i].txqs[j]->q_vector = NULL; + + kfree(vport->q_vectors); + vport->q_vectors = NULL; +} + +/** + * idpf_vport_intr_rel_irq - Free the IRQ association with the OS + * @vport: main vport structure + */ +static void idpf_vport_intr_rel_irq(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + int vector; + + for (vector = 0; vector < vport->num_q_vectors; vector++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; + int irq_num, vidx; + + /* free only the irqs that were actually requested */ + if (!q_vector) + continue; + + vidx = vport->q_vector_idxs[vector]; + irq_num = adapter->msix_entries[vidx].vector; + + /* clear the affinity_mask in the IRQ descriptor */ + irq_set_affinity_hint(irq_num, NULL); + free_irq(irq_num, q_vector); + } +} + +/** + * idpf_vport_intr_dis_irq_all - Disable all interrupt + * @vport: main vport structure + */ +static void idpf_vport_intr_dis_irq_all(struct idpf_vport *vport) +{ + struct idpf_q_vector *q_vector = vport->q_vectors; + int q_idx; + + for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) + writel(0, q_vector[q_idx].intr_reg.dyn_ctl); +} + +/** + * idpf_vport_intr_buildreg_itr - Enable default interrupt generation settings + * @q_vector: pointer to q_vector + * @type: itr index + * @itr: itr value + */ +static u32 idpf_vport_intr_buildreg_itr(struct idpf_q_vector *q_vector, + const int type, u16 itr) +{ + u32 itr_val; + + itr &= IDPF_ITR_MASK; + /* Don't clear PBA because that can cause lost interrupts that + * came in while we were cleaning/polling + */ + itr_val = q_vector->intr_reg.dyn_ctl_intena_m | + (type << q_vector->intr_reg.dyn_ctl_itridx_s) | + (itr << (q_vector->intr_reg.dyn_ctl_intrvl_s - 1)); + + return itr_val; +} + +/** + * idpf_update_dim_sample - Update dim sample with packets and bytes + * @q_vector: the vector associated with the interrupt + * @dim_sample: dim sample to update + * @dim: dim instance structure + * @packets: total packets + * @bytes: total bytes + * + * Update the dim sample with the packets and bytes which are passed to this + * function. Set the dim state appropriately if the dim settings gets stale. + */ +static void idpf_update_dim_sample(struct idpf_q_vector *q_vector, + struct dim_sample *dim_sample, + struct dim *dim, u64 packets, u64 bytes) +{ + dim_update_sample(q_vector->total_events, packets, bytes, dim_sample); + dim_sample->comp_ctr = 0; + + /* if dim settings get stale, like when not updated for 1 second or + * longer, force it to start again. This addresses the frequent case + * of an idle queue being switched to by the scheduler. + */ + if (ktime_ms_delta(dim_sample->time, dim->start_sample.time) >= HZ) + dim->state = DIM_START_MEASURE; +} + +/** + * idpf_net_dim - Update net DIM algorithm + * @q_vector: the vector associated with the interrupt + * + * Create a DIM sample and notify net_dim() so that it can possibly decide + * a new ITR value based on incoming packets, bytes, and interrupts. + * + * This function is a no-op if the queue is not configured to dynamic ITR. + */ +static void idpf_net_dim(struct idpf_q_vector *q_vector) +{ + struct dim_sample dim_sample = { }; + u64 packets, bytes; + u32 i; + + if (!IDPF_ITR_IS_DYNAMIC(q_vector->tx_intr_mode)) + goto check_rx_itr; + + for (i = 0, packets = 0, bytes = 0; i < q_vector->num_txq; i++) { + struct idpf_queue *txq = q_vector->tx[i]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&txq->stats_sync); + packets += u64_stats_read(&txq->q_stats.tx.packets); + bytes += u64_stats_read(&txq->q_stats.tx.bytes); + } while (u64_stats_fetch_retry(&txq->stats_sync, start)); + } + + idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->tx_dim, + packets, bytes); + net_dim(&q_vector->tx_dim, dim_sample); + +check_rx_itr: + if (!IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode)) + return; + + for (i = 0, packets = 0, bytes = 0; i < q_vector->num_rxq; i++) { + struct idpf_queue *rxq = q_vector->rx[i]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&rxq->stats_sync); + packets += u64_stats_read(&rxq->q_stats.rx.packets); + bytes += u64_stats_read(&rxq->q_stats.rx.bytes); + } while (u64_stats_fetch_retry(&rxq->stats_sync, start)); + } + + idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->rx_dim, + packets, bytes); + net_dim(&q_vector->rx_dim, dim_sample); +} + +/** + * idpf_vport_intr_update_itr_ena_irq - Update itr and re-enable MSIX interrupt + * @q_vector: q_vector for which itr is being updated and interrupt enabled + * + * Update the net_dim() algorithm and re-enable the interrupt associated with + * this vector. + */ +void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) +{ + u32 intval; + + /* net_dim() updates ITR out-of-band using a work item */ + idpf_net_dim(q_vector); + + intval = idpf_vport_intr_buildreg_itr(q_vector, + IDPF_NO_ITR_UPDATE_IDX, 0); + + writel(intval, q_vector->intr_reg.dyn_ctl); +} + +/** + * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport + * @vport: main vport structure + * @basename: name for the vector + */ +static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) +{ + struct idpf_adapter *adapter = vport->adapter; + int vector, err, irq_num, vidx; + const char *vec_name; + + for (vector = 0; vector < vport->num_q_vectors; vector++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; + + vidx = vport->q_vector_idxs[vector]; + irq_num = adapter->msix_entries[vidx].vector; + + if (q_vector->num_rxq && q_vector->num_txq) + vec_name = "TxRx"; + else if (q_vector->num_rxq) + vec_name = "Rx"; + else if (q_vector->num_txq) + vec_name = "Tx"; + else + continue; + + q_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d", + basename, vec_name, vidx); + + err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, + q_vector->name, q_vector); + if (err) { + netdev_err(vport->netdev, + "Request_irq failed, error: %d\n", err); + goto free_q_irqs; + } + /* assign the mask for this irq */ + irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); + } + + return 0; + +free_q_irqs: + while (--vector >= 0) { + vidx = vport->q_vector_idxs[vector]; + irq_num = adapter->msix_entries[vidx].vector; + free_irq(irq_num, &vport->q_vectors[vector]); + } + + return err; +} + +/** + * idpf_vport_intr_write_itr - Write ITR value to the ITR register + * @q_vector: q_vector structure + * @itr: Interrupt throttling rate + * @tx: Tx or Rx ITR + */ +void idpf_vport_intr_write_itr(struct idpf_q_vector *q_vector, u16 itr, bool tx) +{ + struct idpf_intr_reg *intr_reg; + + if (tx && !q_vector->tx) + return; + else if (!tx && !q_vector->rx) + return; + + intr_reg = &q_vector->intr_reg; + writel(ITR_REG_ALIGN(itr) >> IDPF_ITR_GRAN_S, + tx ? intr_reg->tx_itr : intr_reg->rx_itr); +} + +/** + * idpf_vport_intr_ena_irq_all - Enable IRQ for the given vport + * @vport: main vport structure + */ +static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport) +{ + bool dynamic; + int q_idx; + u16 itr; + + for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) { + struct idpf_q_vector *qv = &vport->q_vectors[q_idx]; + + /* Set the initial ITR values */ + if (qv->num_txq) { + dynamic = IDPF_ITR_IS_DYNAMIC(qv->tx_intr_mode); + itr = vport->tx_itr_profile[qv->tx_dim.profile_ix]; + idpf_vport_intr_write_itr(qv, dynamic ? + itr : qv->tx_itr_value, + true); + } + + if (qv->num_rxq) { + dynamic = IDPF_ITR_IS_DYNAMIC(qv->rx_intr_mode); + itr = vport->rx_itr_profile[qv->rx_dim.profile_ix]; + idpf_vport_intr_write_itr(qv, dynamic ? + itr : qv->rx_itr_value, + false); + } + + if (qv->num_txq || qv->num_rxq) + idpf_vport_intr_update_itr_ena_irq(qv); + } +} + +/** + * idpf_vport_intr_deinit - Release all vector associations for the vport + * @vport: main vport structure + */ +void idpf_vport_intr_deinit(struct idpf_vport *vport) +{ + idpf_vport_intr_napi_dis_all(vport); + idpf_vport_intr_napi_del_all(vport); + idpf_vport_intr_dis_irq_all(vport); + idpf_vport_intr_rel_irq(vport); +} + +/** + * idpf_tx_dim_work - Call back from the stack + * @work: work queue structure + */ +static void idpf_tx_dim_work(struct work_struct *work) +{ + struct idpf_q_vector *q_vector; + struct idpf_vport *vport; + struct dim *dim; + u16 itr; + + dim = container_of(work, struct dim, work); + q_vector = container_of(dim, struct idpf_q_vector, tx_dim); + vport = q_vector->vport; + + if (dim->profile_ix >= ARRAY_SIZE(vport->tx_itr_profile)) + dim->profile_ix = ARRAY_SIZE(vport->tx_itr_profile) - 1; + + /* look up the values in our local table */ + itr = vport->tx_itr_profile[dim->profile_ix]; + + idpf_vport_intr_write_itr(q_vector, itr, true); + + dim->state = DIM_START_MEASURE; +} + +/** + * idpf_rx_dim_work - Call back from the stack + * @work: work queue structure + */ +static void idpf_rx_dim_work(struct work_struct *work) +{ + struct idpf_q_vector *q_vector; + struct idpf_vport *vport; + struct dim *dim; + u16 itr; + + dim = container_of(work, struct dim, work); + q_vector = container_of(dim, struct idpf_q_vector, rx_dim); + vport = q_vector->vport; + + if (dim->profile_ix >= ARRAY_SIZE(vport->rx_itr_profile)) + dim->profile_ix = ARRAY_SIZE(vport->rx_itr_profile) - 1; + + /* look up the values in our local table */ + itr = vport->rx_itr_profile[dim->profile_ix]; + + idpf_vport_intr_write_itr(q_vector, itr, false); + + dim->state = DIM_START_MEASURE; +} + +/** + * idpf_init_dim - Set up dynamic interrupt moderation + * @qv: q_vector structure + */ +static void idpf_init_dim(struct idpf_q_vector *qv) +{ + INIT_WORK(&qv->tx_dim.work, idpf_tx_dim_work); + qv->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + qv->tx_dim.profile_ix = IDPF_DIM_DEFAULT_PROFILE_IX; + + INIT_WORK(&qv->rx_dim.work, idpf_rx_dim_work); + qv->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + qv->rx_dim.profile_ix = IDPF_DIM_DEFAULT_PROFILE_IX; +} + +/** + * idpf_vport_intr_napi_ena_all - Enable NAPI for all q_vectors in the vport + * @vport: main vport structure + */ +static void idpf_vport_intr_napi_ena_all(struct idpf_vport *vport) +{ + int q_idx; + + for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[q_idx]; + + idpf_init_dim(q_vector); + napi_enable(&q_vector->napi); + } +} + +/** + * idpf_tx_splitq_clean_all- Clean completion queues + * @q_vec: queue vector + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns false if clean is not complete else returns true + */ +static bool idpf_tx_splitq_clean_all(struct idpf_q_vector *q_vec, + int budget, int *cleaned) +{ + u16 num_txq = q_vec->num_txq; + bool clean_complete = true; + int i, budget_per_q; + + if (unlikely(!num_txq)) + return true; + + budget_per_q = DIV_ROUND_UP(budget, num_txq); + for (i = 0; i < num_txq; i++) + clean_complete &= idpf_tx_clean_complq(q_vec->tx[i], + budget_per_q, cleaned); + + return clean_complete; +} + +/** + * idpf_rx_splitq_clean_all- Clean completion queues + * @q_vec: queue vector + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns false if clean is not complete else returns true + */ +static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget, + int *cleaned) +{ + u16 num_rxq = q_vec->num_rxq; + bool clean_complete = true; + int pkts_cleaned = 0; + int i, budget_per_q; + + /* We attempt to distribute budget to each Rx queue fairly, but don't + * allow the budget to go below 1 because that would exit polling early. + */ + budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0; + for (i = 0; i < num_rxq; i++) { + struct idpf_queue *rxq = q_vec->rx[i]; + int pkts_cleaned_per_q; + + pkts_cleaned_per_q = idpf_rx_splitq_clean(rxq, budget_per_q); + /* if we clean as many as budgeted, we must not be done */ + if (pkts_cleaned_per_q >= budget_per_q) + clean_complete = false; + pkts_cleaned += pkts_cleaned_per_q; + } + *cleaned = pkts_cleaned; + + for (i = 0; i < q_vec->num_bufq; i++) + idpf_rx_clean_refillq_all(q_vec->bufq[i]); + + return clean_complete; +} + +/** + * idpf_vport_splitq_napi_poll - NAPI handler + * @napi: struct from which you get q_vector + * @budget: budget provided by stack + */ +static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) +{ + struct idpf_q_vector *q_vector = + container_of(napi, struct idpf_q_vector, napi); + bool clean_complete; + int work_done = 0; + + /* Handle case where we are called by netpoll with a budget of 0 */ + if (unlikely(!budget)) { + idpf_tx_splitq_clean_all(q_vector, budget, &work_done); + + return 0; + } + + clean_complete = idpf_rx_splitq_clean_all(q_vector, budget, &work_done); + clean_complete &= idpf_tx_splitq_clean_all(q_vector, budget, &work_done); + + /* If work not completed, return budget and polling will return */ + if (!clean_complete) + return budget; + + work_done = min_t(int, work_done, budget - 1); + + /* Exit the polling mode, but don't re-enable interrupts if stack might + * poll us due to busy-polling + */ + if (likely(napi_complete_done(napi, work_done))) + idpf_vport_intr_update_itr_ena_irq(q_vector); + + /* Switch to poll mode in the tear-down path after sending disable + * queues virtchnl message, as the interrupts will be disabled after + * that + */ + if (unlikely(q_vector->num_txq && test_bit(__IDPF_Q_POLL_MODE, + q_vector->tx[0]->flags))) + return budget; + else + return work_done; +} + +/** + * idpf_vport_intr_map_vector_to_qs - Map vectors to queues + * @vport: virtual port + * + * Mapping for vectors to queues + */ +static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport) +{ + u16 num_txq_grp = vport->num_txq_grp; + int i, j, qv_idx, bufq_vidx = 0; + struct idpf_rxq_group *rx_qgrp; + struct idpf_txq_group *tx_qgrp; + struct idpf_queue *q, *bufq; + u16 q_index; + + for (i = 0, qv_idx = 0; i < vport->num_rxq_grp; i++) { + u16 num_rxq; + + rx_qgrp = &vport->rxq_grps[i]; + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { + if (qv_idx >= vport->num_q_vectors) + qv_idx = 0; + + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + q->q_vector = &vport->q_vectors[qv_idx]; + q_index = q->q_vector->num_rxq; + q->q_vector->rx[q_index] = q; + q->q_vector->num_rxq++; + qv_idx++; + } + + if (idpf_is_queue_model_split(vport->rxq_model)) { + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + bufq = &rx_qgrp->splitq.bufq_sets[j].bufq; + bufq->q_vector = &vport->q_vectors[bufq_vidx]; + q_index = bufq->q_vector->num_bufq; + bufq->q_vector->bufq[q_index] = bufq; + bufq->q_vector->num_bufq++; + } + if (++bufq_vidx >= vport->num_q_vectors) + bufq_vidx = 0; + } + } + + for (i = 0, qv_idx = 0; i < num_txq_grp; i++) { + u16 num_txq; + + tx_qgrp = &vport->txq_grps[i]; + num_txq = tx_qgrp->num_txq; + + if (idpf_is_queue_model_split(vport->txq_model)) { + if (qv_idx >= vport->num_q_vectors) + qv_idx = 0; + + q = tx_qgrp->complq; + q->q_vector = &vport->q_vectors[qv_idx]; + q_index = q->q_vector->num_txq; + q->q_vector->tx[q_index] = q; + q->q_vector->num_txq++; + qv_idx++; + } else { + for (j = 0; j < num_txq; j++) { + if (qv_idx >= vport->num_q_vectors) + qv_idx = 0; + + q = tx_qgrp->txqs[j]; + q->q_vector = &vport->q_vectors[qv_idx]; + q_index = q->q_vector->num_txq; + q->q_vector->tx[q_index] = q; + q->q_vector->num_txq++; + + qv_idx++; + } + } + } +} + +/** + * idpf_vport_intr_init_vec_idx - Initialize the vector indexes + * @vport: virtual port + * + * Initialize vector indexes with values returened over mailbox + */ +static int idpf_vport_intr_init_vec_idx(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_alloc_vectors *ac; + u16 *vecids, total_vecs; + int i; + + ac = adapter->req_vec_chunks; + if (!ac) { + for (i = 0; i < vport->num_q_vectors; i++) + vport->q_vectors[i].v_idx = vport->q_vector_idxs[i]; + + return 0; + } + + total_vecs = idpf_get_reserved_vecs(adapter); + vecids = kcalloc(total_vecs, sizeof(u16), GFP_KERNEL); + if (!vecids) + return -ENOMEM; + + idpf_get_vec_ids(adapter, vecids, total_vecs, &ac->vchunks); + + for (i = 0; i < vport->num_q_vectors; i++) + vport->q_vectors[i].v_idx = vecids[vport->q_vector_idxs[i]]; + + kfree(vecids); + + return 0; +} + +/** + * idpf_vport_intr_napi_add_all- Register napi handler for all qvectors + * @vport: virtual port structure + */ +static void idpf_vport_intr_napi_add_all(struct idpf_vport *vport) +{ + int (*napi_poll)(struct napi_struct *napi, int budget); + u16 v_idx; + + if (idpf_is_queue_model_split(vport->txq_model)) + napi_poll = idpf_vport_splitq_napi_poll; + else + napi_poll = idpf_vport_singleq_napi_poll; + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; + + netif_napi_add(vport->netdev, &q_vector->napi, napi_poll); + + /* only set affinity_mask if the CPU is online */ + if (cpu_online(v_idx)) + cpumask_set_cpu(v_idx, &q_vector->affinity_mask); + } +} + +/** + * idpf_vport_intr_alloc - Allocate memory for interrupt vectors + * @vport: virtual port + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. + */ +int idpf_vport_intr_alloc(struct idpf_vport *vport) +{ + u16 txqs_per_vector, rxqs_per_vector, bufqs_per_vector; + struct idpf_q_vector *q_vector; + int v_idx, err; + + vport->q_vectors = kcalloc(vport->num_q_vectors, + sizeof(struct idpf_q_vector), GFP_KERNEL); + if (!vport->q_vectors) + return -ENOMEM; + + txqs_per_vector = DIV_ROUND_UP(vport->num_txq, vport->num_q_vectors); + rxqs_per_vector = DIV_ROUND_UP(vport->num_rxq, vport->num_q_vectors); + bufqs_per_vector = vport->num_bufqs_per_qgrp * + DIV_ROUND_UP(vport->num_rxq_grp, + vport->num_q_vectors); + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + q_vector = &vport->q_vectors[v_idx]; + q_vector->vport = vport; + + q_vector->tx_itr_value = IDPF_ITR_TX_DEF; + q_vector->tx_intr_mode = IDPF_ITR_DYNAMIC; + q_vector->tx_itr_idx = VIRTCHNL2_ITR_IDX_1; + + q_vector->rx_itr_value = IDPF_ITR_RX_DEF; + q_vector->rx_intr_mode = IDPF_ITR_DYNAMIC; + q_vector->rx_itr_idx = VIRTCHNL2_ITR_IDX_0; + + q_vector->tx = kcalloc(txqs_per_vector, + sizeof(struct idpf_queue *), + GFP_KERNEL); + if (!q_vector->tx) { + err = -ENOMEM; + goto error; + } + + q_vector->rx = kcalloc(rxqs_per_vector, + sizeof(struct idpf_queue *), + GFP_KERNEL); + if (!q_vector->rx) { + err = -ENOMEM; + goto error; + } + + if (!idpf_is_queue_model_split(vport->rxq_model)) + continue; + + q_vector->bufq = kcalloc(bufqs_per_vector, + sizeof(struct idpf_queue *), + GFP_KERNEL); + if (!q_vector->bufq) { + err = -ENOMEM; + goto error; + } + } + + return 0; + +error: + idpf_vport_intr_rel(vport); + + return err; +} + +/** + * idpf_vport_intr_init - Setup all vectors for the given vport + * @vport: virtual port + * + * Returns 0 on success or negative on failure + */ +int idpf_vport_intr_init(struct idpf_vport *vport) +{ + char *int_name; + int err; + + err = idpf_vport_intr_init_vec_idx(vport); + if (err) + return err; + + idpf_vport_intr_map_vector_to_qs(vport); + idpf_vport_intr_napi_add_all(vport); + idpf_vport_intr_napi_ena_all(vport); + + err = vport->adapter->dev_ops.reg_ops.intr_reg_init(vport); + if (err) + goto unroll_vectors_alloc; + + int_name = kasprintf(GFP_KERNEL, "%s-%s", + dev_driver_string(&vport->adapter->pdev->dev), + vport->netdev->name); + + err = idpf_vport_intr_req_irq(vport, int_name); + if (err) + goto unroll_vectors_alloc; + + idpf_vport_intr_ena_irq_all(vport); + + return 0; + +unroll_vectors_alloc: + idpf_vport_intr_napi_dis_all(vport); + idpf_vport_intr_napi_del_all(vport); + + return err; +} + +/** + * idpf_config_rss - Send virtchnl messages to configure RSS + * @vport: virtual port + * + * Return 0 on success, negative on failure + */ +int idpf_config_rss(struct idpf_vport *vport) +{ + int err; + + err = idpf_send_get_set_rss_key_msg(vport, false); + if (err) + return err; + + return idpf_send_get_set_rss_lut_msg(vport, false); +} + +/** + * idpf_fill_dflt_rss_lut - Fill the indirection table with the default values + * @vport: virtual port structure + */ +static void idpf_fill_dflt_rss_lut(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + u16 num_active_rxq = vport->num_rxq; + struct idpf_rss_data *rss_data; + int i; + + rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + + for (i = 0; i < rss_data->rss_lut_size; i++) { + rss_data->rss_lut[i] = i % num_active_rxq; + rss_data->cached_lut[i] = rss_data->rss_lut[i]; + } +} + +/** + * idpf_init_rss - Allocate and initialize RSS resources + * @vport: virtual port + * + * Return 0 on success, negative on failure + */ +int idpf_init_rss(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_rss_data *rss_data; + u32 lut_size; + + rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + + lut_size = rss_data->rss_lut_size * sizeof(u32); + rss_data->rss_lut = kzalloc(lut_size, GFP_KERNEL); + if (!rss_data->rss_lut) + return -ENOMEM; + + rss_data->cached_lut = kzalloc(lut_size, GFP_KERNEL); + if (!rss_data->cached_lut) { + kfree(rss_data->rss_lut); + rss_data->rss_lut = NULL; + + return -ENOMEM; + } + + /* Fill the default RSS lut values */ + idpf_fill_dflt_rss_lut(vport); + + return idpf_config_rss(vport); +} + +/** + * idpf_deinit_rss - Release RSS resources + * @vport: virtual port + */ +void idpf_deinit_rss(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_rss_data *rss_data; + + rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + kfree(rss_data->cached_lut); + rss_data->cached_lut = NULL; + kfree(rss_data->rss_lut); + rss_data->rss_lut = NULL; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h new file mode 100644 index 000000000000..df76493faa75 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -0,0 +1,1023 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_TXRX_H_ +#define _IDPF_TXRX_H_ + +#include <net/page_pool/helpers.h> +#include <net/tcp.h> +#include <net/netdev_queues.h> + +#define IDPF_LARGE_MAX_Q 256 +#define IDPF_MAX_Q 16 +#define IDPF_MIN_Q 2 +/* Mailbox Queue */ +#define IDPF_MAX_MBXQ 1 + +#define IDPF_MIN_TXQ_DESC 64 +#define IDPF_MIN_RXQ_DESC 64 +#define IDPF_MIN_TXQ_COMPLQ_DESC 256 +#define IDPF_MAX_QIDS 256 + +/* Number of descriptors in a queue should be a multiple of 32. RX queue + * descriptors alone should be a multiple of IDPF_REQ_RXQ_DESC_MULTIPLE + * to achieve BufQ descriptors aligned to 32 + */ +#define IDPF_REQ_DESC_MULTIPLE 32 +#define IDPF_REQ_RXQ_DESC_MULTIPLE (IDPF_MAX_BUFQS_PER_RXQ_GRP * 32) +#define IDPF_MIN_TX_DESC_NEEDED (MAX_SKB_FRAGS + 6) +#define IDPF_TX_WAKE_THRESH ((u16)IDPF_MIN_TX_DESC_NEEDED * 2) + +#define IDPF_MAX_DESCS 8160 +#define IDPF_MAX_TXQ_DESC ALIGN_DOWN(IDPF_MAX_DESCS, IDPF_REQ_DESC_MULTIPLE) +#define IDPF_MAX_RXQ_DESC ALIGN_DOWN(IDPF_MAX_DESCS, IDPF_REQ_RXQ_DESC_MULTIPLE) +#define MIN_SUPPORT_TXDID (\ + VIRTCHNL2_TXDID_FLEX_FLOW_SCHED |\ + VIRTCHNL2_TXDID_FLEX_TSO_CTX) + +#define IDPF_DFLT_SINGLEQ_TX_Q_GROUPS 1 +#define IDPF_DFLT_SINGLEQ_RX_Q_GROUPS 1 +#define IDPF_DFLT_SINGLEQ_TXQ_PER_GROUP 4 +#define IDPF_DFLT_SINGLEQ_RXQ_PER_GROUP 4 + +#define IDPF_COMPLQ_PER_GROUP 1 +#define IDPF_SINGLE_BUFQ_PER_RXQ_GRP 1 +#define IDPF_MAX_BUFQS_PER_RXQ_GRP 2 +#define IDPF_BUFQ2_ENA 1 +#define IDPF_NUMQ_PER_CHUNK 1 + +#define IDPF_DFLT_SPLITQ_TXQ_PER_GROUP 1 +#define IDPF_DFLT_SPLITQ_RXQ_PER_GROUP 1 + +/* Default vector sharing */ +#define IDPF_MBX_Q_VEC 1 +#define IDPF_MIN_Q_VEC 1 + +#define IDPF_DFLT_TX_Q_DESC_COUNT 512 +#define IDPF_DFLT_TX_COMPLQ_DESC_COUNT 512 +#define IDPF_DFLT_RX_Q_DESC_COUNT 512 + +/* IMPORTANT: We absolutely _cannot_ have more buffers in the system than a + * given RX completion queue has descriptors. This includes _ALL_ buffer + * queues. E.g.: If you have two buffer queues of 512 descriptors and buffers, + * you have a total of 1024 buffers so your RX queue _must_ have at least that + * many descriptors. This macro divides a given number of RX descriptors by + * number of buffer queues to calculate how many descriptors each buffer queue + * can have without overrunning the RX queue. + * + * If you give hardware more buffers than completion descriptors what will + * happen is that if hardware gets a chance to post more than ring wrap of + * descriptors before SW gets an interrupt and overwrites SW head, the gen bit + * in the descriptor will be wrong. Any overwritten descriptors' buffers will + * be gone forever and SW has no reasonable way to tell that this has happened. + * From SW perspective, when we finally get an interrupt, it looks like we're + * still waiting for descriptor to be done, stalling forever. + */ +#define IDPF_RX_BUFQ_DESC_COUNT(RXD, NUM_BUFQ) ((RXD) / (NUM_BUFQ)) + +#define IDPF_RX_BUFQ_WORKING_SET(rxq) ((rxq)->desc_count - 1) + +#define IDPF_RX_BUMP_NTC(rxq, ntc) \ +do { \ + if (unlikely(++(ntc) == (rxq)->desc_count)) { \ + ntc = 0; \ + change_bit(__IDPF_Q_GEN_CHK, (rxq)->flags); \ + } \ +} while (0) + +#define IDPF_SINGLEQ_BUMP_RING_IDX(q, idx) \ +do { \ + if (unlikely(++(idx) == (q)->desc_count)) \ + idx = 0; \ +} while (0) + +#define IDPF_RX_HDR_SIZE 256 +#define IDPF_RX_BUF_2048 2048 +#define IDPF_RX_BUF_4096 4096 +#define IDPF_RX_BUF_STRIDE 32 +#define IDPF_RX_BUF_POST_STRIDE 16 +#define IDPF_LOW_WATERMARK 64 +/* Size of header buffer specifically for header split */ +#define IDPF_HDR_BUF_SIZE 256 +#define IDPF_PACKET_HDR_PAD \ + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN * 2) +#define IDPF_TX_TSO_MIN_MSS 88 + +/* Minimum number of descriptors between 2 descriptors with the RE bit set; + * only relevant in flow scheduling mode + */ +#define IDPF_TX_SPLITQ_RE_MIN_GAP 64 + +#define IDPF_RX_BI_BUFID_S 0 +#define IDPF_RX_BI_BUFID_M GENMASK(14, 0) +#define IDPF_RX_BI_GEN_S 15 +#define IDPF_RX_BI_GEN_M BIT(IDPF_RX_BI_GEN_S) +#define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M +#define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M + +#define IDPF_SINGLEQ_RX_BUF_DESC(rxq, i) \ + (&(((struct virtchnl2_singleq_rx_buf_desc *)((rxq)->desc_ring))[i])) +#define IDPF_SPLITQ_RX_BUF_DESC(rxq, i) \ + (&(((struct virtchnl2_splitq_rx_buf_desc *)((rxq)->desc_ring))[i])) +#define IDPF_SPLITQ_RX_BI_DESC(rxq, i) ((((rxq)->ring))[i]) + +#define IDPF_BASE_TX_DESC(txq, i) \ + (&(((struct idpf_base_tx_desc *)((txq)->desc_ring))[i])) +#define IDPF_BASE_TX_CTX_DESC(txq, i) \ + (&(((struct idpf_base_tx_ctx_desc *)((txq)->desc_ring))[i])) +#define IDPF_SPLITQ_TX_COMPLQ_DESC(txcq, i) \ + (&(((struct idpf_splitq_tx_compl_desc *)((txcq)->desc_ring))[i])) + +#define IDPF_FLEX_TX_DESC(txq, i) \ + (&(((union idpf_tx_flex_desc *)((txq)->desc_ring))[i])) +#define IDPF_FLEX_TX_CTX_DESC(txq, i) \ + (&(((struct idpf_flex_tx_ctx_desc *)((txq)->desc_ring))[i])) + +#define IDPF_DESC_UNUSED(txq) \ + ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ + (txq)->next_to_clean - (txq)->next_to_use - 1) + +#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->buf_stack.top) +#define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \ + (txq)->desc_count >> 2) + +#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1) +/* Determine the absolute number of completions pending, i.e. the number of + * completions that are expected to arrive on the TX completion queue. + */ +#define IDPF_TX_COMPLQ_PENDING(txq) \ + (((txq)->num_completions_pending >= (txq)->complq->num_completions ? \ + 0 : U64_MAX) + \ + (txq)->num_completions_pending - (txq)->complq->num_completions) + +#define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH 16 +#define IDPF_SPLITQ_TX_INVAL_COMPL_TAG -1 +/* Adjust the generation for the completion tag and wrap if necessary */ +#define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \ + ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? \ + 0 : (txq)->compl_tag_cur_gen) + +#define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS) + +#define IDPF_TX_FLAGS_TSO BIT(0) +#define IDPF_TX_FLAGS_IPV4 BIT(1) +#define IDPF_TX_FLAGS_IPV6 BIT(2) +#define IDPF_TX_FLAGS_TUNNEL BIT(3) + +union idpf_tx_flex_desc { + struct idpf_flex_tx_desc q; /* queue based scheduling */ + struct idpf_flex_tx_sched_desc flow; /* flow based scheduling */ +}; + +/** + * struct idpf_tx_buf + * @next_to_watch: Next descriptor to clean + * @skb: Pointer to the skb + * @dma: DMA address + * @len: DMA length + * @bytecount: Number of bytes + * @gso_segs: Number of GSO segments + * @compl_tag: Splitq only, unique identifier for a buffer. Used to compare + * with completion tag returned in buffer completion event. + * Because the completion tag is expected to be the same in all + * data descriptors for a given packet, and a single packet can + * span multiple buffers, we need this field to track all + * buffers associated with this completion tag independently of + * the buf_id. The tag consists of a N bit buf_id and M upper + * order "generation bits". See compl_tag_bufid_m and + * compl_tag_gen_s in struct idpf_queue. We'll use a value of -1 + * to indicate the tag is not valid. + * @ctx_entry: Singleq only. Used to indicate the corresponding entry + * in the descriptor ring was used for a context descriptor and + * this buffer entry should be skipped. + */ +struct idpf_tx_buf { + void *next_to_watch; + struct sk_buff *skb; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + unsigned int bytecount; + unsigned short gso_segs; + + union { + int compl_tag; + + bool ctx_entry; + }; +}; + +struct idpf_tx_stash { + struct hlist_node hlist; + struct idpf_tx_buf buf; +}; + +/** + * struct idpf_buf_lifo - LIFO for managing OOO completions + * @top: Used to know how many buffers are left + * @size: Total size of LIFO + * @bufs: Backing array + */ +struct idpf_buf_lifo { + u16 top; + u16 size; + struct idpf_tx_stash **bufs; +}; + +/** + * struct idpf_tx_offload_params - Offload parameters for a given packet + * @tx_flags: Feature flags enabled for this packet + * @hdr_offsets: Offset parameter for single queue model + * @cd_tunneling: Type of tunneling enabled for single queue model + * @tso_len: Total length of payload to segment + * @mss: Segment size + * @tso_segs: Number of segments to be sent + * @tso_hdr_len: Length of headers to be duplicated + * @td_cmd: Command field to be inserted into descriptor + */ +struct idpf_tx_offload_params { + u32 tx_flags; + + u32 hdr_offsets; + u32 cd_tunneling; + + u32 tso_len; + u16 mss; + u16 tso_segs; + u16 tso_hdr_len; + + u16 td_cmd; +}; + +/** + * struct idpf_tx_splitq_params + * @dtype: General descriptor info + * @eop_cmd: Type of EOP + * @compl_tag: Associated tag for completion + * @td_tag: Descriptor tunneling tag + * @offload: Offload parameters + */ +struct idpf_tx_splitq_params { + enum idpf_tx_desc_dtype_value dtype; + u16 eop_cmd; + union { + u16 compl_tag; + u16 td_tag; + }; + + struct idpf_tx_offload_params offload; +}; + +enum idpf_tx_ctx_desc_eipt_offload { + IDPF_TX_CTX_EXT_IP_NONE = 0x0, + IDPF_TX_CTX_EXT_IP_IPV6 = 0x1, + IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM = 0x2, + IDPF_TX_CTX_EXT_IP_IPV4 = 0x3 +}; + +/* Checksum offload bits decoded from the receive descriptor. */ +struct idpf_rx_csum_decoded { + u32 l3l4p : 1; + u32 ipe : 1; + u32 eipe : 1; + u32 eudpe : 1; + u32 ipv6exadd : 1; + u32 l4e : 1; + u32 pprs : 1; + u32 nat : 1; + u32 raw_csum_inv : 1; + u32 raw_csum : 16; +}; + +struct idpf_rx_extracted { + unsigned int size; + u16 rx_ptype; +}; + +#define IDPF_TX_COMPLQ_CLEAN_BUDGET 256 +#define IDPF_TX_MIN_PKT_LEN 17 +#define IDPF_TX_DESCS_FOR_SKB_DATA_PTR 1 +#define IDPF_TX_DESCS_PER_CACHE_LINE (L1_CACHE_BYTES / \ + sizeof(struct idpf_flex_tx_desc)) +#define IDPF_TX_DESCS_FOR_CTX 1 +/* TX descriptors needed, worst case */ +#define IDPF_TX_DESC_NEEDED (MAX_SKB_FRAGS + IDPF_TX_DESCS_FOR_CTX + \ + IDPF_TX_DESCS_PER_CACHE_LINE + \ + IDPF_TX_DESCS_FOR_SKB_DATA_PTR) + +/* The size limit for a transmit buffer in a descriptor is (16K - 1). + * In order to align with the read requests we will align the value to + * the nearest 4K which represents our maximum read request size. + */ +#define IDPF_TX_MAX_READ_REQ_SIZE SZ_4K +#define IDPF_TX_MAX_DESC_DATA (SZ_16K - 1) +#define IDPF_TX_MAX_DESC_DATA_ALIGNED \ + ALIGN_DOWN(IDPF_TX_MAX_DESC_DATA, IDPF_TX_MAX_READ_REQ_SIZE) + +#define IDPF_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) +#define IDPF_RX_DESC(rxq, i) \ + (&(((union virtchnl2_rx_desc *)((rxq)->desc_ring))[i])) + +struct idpf_rx_buf { + struct page *page; + unsigned int page_offset; + u16 truesize; +}; + +#define IDPF_RX_MAX_PTYPE_PROTO_IDS 32 +#define IDPF_RX_MAX_PTYPE_SZ (sizeof(struct virtchnl2_ptype) + \ + (sizeof(u16) * IDPF_RX_MAX_PTYPE_PROTO_IDS)) +#define IDPF_RX_PTYPE_HDR_SZ sizeof(struct virtchnl2_get_ptype_info) +#define IDPF_RX_MAX_PTYPES_PER_BUF \ + DIV_ROUND_DOWN_ULL((IDPF_CTLQ_MAX_BUF_LEN - IDPF_RX_PTYPE_HDR_SZ), \ + IDPF_RX_MAX_PTYPE_SZ) + +#define IDPF_GET_PTYPE_SIZE(p) struct_size((p), proto_id, (p)->proto_id_count) + +#define IDPF_TUN_IP_GRE (\ + IDPF_PTYPE_TUNNEL_IP |\ + IDPF_PTYPE_TUNNEL_IP_GRENAT) + +#define IDPF_TUN_IP_GRE_MAC (\ + IDPF_TUN_IP_GRE |\ + IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC) + +#define IDPF_RX_MAX_PTYPE 1024 +#define IDPF_RX_MAX_BASE_PTYPE 256 +#define IDPF_INVALID_PTYPE_ID 0xFFFF + +/* Packet type non-ip values */ +enum idpf_rx_ptype_l2 { + IDPF_RX_PTYPE_L2_RESERVED = 0, + IDPF_RX_PTYPE_L2_MAC_PAY2 = 1, + IDPF_RX_PTYPE_L2_TIMESYNC_PAY2 = 2, + IDPF_RX_PTYPE_L2_FIP_PAY2 = 3, + IDPF_RX_PTYPE_L2_OUI_PAY2 = 4, + IDPF_RX_PTYPE_L2_MACCNTRL_PAY2 = 5, + IDPF_RX_PTYPE_L2_LLDP_PAY2 = 6, + IDPF_RX_PTYPE_L2_ECP_PAY2 = 7, + IDPF_RX_PTYPE_L2_EVB_PAY2 = 8, + IDPF_RX_PTYPE_L2_QCN_PAY2 = 9, + IDPF_RX_PTYPE_L2_EAPOL_PAY2 = 10, + IDPF_RX_PTYPE_L2_ARP = 11, +}; + +enum idpf_rx_ptype_outer_ip { + IDPF_RX_PTYPE_OUTER_L2 = 0, + IDPF_RX_PTYPE_OUTER_IP = 1, +}; + +#define IDPF_RX_PTYPE_TO_IPV(ptype, ipv) \ + (((ptype)->outer_ip == IDPF_RX_PTYPE_OUTER_IP) && \ + ((ptype)->outer_ip_ver == (ipv))) + +enum idpf_rx_ptype_outer_ip_ver { + IDPF_RX_PTYPE_OUTER_NONE = 0, + IDPF_RX_PTYPE_OUTER_IPV4 = 1, + IDPF_RX_PTYPE_OUTER_IPV6 = 2, +}; + +enum idpf_rx_ptype_outer_fragmented { + IDPF_RX_PTYPE_NOT_FRAG = 0, + IDPF_RX_PTYPE_FRAG = 1, +}; + +enum idpf_rx_ptype_tunnel_type { + IDPF_RX_PTYPE_TUNNEL_NONE = 0, + IDPF_RX_PTYPE_TUNNEL_IP_IP = 1, + IDPF_RX_PTYPE_TUNNEL_IP_GRENAT = 2, + IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, + IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, +}; + +enum idpf_rx_ptype_tunnel_end_prot { + IDPF_RX_PTYPE_TUNNEL_END_NONE = 0, + IDPF_RX_PTYPE_TUNNEL_END_IPV4 = 1, + IDPF_RX_PTYPE_TUNNEL_END_IPV6 = 2, +}; + +enum idpf_rx_ptype_inner_prot { + IDPF_RX_PTYPE_INNER_PROT_NONE = 0, + IDPF_RX_PTYPE_INNER_PROT_UDP = 1, + IDPF_RX_PTYPE_INNER_PROT_TCP = 2, + IDPF_RX_PTYPE_INNER_PROT_SCTP = 3, + IDPF_RX_PTYPE_INNER_PROT_ICMP = 4, + IDPF_RX_PTYPE_INNER_PROT_TIMESYNC = 5, +}; + +enum idpf_rx_ptype_payload_layer { + IDPF_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, + IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, + IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, + IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, +}; + +enum idpf_tunnel_state { + IDPF_PTYPE_TUNNEL_IP = BIT(0), + IDPF_PTYPE_TUNNEL_IP_GRENAT = BIT(1), + IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC = BIT(2), +}; + +struct idpf_ptype_state { + bool outer_ip; + bool outer_frag; + u8 tunnel_state; +}; + +struct idpf_rx_ptype_decoded { + u32 ptype:10; + u32 known:1; + u32 outer_ip:1; + u32 outer_ip_ver:2; + u32 outer_frag:1; + u32 tunnel_type:3; + u32 tunnel_end_prot:2; + u32 tunnel_end_frag:1; + u32 inner_prot:4; + u32 payload_layer:3; +}; + +/** + * enum idpf_queue_flags_t + * @__IDPF_Q_GEN_CHK: Queues operating in splitq mode use a generation bit to + * identify new descriptor writebacks on the ring. HW sets + * the gen bit to 1 on the first writeback of any given + * descriptor. After the ring wraps, HW sets the gen bit of + * those descriptors to 0, and continues flipping + * 0->1 or 1->0 on each ring wrap. SW maintains its own + * gen bit to know what value will indicate writebacks on + * the next pass around the ring. E.g. it is initialized + * to 1 and knows that reading a gen bit of 1 in any + * descriptor on the initial pass of the ring indicates a + * writeback. It also flips on every ring wrap. + * @__IDPF_RFLQ_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW bit + * and RFLGQ_GEN is the SW bit. + * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling + * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions + * @__IDPF_Q_POLL_MODE: Enable poll mode + * @__IDPF_Q_FLAGS_NBITS: Must be last + */ +enum idpf_queue_flags_t { + __IDPF_Q_GEN_CHK, + __IDPF_RFLQ_GEN_CHK, + __IDPF_Q_FLOW_SCH_EN, + __IDPF_Q_SW_MARKER, + __IDPF_Q_POLL_MODE, + + __IDPF_Q_FLAGS_NBITS, +}; + +/** + * struct idpf_vec_regs + * @dyn_ctl_reg: Dynamic control interrupt register offset + * @itrn_reg: Interrupt Throttling Rate register offset + * @itrn_index_spacing: Register spacing between ITR registers of the same + * vector + */ +struct idpf_vec_regs { + u32 dyn_ctl_reg; + u32 itrn_reg; + u32 itrn_index_spacing; +}; + +/** + * struct idpf_intr_reg + * @dyn_ctl: Dynamic control interrupt register + * @dyn_ctl_intena_m: Mask for dyn_ctl interrupt enable + * @dyn_ctl_itridx_s: Register bit offset for ITR index + * @dyn_ctl_itridx_m: Mask for ITR index + * @dyn_ctl_intrvl_s: Register bit offset for ITR interval + * @rx_itr: RX ITR register + * @tx_itr: TX ITR register + * @icr_ena: Interrupt cause register offset + * @icr_ena_ctlq_m: Mask for ICR + */ +struct idpf_intr_reg { + void __iomem *dyn_ctl; + u32 dyn_ctl_intena_m; + u32 dyn_ctl_itridx_s; + u32 dyn_ctl_itridx_m; + u32 dyn_ctl_intrvl_s; + void __iomem *rx_itr; + void __iomem *tx_itr; + void __iomem *icr_ena; + u32 icr_ena_ctlq_m; +}; + +/** + * struct idpf_q_vector + * @vport: Vport back pointer + * @affinity_mask: CPU affinity mask + * @napi: napi handler + * @v_idx: Vector index + * @intr_reg: See struct idpf_intr_reg + * @num_txq: Number of TX queues + * @tx: Array of TX queues to service + * @tx_dim: Data for TX net_dim algorithm + * @tx_itr_value: TX interrupt throttling rate + * @tx_intr_mode: Dynamic ITR or not + * @tx_itr_idx: TX ITR index + * @num_rxq: Number of RX queues + * @rx: Array of RX queues to service + * @rx_dim: Data for RX net_dim algorithm + * @rx_itr_value: RX interrupt throttling rate + * @rx_intr_mode: Dynamic ITR or not + * @rx_itr_idx: RX ITR index + * @num_bufq: Number of buffer queues + * @bufq: Array of buffer queues to service + * @total_events: Number of interrupts processed + * @name: Queue vector name + */ +struct idpf_q_vector { + struct idpf_vport *vport; + cpumask_t affinity_mask; + struct napi_struct napi; + u16 v_idx; + struct idpf_intr_reg intr_reg; + + u16 num_txq; + struct idpf_queue **tx; + struct dim tx_dim; + u16 tx_itr_value; + bool tx_intr_mode; + u32 tx_itr_idx; + + u16 num_rxq; + struct idpf_queue **rx; + struct dim rx_dim; + u16 rx_itr_value; + bool rx_intr_mode; + u32 rx_itr_idx; + + u16 num_bufq; + struct idpf_queue **bufq; + + u16 total_events; + char *name; +}; + +struct idpf_rx_queue_stats { + u64_stats_t packets; + u64_stats_t bytes; + u64_stats_t rsc_pkts; + u64_stats_t hw_csum_err; + u64_stats_t hsplit_pkts; + u64_stats_t hsplit_buf_ovf; + u64_stats_t bad_descs; +}; + +struct idpf_tx_queue_stats { + u64_stats_t packets; + u64_stats_t bytes; + u64_stats_t lso_pkts; + u64_stats_t linearize; + u64_stats_t q_busy; + u64_stats_t skb_drops; + u64_stats_t dma_map_errs; +}; + +struct idpf_cleaned_stats { + u32 packets; + u32 bytes; +}; + +union idpf_queue_stats { + struct idpf_rx_queue_stats rx; + struct idpf_tx_queue_stats tx; +}; + +#define IDPF_ITR_DYNAMIC 1 +#define IDPF_ITR_MAX 0x1FE0 +#define IDPF_ITR_20K 0x0032 +#define IDPF_ITR_GRAN_S 1 /* Assume ITR granularity is 2us */ +#define IDPF_ITR_MASK 0x1FFE /* ITR register value alignment mask */ +#define ITR_REG_ALIGN(setting) ((setting) & IDPF_ITR_MASK) +#define IDPF_ITR_IS_DYNAMIC(itr_mode) (itr_mode) +#define IDPF_ITR_TX_DEF IDPF_ITR_20K +#define IDPF_ITR_RX_DEF IDPF_ITR_20K +/* Index used for 'No ITR' update in DYN_CTL register */ +#define IDPF_NO_ITR_UPDATE_IDX 3 +#define IDPF_ITR_IDX_SPACING(spacing, dflt) (spacing ? spacing : dflt) +#define IDPF_DIM_DEFAULT_PROFILE_IX 1 + +/** + * struct idpf_queue + * @dev: Device back pointer for DMA mapping + * @vport: Back pointer to associated vport + * @txq_grp: See struct idpf_txq_group + * @rxq_grp: See struct idpf_rxq_group + * @idx: For buffer queue, it is used as group id, either 0 or 1. On clean, + * buffer queue uses this index to determine which group of refill queues + * to clean. + * For TX queue, it is used as index to map between TX queue group and + * hot path TX pointers stored in vport. Used in both singleq/splitq. + * For RX queue, it is used to index to total RX queue across groups and + * used for skb reporting. + * @tail: Tail offset. Used for both queue models single and split. In splitq + * model relevant only for TX queue and RX queue. + * @tx_buf: See struct idpf_tx_buf + * @rx_buf: Struct with RX buffer related members + * @rx_buf.buf: See struct idpf_rx_buf + * @rx_buf.hdr_buf_pa: DMA handle + * @rx_buf.hdr_buf_va: Virtual address + * @pp: Page pool pointer + * @skb: Pointer to the skb + * @q_type: Queue type (TX, RX, TX completion, RX buffer) + * @q_id: Queue id + * @desc_count: Number of descriptors + * @next_to_use: Next descriptor to use. Relevant in both split & single txq + * and bufq. + * @next_to_clean: Next descriptor to clean. In split queue model, only + * relevant to TX completion queue and RX queue. + * @next_to_alloc: RX buffer to allocate at. Used only for RX. In splitq model + * only relevant to RX queue. + * @flags: See enum idpf_queue_flags_t + * @q_stats: See union idpf_queue_stats + * @stats_sync: See struct u64_stats_sync + * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on + * the TX completion queue, it can be for any TXQ associated + * with that completion queue. This means we can clean up to + * N TXQs during a single call to clean the completion queue. + * cleaned_bytes|pkts tracks the clean stats per TXQ during + * that single call to clean the completion queue. By doing so, + * we can update BQL with aggregate cleaned stats for each TXQ + * only once at the end of the cleaning routine. + * @cleaned_pkts: Number of packets cleaned for the above said case + * @rx_hsplit_en: RX headsplit enable + * @rx_hbuf_size: Header buffer size + * @rx_buf_size: Buffer size + * @rx_max_pkt_size: RX max packet size + * @rx_buf_stride: RX buffer stride + * @rx_buffer_low_watermark: RX buffer low watermark + * @rxdids: Supported RX descriptor ids + * @q_vector: Backreference to associated vector + * @size: Length of descriptor ring in bytes + * @dma: Physical address of ring + * @desc_ring: Descriptor ring memory + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather + * @tx_min_pkt_len: Min supported packet length + * @num_completions: Only relevant for TX completion queue. It tracks the + * number of completions received to compare against the + * number of completions pending, as accumulated by the + * TX queues. + * @buf_stack: Stack of empty buffers to store buffer info for out of order + * buffer completions. See struct idpf_buf_lifo. + * @compl_tag_bufid_m: Completion tag buffer id mask + * @compl_tag_gen_s: Completion tag generation bit + * The format of the completion tag will change based on the TXQ + * descriptor ring size so that we can maintain roughly the same level + * of "uniqueness" across all descriptor sizes. For example, if the + * TXQ descriptor ring size is 64 (the minimum size supported), the + * completion tag will be formatted as below: + * 15 6 5 0 + * -------------------------------- + * | GEN=0-1023 |IDX = 0-63| + * -------------------------------- + * + * This gives us 64*1024 = 65536 possible unique values. Similarly, if + * the TXQ descriptor ring size is 8160 (the maximum size supported), + * the completion tag will be formatted as below: + * 15 13 12 0 + * -------------------------------- + * |GEN | IDX = 0-8159 | + * -------------------------------- + * + * This gives us 8*8160 = 65280 possible unique values. + * @compl_tag_cur_gen: Used to keep track of current completion tag generation + * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset + * @sched_buf_hash: Hash table to stores buffers + */ +struct idpf_queue { + struct device *dev; + struct idpf_vport *vport; + union { + struct idpf_txq_group *txq_grp; + struct idpf_rxq_group *rxq_grp; + }; + u16 idx; + void __iomem *tail; + union { + struct idpf_tx_buf *tx_buf; + struct { + struct idpf_rx_buf *buf; + dma_addr_t hdr_buf_pa; + void *hdr_buf_va; + } rx_buf; + }; + struct page_pool *pp; + struct sk_buff *skb; + u16 q_type; + u32 q_id; + u16 desc_count; + + u16 next_to_use; + u16 next_to_clean; + u16 next_to_alloc; + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); + + union idpf_queue_stats q_stats; + struct u64_stats_sync stats_sync; + + u32 cleaned_bytes; + u16 cleaned_pkts; + + bool rx_hsplit_en; + u16 rx_hbuf_size; + u16 rx_buf_size; + u16 rx_max_pkt_size; + u16 rx_buf_stride; + u8 rx_buffer_low_watermark; + u64 rxdids; + struct idpf_q_vector *q_vector; + unsigned int size; + dma_addr_t dma; + void *desc_ring; + + u16 tx_max_bufs; + u8 tx_min_pkt_len; + + u32 num_completions; + + struct idpf_buf_lifo buf_stack; + + u16 compl_tag_bufid_m; + u16 compl_tag_gen_s; + + u16 compl_tag_cur_gen; + u16 compl_tag_gen_max; + + DECLARE_HASHTABLE(sched_buf_hash, 12); +} ____cacheline_internodealigned_in_smp; + +/** + * struct idpf_sw_queue + * @next_to_clean: Next descriptor to clean + * @next_to_alloc: Buffer to allocate at + * @flags: See enum idpf_queue_flags_t + * @ring: Pointer to the ring + * @desc_count: Descriptor count + * @dev: Device back pointer for DMA mapping + * + * Software queues are used in splitq mode to manage buffers between rxq + * producer and the bufq consumer. These are required in order to maintain a + * lockless buffer management system and are strictly software only constructs. + */ +struct idpf_sw_queue { + u16 next_to_clean; + u16 next_to_alloc; + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); + u16 *ring; + u16 desc_count; + struct device *dev; +} ____cacheline_internodealigned_in_smp; + +/** + * struct idpf_rxq_set + * @rxq: RX queue + * @refillq0: Pointer to refill queue 0 + * @refillq1: Pointer to refill queue 1 + * + * Splitq only. idpf_rxq_set associates an rxq with at an array of refillqs. + * Each rxq needs a refillq to return used buffers back to the respective bufq. + * Bufqs then clean these refillqs for buffers to give to hardware. + */ +struct idpf_rxq_set { + struct idpf_queue rxq; + struct idpf_sw_queue *refillq0; + struct idpf_sw_queue *refillq1; +}; + +/** + * struct idpf_bufq_set + * @bufq: Buffer queue + * @num_refillqs: Number of refill queues. This is always equal to num_rxq_sets + * in idpf_rxq_group. + * @refillqs: Pointer to refill queues array. + * + * Splitq only. idpf_bufq_set associates a bufq to an array of refillqs. + * In this bufq_set, there will be one refillq for each rxq in this rxq_group. + * Used buffers received by rxqs will be put on refillqs which bufqs will + * clean to return new buffers back to hardware. + * + * Buffers needed by some number of rxqs associated in this rxq_group are + * managed by at most two bufqs (depending on performance configuration). + */ +struct idpf_bufq_set { + struct idpf_queue bufq; + int num_refillqs; + struct idpf_sw_queue *refillqs; +}; + +/** + * struct idpf_rxq_group + * @vport: Vport back pointer + * @singleq: Struct with single queue related members + * @singleq.num_rxq: Number of RX queues associated + * @singleq.rxqs: Array of RX queue pointers + * @splitq: Struct with split queue related members + * @splitq.num_rxq_sets: Number of RX queue sets + * @splitq.rxq_sets: Array of RX queue sets + * @splitq.bufq_sets: Buffer queue set pointer + * + * In singleq mode, an rxq_group is simply an array of rxqs. In splitq, a + * rxq_group contains all the rxqs, bufqs and refillqs needed to + * manage buffers in splitq mode. + */ +struct idpf_rxq_group { + struct idpf_vport *vport; + + union { + struct { + u16 num_rxq; + struct idpf_queue *rxqs[IDPF_LARGE_MAX_Q]; + } singleq; + struct { + u16 num_rxq_sets; + struct idpf_rxq_set *rxq_sets[IDPF_LARGE_MAX_Q]; + struct idpf_bufq_set *bufq_sets; + } splitq; + }; +}; + +/** + * struct idpf_txq_group + * @vport: Vport back pointer + * @num_txq: Number of TX queues associated + * @txqs: Array of TX queue pointers + * @complq: Associated completion queue pointer, split queue only + * @num_completions_pending: Total number of completions pending for the + * completion queue, acculumated for all TX queues + * associated with that completion queue. + * + * Between singleq and splitq, a txq_group is largely the same except for the + * complq. In splitq a single complq is responsible for handling completions + * for some number of txqs associated in this txq_group. + */ +struct idpf_txq_group { + struct idpf_vport *vport; + + u16 num_txq; + struct idpf_queue *txqs[IDPF_LARGE_MAX_Q]; + + struct idpf_queue *complq; + + u32 num_completions_pending; +}; + +/** + * idpf_size_to_txd_count - Get number of descriptors needed for large Tx frag + * @size: transmit request size in bytes + * + * In the case where a large frag (>= 16K) needs to be split across multiple + * descriptors, we need to assume that we can have no more than 12K of data + * per descriptor due to hardware alignment restrictions (4K alignment). + */ +static inline u32 idpf_size_to_txd_count(unsigned int size) +{ + return DIV_ROUND_UP(size, IDPF_TX_MAX_DESC_DATA_ALIGNED); +} + +/** + * idpf_tx_singleq_build_ctob - populate command tag offset and size + * @td_cmd: Command to be filled in desc + * @td_offset: Offset to be filled in desc + * @size: Size of the buffer + * @td_tag: td tag to be filled + * + * Returns the 64 bit value populated with the input parameters + */ +static inline __le64 idpf_tx_singleq_build_ctob(u64 td_cmd, u64 td_offset, + unsigned int size, u64 td_tag) +{ + return cpu_to_le64(IDPF_TX_DESC_DTYPE_DATA | + (td_cmd << IDPF_TXD_QW1_CMD_S) | + (td_offset << IDPF_TXD_QW1_OFFSET_S) | + ((u64)size << IDPF_TXD_QW1_TX_BUF_SZ_S) | + (td_tag << IDPF_TXD_QW1_L2TAG1_S)); +} + +void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc, + struct idpf_tx_splitq_params *params, + u16 td_cmd, u16 size); +void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, + struct idpf_tx_splitq_params *params, + u16 td_cmd, u16 size); +/** + * idpf_tx_splitq_build_desc - determine which type of data descriptor to build + * @desc: descriptor to populate + * @params: pointer to tx params struct + * @td_cmd: command to be filled in desc + * @size: size of buffer + */ +static inline void idpf_tx_splitq_build_desc(union idpf_tx_flex_desc *desc, + struct idpf_tx_splitq_params *params, + u16 td_cmd, u16 size) +{ + if (params->dtype == IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2) + idpf_tx_splitq_build_ctb(desc, params, td_cmd, size); + else + idpf_tx_splitq_build_flow_desc(desc, params, td_cmd, size); +} + +/** + * idpf_alloc_page - Allocate a new RX buffer from the page pool + * @pool: page_pool to allocate from + * @buf: metadata struct to populate with page info + * @buf_size: 2K or 4K + * + * Returns &dma_addr_t to be passed to HW for Rx, %DMA_MAPPING_ERROR otherwise. + */ +static inline dma_addr_t idpf_alloc_page(struct page_pool *pool, + struct idpf_rx_buf *buf, + unsigned int buf_size) +{ + if (buf_size == IDPF_RX_BUF_2048) + buf->page = page_pool_dev_alloc_frag(pool, &buf->page_offset, + buf_size); + else + buf->page = page_pool_dev_alloc_pages(pool); + + if (!buf->page) + return DMA_MAPPING_ERROR; + + buf->truesize = buf_size; + + return page_pool_get_dma_addr(buf->page) + buf->page_offset + + pool->p.offset; +} + +/** + * idpf_rx_put_page - Return RX buffer page to pool + * @rx_buf: RX buffer metadata struct + */ +static inline void idpf_rx_put_page(struct idpf_rx_buf *rx_buf) +{ + page_pool_put_page(rx_buf->page->pp, rx_buf->page, + rx_buf->truesize, true); + rx_buf->page = NULL; +} + +/** + * idpf_rx_sync_for_cpu - Synchronize DMA buffer + * @rx_buf: RX buffer metadata struct + * @len: frame length from descriptor + */ +static inline void idpf_rx_sync_for_cpu(struct idpf_rx_buf *rx_buf, u32 len) +{ + struct page *page = rx_buf->page; + struct page_pool *pp = page->pp; + + dma_sync_single_range_for_cpu(pp->p.dev, + page_pool_get_dma_addr(page), + rx_buf->page_offset + pp->p.offset, len, + page_pool_get_dma_dir(pp)); +} + +int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget); +void idpf_vport_init_num_qs(struct idpf_vport *vport, + struct virtchnl2_create_vport *vport_msg); +void idpf_vport_calc_num_q_desc(struct idpf_vport *vport); +int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_index, + struct virtchnl2_create_vport *vport_msg, + struct idpf_vport_max_q *max_q); +void idpf_vport_calc_num_q_groups(struct idpf_vport *vport); +int idpf_vport_queues_alloc(struct idpf_vport *vport); +void idpf_vport_queues_rel(struct idpf_vport *vport); +void idpf_vport_intr_rel(struct idpf_vport *vport); +int idpf_vport_intr_alloc(struct idpf_vport *vport); +void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector); +void idpf_vport_intr_deinit(struct idpf_vport *vport); +int idpf_vport_intr_init(struct idpf_vport *vport); +enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded); +int idpf_config_rss(struct idpf_vport *vport); +int idpf_init_rss(struct idpf_vport *vport); +void idpf_deinit_rss(struct idpf_vport *vport); +int idpf_rx_bufs_init_all(struct idpf_vport *vport); +void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, + unsigned int size); +struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, + struct idpf_rx_buf *rx_buf, + unsigned int size); +bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf); +void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val); +void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, + bool xmit_more); +unsigned int idpf_size_to_txd_count(unsigned int size); +netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb); +void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, + struct idpf_tx_buf *first, u16 ring_idx); +unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, + struct sk_buff *skb); +bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + unsigned int count); +int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size); +void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); +netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, + struct net_device *netdev); +netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + struct net_device *netdev); +bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq, + u16 cleaned_count); +int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); + +#endif /* !_IDPF_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c new file mode 100644 index 000000000000..8ade4e3a9fe1 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" +#include "idpf_lan_vf_regs.h" + +#define IDPF_VF_ITR_IDX_SPACING 0x40 + +/** + * idpf_vf_ctlq_reg_init - initialize default mailbox registers + * @cq: pointer to the array of create control queues + */ +static void idpf_vf_ctlq_reg_init(struct idpf_ctlq_create_info *cq) +{ + int i; + + for (i = 0; i < IDPF_NUM_DFLT_MBX_Q; i++) { + struct idpf_ctlq_create_info *ccq = cq + i; + + switch (ccq->type) { + case IDPF_CTLQ_TYPE_MAILBOX_TX: + /* set head and tail registers in our local struct */ + ccq->reg.head = VF_ATQH; + ccq->reg.tail = VF_ATQT; + ccq->reg.len = VF_ATQLEN; + ccq->reg.bah = VF_ATQBAH; + ccq->reg.bal = VF_ATQBAL; + ccq->reg.len_mask = VF_ATQLEN_ATQLEN_M; + ccq->reg.len_ena_mask = VF_ATQLEN_ATQENABLE_M; + ccq->reg.head_mask = VF_ATQH_ATQH_M; + break; + case IDPF_CTLQ_TYPE_MAILBOX_RX: + /* set head and tail registers in our local struct */ + ccq->reg.head = VF_ARQH; + ccq->reg.tail = VF_ARQT; + ccq->reg.len = VF_ARQLEN; + ccq->reg.bah = VF_ARQBAH; + ccq->reg.bal = VF_ARQBAL; + ccq->reg.len_mask = VF_ARQLEN_ARQLEN_M; + ccq->reg.len_ena_mask = VF_ARQLEN_ARQENABLE_M; + ccq->reg.head_mask = VF_ARQH_ARQH_M; + break; + default: + break; + } + } +} + +/** + * idpf_vf_mb_intr_reg_init - Initialize the mailbox register + * @adapter: adapter structure + */ +static void idpf_vf_mb_intr_reg_init(struct idpf_adapter *adapter) +{ + struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg; + u32 dyn_ctl = le32_to_cpu(adapter->caps.mailbox_dyn_ctl); + + intr->dyn_ctl = idpf_get_reg_addr(adapter, dyn_ctl); + intr->dyn_ctl_intena_m = VF_INT_DYN_CTL0_INTENA_M; + intr->dyn_ctl_itridx_m = VF_INT_DYN_CTL0_ITR_INDX_M; + intr->icr_ena = idpf_get_reg_addr(adapter, VF_INT_ICR0_ENA1); + intr->icr_ena_ctlq_m = VF_INT_ICR0_ENA1_ADMINQ_M; +} + +/** + * idpf_vf_intr_reg_init - Initialize interrupt registers + * @vport: virtual port structure + */ +static int idpf_vf_intr_reg_init(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + int num_vecs = vport->num_q_vectors; + struct idpf_vec_regs *reg_vals; + int num_regs, i, err = 0; + u32 rx_itr, tx_itr; + u16 total_vecs; + + total_vecs = idpf_get_reserved_vecs(vport->adapter); + reg_vals = kcalloc(total_vecs, sizeof(struct idpf_vec_regs), + GFP_KERNEL); + if (!reg_vals) + return -ENOMEM; + + num_regs = idpf_get_reg_intr_vecs(vport, reg_vals); + if (num_regs < num_vecs) { + err = -EINVAL; + goto free_reg_vals; + } + + for (i = 0; i < num_vecs; i++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[i]; + u16 vec_id = vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC; + struct idpf_intr_reg *intr = &q_vector->intr_reg; + u32 spacing; + + intr->dyn_ctl = idpf_get_reg_addr(adapter, + reg_vals[vec_id].dyn_ctl_reg); + intr->dyn_ctl_intena_m = VF_INT_DYN_CTLN_INTENA_M; + intr->dyn_ctl_itridx_s = VF_INT_DYN_CTLN_ITR_INDX_S; + + spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing, + IDPF_VF_ITR_IDX_SPACING); + rx_itr = VF_INT_ITRN_ADDR(VIRTCHNL2_ITR_IDX_0, + reg_vals[vec_id].itrn_reg, + spacing); + tx_itr = VF_INT_ITRN_ADDR(VIRTCHNL2_ITR_IDX_1, + reg_vals[vec_id].itrn_reg, + spacing); + intr->rx_itr = idpf_get_reg_addr(adapter, rx_itr); + intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr); + } + +free_reg_vals: + kfree(reg_vals); + + return err; +} + +/** + * idpf_vf_reset_reg_init - Initialize reset registers + * @adapter: Driver specific private structure + */ +static void idpf_vf_reset_reg_init(struct idpf_adapter *adapter) +{ + adapter->reset_reg.rstat = idpf_get_reg_addr(adapter, VFGEN_RSTAT); + adapter->reset_reg.rstat_m = VFGEN_RSTAT_VFR_STATE_M; +} + +/** + * idpf_vf_trigger_reset - trigger reset + * @adapter: Driver specific private structure + * @trig_cause: Reason to trigger a reset + */ +static void idpf_vf_trigger_reset(struct idpf_adapter *adapter, + enum idpf_flags trig_cause) +{ + /* Do not send VIRTCHNL2_OP_RESET_VF message on driver unload */ + if (trig_cause == IDPF_HR_FUNC_RESET && + !test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) + idpf_send_mb_msg(adapter, VIRTCHNL2_OP_RESET_VF, 0, NULL); +} + +/** + * idpf_vf_reg_ops_init - Initialize register API function pointers + * @adapter: Driver specific private structure + */ +static void idpf_vf_reg_ops_init(struct idpf_adapter *adapter) +{ + adapter->dev_ops.reg_ops.ctlq_reg_init = idpf_vf_ctlq_reg_init; + adapter->dev_ops.reg_ops.intr_reg_init = idpf_vf_intr_reg_init; + adapter->dev_ops.reg_ops.mb_intr_reg_init = idpf_vf_mb_intr_reg_init; + adapter->dev_ops.reg_ops.reset_reg_init = idpf_vf_reset_reg_init; + adapter->dev_ops.reg_ops.trigger_reset = idpf_vf_trigger_reset; +} + +/** + * idpf_vf_dev_ops_init - Initialize device API function pointers + * @adapter: Driver specific private structure + */ +void idpf_vf_dev_ops_init(struct idpf_adapter *adapter) +{ + idpf_vf_reg_ops_init(adapter); +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c new file mode 100644 index 000000000000..9bc85b2f1709 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -0,0 +1,3791 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" + +/** + * idpf_recv_event_msg - Receive virtchnl event message + * @vport: virtual port structure + * @ctlq_msg: message to copy from + * + * Receive virtchnl event message + */ +static void idpf_recv_event_msg(struct idpf_vport *vport, + struct idpf_ctlq_msg *ctlq_msg) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + struct virtchnl2_event *v2e; + bool link_status; + u32 event; + + v2e = (struct virtchnl2_event *)ctlq_msg->ctx.indirect.payload->va; + event = le32_to_cpu(v2e->event); + + switch (event) { + case VIRTCHNL2_EVENT_LINK_CHANGE: + vport->link_speed_mbps = le32_to_cpu(v2e->link_speed); + link_status = v2e->link_status; + + if (vport->link_up == link_status) + break; + + vport->link_up = link_status; + if (np->state == __IDPF_VPORT_UP) { + if (vport->link_up) { + netif_carrier_on(vport->netdev); + netif_tx_start_all_queues(vport->netdev); + } else { + netif_tx_stop_all_queues(vport->netdev); + netif_carrier_off(vport->netdev); + } + } + break; + default: + dev_err(&vport->adapter->pdev->dev, + "Unknown event %d from PF\n", event); + break; + } +} + +/** + * idpf_mb_clean - Reclaim the send mailbox queue entries + * @adapter: Driver specific private structure + * + * Reclaim the send mailbox queue entries to be used to send further messages + * + * Returns 0 on success, negative on failure + */ +static int idpf_mb_clean(struct idpf_adapter *adapter) +{ + u16 i, num_q_msg = IDPF_DFLT_MBX_Q_LEN; + struct idpf_ctlq_msg **q_msg; + struct idpf_dma_mem *dma_mem; + int err; + + q_msg = kcalloc(num_q_msg, sizeof(struct idpf_ctlq_msg *), GFP_ATOMIC); + if (!q_msg) + return -ENOMEM; + + err = idpf_ctlq_clean_sq(adapter->hw.asq, &num_q_msg, q_msg); + if (err) + goto err_kfree; + + for (i = 0; i < num_q_msg; i++) { + if (!q_msg[i]) + continue; + dma_mem = q_msg[i]->ctx.indirect.payload; + if (dma_mem) + dma_free_coherent(&adapter->pdev->dev, dma_mem->size, + dma_mem->va, dma_mem->pa); + kfree(q_msg[i]); + kfree(dma_mem); + } + +err_kfree: + kfree(q_msg); + + return err; +} + +/** + * idpf_send_mb_msg - Send message over mailbox + * @adapter: Driver specific private structure + * @op: virtchnl opcode + * @msg_size: size of the payload + * @msg: pointer to buffer holding the payload + * + * Will prepare the control queue message and initiates the send api + * + * Returns 0 on success, negative on failure + */ +int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op, + u16 msg_size, u8 *msg) +{ + struct idpf_ctlq_msg *ctlq_msg; + struct idpf_dma_mem *dma_mem; + int err; + + /* If we are here and a reset is detected nothing much can be + * done. This thread should silently abort and expected to + * be corrected with a new run either by user or driver + * flows after reset + */ + if (idpf_is_reset_detected(adapter)) + return 0; + + err = idpf_mb_clean(adapter); + if (err) + return err; + + ctlq_msg = kzalloc(sizeof(*ctlq_msg), GFP_ATOMIC); + if (!ctlq_msg) + return -ENOMEM; + + dma_mem = kzalloc(sizeof(*dma_mem), GFP_ATOMIC); + if (!dma_mem) { + err = -ENOMEM; + goto dma_mem_error; + } + + ctlq_msg->opcode = idpf_mbq_opc_send_msg_to_cp; + ctlq_msg->func_id = 0; + ctlq_msg->data_len = msg_size; + ctlq_msg->cookie.mbx.chnl_opcode = op; + ctlq_msg->cookie.mbx.chnl_retval = 0; + dma_mem->size = IDPF_CTLQ_MAX_BUF_LEN; + dma_mem->va = dma_alloc_coherent(&adapter->pdev->dev, dma_mem->size, + &dma_mem->pa, GFP_ATOMIC); + if (!dma_mem->va) { + err = -ENOMEM; + goto dma_alloc_error; + } + memcpy(dma_mem->va, msg, msg_size); + ctlq_msg->ctx.indirect.payload = dma_mem; + + err = idpf_ctlq_send(&adapter->hw, adapter->hw.asq, 1, ctlq_msg); + if (err) + goto send_error; + + return 0; + +send_error: + dma_free_coherent(&adapter->pdev->dev, dma_mem->size, dma_mem->va, + dma_mem->pa); +dma_alloc_error: + kfree(dma_mem); +dma_mem_error: + kfree(ctlq_msg); + + return err; +} + +/** + * idpf_find_vport - Find vport pointer from control queue message + * @adapter: driver specific private structure + * @vport: address of vport pointer to copy the vport from adapters vport list + * @ctlq_msg: control queue message + * + * Return 0 on success, error value on failure. Also this function does check + * for the opcodes which expect to receive payload and return error value if + * it is not the case. + */ +static int idpf_find_vport(struct idpf_adapter *adapter, + struct idpf_vport **vport, + struct idpf_ctlq_msg *ctlq_msg) +{ + bool no_op = false, vid_found = false; + int i, err = 0; + char *vc_msg; + u32 v_id; + + vc_msg = kcalloc(IDPF_CTLQ_MAX_BUF_LEN, sizeof(char), GFP_KERNEL); + if (!vc_msg) + return -ENOMEM; + + if (ctlq_msg->data_len) { + size_t payload_size = ctlq_msg->ctx.indirect.payload->size; + + if (!payload_size) { + dev_err(&adapter->pdev->dev, "Failed to receive payload buffer\n"); + kfree(vc_msg); + + return -EINVAL; + } + + memcpy(vc_msg, ctlq_msg->ctx.indirect.payload->va, + min_t(size_t, payload_size, IDPF_CTLQ_MAX_BUF_LEN)); + } + + switch (ctlq_msg->cookie.mbx.chnl_opcode) { + case VIRTCHNL2_OP_VERSION: + case VIRTCHNL2_OP_GET_CAPS: + case VIRTCHNL2_OP_CREATE_VPORT: + case VIRTCHNL2_OP_SET_SRIOV_VFS: + case VIRTCHNL2_OP_ALLOC_VECTORS: + case VIRTCHNL2_OP_DEALLOC_VECTORS: + case VIRTCHNL2_OP_GET_PTYPE_INFO: + goto free_vc_msg; + case VIRTCHNL2_OP_ENABLE_VPORT: + case VIRTCHNL2_OP_DISABLE_VPORT: + case VIRTCHNL2_OP_DESTROY_VPORT: + v_id = le32_to_cpu(((struct virtchnl2_vport *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_CONFIG_TX_QUEUES: + v_id = le32_to_cpu(((struct virtchnl2_config_tx_queues *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_CONFIG_RX_QUEUES: + v_id = le32_to_cpu(((struct virtchnl2_config_rx_queues *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_ENABLE_QUEUES: + case VIRTCHNL2_OP_DISABLE_QUEUES: + case VIRTCHNL2_OP_DEL_QUEUES: + v_id = le32_to_cpu(((struct virtchnl2_del_ena_dis_queues *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_ADD_QUEUES: + v_id = le32_to_cpu(((struct virtchnl2_add_queues *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_MAP_QUEUE_VECTOR: + case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR: + v_id = le32_to_cpu(((struct virtchnl2_queue_vector_maps *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_GET_STATS: + v_id = le32_to_cpu(((struct virtchnl2_vport_stats *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_GET_RSS_LUT: + case VIRTCHNL2_OP_SET_RSS_LUT: + v_id = le32_to_cpu(((struct virtchnl2_rss_lut *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_GET_RSS_KEY: + case VIRTCHNL2_OP_SET_RSS_KEY: + v_id = le32_to_cpu(((struct virtchnl2_rss_key *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_EVENT: + v_id = le32_to_cpu(((struct virtchnl2_event *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_LOOPBACK: + v_id = le32_to_cpu(((struct virtchnl2_loopback *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE: + v_id = le32_to_cpu(((struct virtchnl2_promisc_info *)vc_msg)->vport_id); + break; + case VIRTCHNL2_OP_ADD_MAC_ADDR: + case VIRTCHNL2_OP_DEL_MAC_ADDR: + v_id = le32_to_cpu(((struct virtchnl2_mac_addr_list *)vc_msg)->vport_id); + break; + default: + no_op = true; + break; + } + + if (no_op) + goto free_vc_msg; + + for (i = 0; i < idpf_get_max_vports(adapter); i++) { + if (adapter->vport_ids[i] == v_id) { + vid_found = true; + break; + } + } + + if (vid_found) + *vport = adapter->vports[i]; + else + err = -EINVAL; + +free_vc_msg: + kfree(vc_msg); + + return err; +} + +/** + * idpf_copy_data_to_vc_buf - Copy the virtchnl response data into the buffer. + * @adapter: driver specific private structure + * @vport: virtual port structure + * @ctlq_msg: msg to copy from + * @err_enum: err bit to set on error + * + * Copies the payload from ctlq_msg into virtchnl buffer. Returns 0 on success, + * negative on failure. + */ +static int idpf_copy_data_to_vc_buf(struct idpf_adapter *adapter, + struct idpf_vport *vport, + struct idpf_ctlq_msg *ctlq_msg, + enum idpf_vport_vc_state err_enum) +{ + if (ctlq_msg->cookie.mbx.chnl_retval) { + if (vport) + set_bit(err_enum, vport->vc_state); + else + set_bit(err_enum, adapter->vc_state); + + return -EINVAL; + } + + if (vport) + memcpy(vport->vc_msg, ctlq_msg->ctx.indirect.payload->va, + min_t(int, ctlq_msg->ctx.indirect.payload->size, + IDPF_CTLQ_MAX_BUF_LEN)); + else + memcpy(adapter->vc_msg, ctlq_msg->ctx.indirect.payload->va, + min_t(int, ctlq_msg->ctx.indirect.payload->size, + IDPF_CTLQ_MAX_BUF_LEN)); + + return 0; +} + +/** + * idpf_recv_vchnl_op - helper function with common logic when handling the + * reception of VIRTCHNL OPs. + * @adapter: driver specific private structure + * @vport: virtual port structure + * @ctlq_msg: msg to copy from + * @state: state bit used on timeout check + * @err_state: err bit to set on error + */ +static void idpf_recv_vchnl_op(struct idpf_adapter *adapter, + struct idpf_vport *vport, + struct idpf_ctlq_msg *ctlq_msg, + enum idpf_vport_vc_state state, + enum idpf_vport_vc_state err_state) +{ + wait_queue_head_t *vchnl_wq; + int err; + + if (vport) + vchnl_wq = &vport->vchnl_wq; + else + vchnl_wq = &adapter->vchnl_wq; + + err = idpf_copy_data_to_vc_buf(adapter, vport, ctlq_msg, err_state); + if (wq_has_sleeper(vchnl_wq)) { + if (vport) + set_bit(state, vport->vc_state); + else + set_bit(state, adapter->vc_state); + + wake_up(vchnl_wq); + } else { + if (!err) { + dev_warn(&adapter->pdev->dev, "opcode %d received without waiting thread\n", + ctlq_msg->cookie.mbx.chnl_opcode); + } else { + /* Clear the errors since there is no sleeper to pass + * them on + */ + if (vport) + clear_bit(err_state, vport->vc_state); + else + clear_bit(err_state, adapter->vc_state); + } + } +} + +/** + * idpf_recv_mb_msg - Receive message over mailbox + * @adapter: Driver specific private structure + * @op: virtchannel operation code + * @msg: Received message holding buffer + * @msg_size: message size + * + * Will receive control queue message and posts the receive buffer. Returns 0 + * on success and negative on failure. + */ +int idpf_recv_mb_msg(struct idpf_adapter *adapter, u32 op, + void *msg, int msg_size) +{ + struct idpf_vport *vport = NULL; + struct idpf_ctlq_msg ctlq_msg; + struct idpf_dma_mem *dma_mem; + bool work_done = false; + int num_retry = 2000; + u16 num_q_msg; + int err; + + while (1) { + struct idpf_vport_config *vport_config; + int payload_size = 0; + + /* Try to get one message */ + num_q_msg = 1; + dma_mem = NULL; + err = idpf_ctlq_recv(adapter->hw.arq, &num_q_msg, &ctlq_msg); + /* If no message then decide if we have to retry based on + * opcode + */ + if (err || !num_q_msg) { + /* Increasing num_retry to consider the delayed + * responses because of large number of VF's mailbox + * messages. If the mailbox message is received from + * the other side, we come out of the sleep cycle + * immediately else we wait for more time. + */ + if (!op || !num_retry--) + break; + if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) { + err = -EIO; + break; + } + msleep(20); + continue; + } + + /* If we are here a message is received. Check if we are looking + * for a specific message based on opcode. If it is different + * ignore and post buffers + */ + if (op && ctlq_msg.cookie.mbx.chnl_opcode != op) + goto post_buffs; + + err = idpf_find_vport(adapter, &vport, &ctlq_msg); + if (err) + goto post_buffs; + + if (ctlq_msg.data_len) + payload_size = ctlq_msg.ctx.indirect.payload->size; + + /* All conditions are met. Either a message requested is + * received or we received a message to be processed + */ + switch (ctlq_msg.cookie.mbx.chnl_opcode) { + case VIRTCHNL2_OP_VERSION: + case VIRTCHNL2_OP_GET_CAPS: + if (ctlq_msg.cookie.mbx.chnl_retval) { + dev_err(&adapter->pdev->dev, "Failure initializing, vc op: %u retval: %u\n", + ctlq_msg.cookie.mbx.chnl_opcode, + ctlq_msg.cookie.mbx.chnl_retval); + err = -EBADMSG; + } else if (msg) { + memcpy(msg, ctlq_msg.ctx.indirect.payload->va, + min_t(int, payload_size, msg_size)); + } + work_done = true; + break; + case VIRTCHNL2_OP_CREATE_VPORT: + idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg, + IDPF_VC_CREATE_VPORT, + IDPF_VC_CREATE_VPORT_ERR); + break; + case VIRTCHNL2_OP_ENABLE_VPORT: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_ENA_VPORT, + IDPF_VC_ENA_VPORT_ERR); + break; + case VIRTCHNL2_OP_DISABLE_VPORT: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_DIS_VPORT, + IDPF_VC_DIS_VPORT_ERR); + break; + case VIRTCHNL2_OP_DESTROY_VPORT: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_DESTROY_VPORT, + IDPF_VC_DESTROY_VPORT_ERR); + break; + case VIRTCHNL2_OP_CONFIG_TX_QUEUES: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_CONFIG_TXQ, + IDPF_VC_CONFIG_TXQ_ERR); + break; + case VIRTCHNL2_OP_CONFIG_RX_QUEUES: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_CONFIG_RXQ, + IDPF_VC_CONFIG_RXQ_ERR); + break; + case VIRTCHNL2_OP_ENABLE_QUEUES: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_ENA_QUEUES, + IDPF_VC_ENA_QUEUES_ERR); + break; + case VIRTCHNL2_OP_DISABLE_QUEUES: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_DIS_QUEUES, + IDPF_VC_DIS_QUEUES_ERR); + break; + case VIRTCHNL2_OP_ADD_QUEUES: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_ADD_QUEUES, + IDPF_VC_ADD_QUEUES_ERR); + break; + case VIRTCHNL2_OP_DEL_QUEUES: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_DEL_QUEUES, + IDPF_VC_DEL_QUEUES_ERR); + break; + case VIRTCHNL2_OP_MAP_QUEUE_VECTOR: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_MAP_IRQ, + IDPF_VC_MAP_IRQ_ERR); + break; + case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_UNMAP_IRQ, + IDPF_VC_UNMAP_IRQ_ERR); + break; + case VIRTCHNL2_OP_GET_STATS: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_GET_STATS, + IDPF_VC_GET_STATS_ERR); + break; + case VIRTCHNL2_OP_GET_RSS_LUT: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_GET_RSS_LUT, + IDPF_VC_GET_RSS_LUT_ERR); + break; + case VIRTCHNL2_OP_SET_RSS_LUT: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_SET_RSS_LUT, + IDPF_VC_SET_RSS_LUT_ERR); + break; + case VIRTCHNL2_OP_GET_RSS_KEY: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_GET_RSS_KEY, + IDPF_VC_GET_RSS_KEY_ERR); + break; + case VIRTCHNL2_OP_SET_RSS_KEY: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_SET_RSS_KEY, + IDPF_VC_SET_RSS_KEY_ERR); + break; + case VIRTCHNL2_OP_SET_SRIOV_VFS: + idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg, + IDPF_VC_SET_SRIOV_VFS, + IDPF_VC_SET_SRIOV_VFS_ERR); + break; + case VIRTCHNL2_OP_ALLOC_VECTORS: + idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg, + IDPF_VC_ALLOC_VECTORS, + IDPF_VC_ALLOC_VECTORS_ERR); + break; + case VIRTCHNL2_OP_DEALLOC_VECTORS: + idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg, + IDPF_VC_DEALLOC_VECTORS, + IDPF_VC_DEALLOC_VECTORS_ERR); + break; + case VIRTCHNL2_OP_GET_PTYPE_INFO: + idpf_recv_vchnl_op(adapter, NULL, &ctlq_msg, + IDPF_VC_GET_PTYPE_INFO, + IDPF_VC_GET_PTYPE_INFO_ERR); + break; + case VIRTCHNL2_OP_LOOPBACK: + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_LOOPBACK_STATE, + IDPF_VC_LOOPBACK_STATE_ERR); + break; + case VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE: + /* This message can only be sent asynchronously. As + * such we'll have lost the context in which it was + * called and thus can only really report if it looks + * like an error occurred. Don't bother setting ERR bit + * or waking chnl_wq since no work queue will be waiting + * to read the message. + */ + if (ctlq_msg.cookie.mbx.chnl_retval) { + dev_err(&adapter->pdev->dev, "Failed to set promiscuous mode: %d\n", + ctlq_msg.cookie.mbx.chnl_retval); + } + break; + case VIRTCHNL2_OP_ADD_MAC_ADDR: + vport_config = adapter->vport_config[vport->idx]; + if (test_and_clear_bit(IDPF_VPORT_ADD_MAC_REQ, + vport_config->flags)) { + /* Message was sent asynchronously. We don't + * normally print errors here, instead + * prefer to handle errors in the function + * calling wait_for_event. However, if + * asynchronous, the context in which the + * message was sent is lost. We can't really do + * anything about at it this point, but we + * should at a minimum indicate that it looks + * like something went wrong. Also don't bother + * setting ERR bit or waking vchnl_wq since no + * one will be waiting to read the async + * message. + */ + if (ctlq_msg.cookie.mbx.chnl_retval) + dev_err(&adapter->pdev->dev, "Failed to add MAC address: %d\n", + ctlq_msg.cookie.mbx.chnl_retval); + break; + } + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_ADD_MAC_ADDR, + IDPF_VC_ADD_MAC_ADDR_ERR); + break; + case VIRTCHNL2_OP_DEL_MAC_ADDR: + vport_config = adapter->vport_config[vport->idx]; + if (test_and_clear_bit(IDPF_VPORT_DEL_MAC_REQ, + vport_config->flags)) { + /* Message was sent asynchronously like the + * VIRTCHNL2_OP_ADD_MAC_ADDR + */ + if (ctlq_msg.cookie.mbx.chnl_retval) + dev_err(&adapter->pdev->dev, "Failed to delete MAC address: %d\n", + ctlq_msg.cookie.mbx.chnl_retval); + break; + } + idpf_recv_vchnl_op(adapter, vport, &ctlq_msg, + IDPF_VC_DEL_MAC_ADDR, + IDPF_VC_DEL_MAC_ADDR_ERR); + break; + case VIRTCHNL2_OP_EVENT: + idpf_recv_event_msg(vport, &ctlq_msg); + break; + default: + dev_warn(&adapter->pdev->dev, + "Unhandled virtchnl response %d\n", + ctlq_msg.cookie.mbx.chnl_opcode); + break; + } + +post_buffs: + if (ctlq_msg.data_len) + dma_mem = ctlq_msg.ctx.indirect.payload; + else + num_q_msg = 0; + + err = idpf_ctlq_post_rx_buffs(&adapter->hw, adapter->hw.arq, + &num_q_msg, &dma_mem); + /* If post failed clear the only buffer we supplied */ + if (err && dma_mem) + dma_free_coherent(&adapter->pdev->dev, dma_mem->size, + dma_mem->va, dma_mem->pa); + + /* Applies only if we are looking for a specific opcode */ + if (work_done) + break; + } + + return err; +} + +/** + * __idpf_wait_for_event - wrapper function for wait on virtchannel response + * @adapter: Driver private data structure + * @vport: virtual port structure + * @state: check on state upon timeout + * @err_check: check if this specific error bit is set + * @timeout: Max time to wait + * + * Checks if state is set upon expiry of timeout. Returns 0 on success, + * negative on failure. + */ +static int __idpf_wait_for_event(struct idpf_adapter *adapter, + struct idpf_vport *vport, + enum idpf_vport_vc_state state, + enum idpf_vport_vc_state err_check, + int timeout) +{ + int time_to_wait, num_waits; + wait_queue_head_t *vchnl_wq; + unsigned long *vc_state; + + time_to_wait = ((timeout <= IDPF_MAX_WAIT) ? timeout : IDPF_MAX_WAIT); + num_waits = ((timeout <= IDPF_MAX_WAIT) ? 1 : timeout / IDPF_MAX_WAIT); + + if (vport) { + vchnl_wq = &vport->vchnl_wq; + vc_state = vport->vc_state; + } else { + vchnl_wq = &adapter->vchnl_wq; + vc_state = adapter->vc_state; + } + + while (num_waits) { + int event; + + /* If we are here and a reset is detected do not wait but + * return. Reset timing is out of drivers control. So + * while we are cleaning resources as part of reset if the + * underlying HW mailbox is gone, wait on mailbox messages + * is not meaningful + */ + if (idpf_is_reset_detected(adapter)) + return 0; + + event = wait_event_timeout(*vchnl_wq, + test_and_clear_bit(state, vc_state), + msecs_to_jiffies(time_to_wait)); + if (event) { + if (test_and_clear_bit(err_check, vc_state)) { + dev_err(&adapter->pdev->dev, "VC response error %s\n", + idpf_vport_vc_state_str[err_check]); + + return -EINVAL; + } + + return 0; + } + num_waits--; + } + + /* Timeout occurred */ + dev_err(&adapter->pdev->dev, "VC timeout, state = %s\n", + idpf_vport_vc_state_str[state]); + + return -ETIMEDOUT; +} + +/** + * idpf_min_wait_for_event - wait for virtchannel response + * @adapter: Driver private data structure + * @vport: virtual port structure + * @state: check on state upon timeout + * @err_check: check if this specific error bit is set + * + * Returns 0 on success, negative on failure. + */ +static int idpf_min_wait_for_event(struct idpf_adapter *adapter, + struct idpf_vport *vport, + enum idpf_vport_vc_state state, + enum idpf_vport_vc_state err_check) +{ + return __idpf_wait_for_event(adapter, vport, state, err_check, + IDPF_WAIT_FOR_EVENT_TIMEO_MIN); +} + +/** + * idpf_wait_for_event - wait for virtchannel response + * @adapter: Driver private data structure + * @vport: virtual port structure + * @state: check on state upon timeout after 500ms + * @err_check: check if this specific error bit is set + * + * Returns 0 on success, negative on failure. + */ +static int idpf_wait_for_event(struct idpf_adapter *adapter, + struct idpf_vport *vport, + enum idpf_vport_vc_state state, + enum idpf_vport_vc_state err_check) +{ + /* Increasing the timeout in __IDPF_INIT_SW flow to consider large + * number of VF's mailbox message responses. When a message is received + * on mailbox, this thread is woken up by the idpf_recv_mb_msg before + * the timeout expires. Only in the error case i.e. if no message is + * received on mailbox, we wait for the complete timeout which is + * less likely to happen. + */ + return __idpf_wait_for_event(adapter, vport, state, err_check, + IDPF_WAIT_FOR_EVENT_TIMEO); +} + +/** + * idpf_wait_for_marker_event - wait for software marker response + * @vport: virtual port data structure + * + * Returns 0 success, negative on failure. + **/ +static int idpf_wait_for_marker_event(struct idpf_vport *vport) +{ + int event; + int i; + + for (i = 0; i < vport->num_txq; i++) + set_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags); + + event = wait_event_timeout(vport->sw_marker_wq, + test_and_clear_bit(IDPF_VPORT_SW_MARKER, + vport->flags), + msecs_to_jiffies(500)); + + for (i = 0; i < vport->num_txq; i++) + clear_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); + + if (event) + return 0; + + dev_warn(&vport->adapter->pdev->dev, "Failed to receive marker packets\n"); + + return -ETIMEDOUT; +} + +/** + * idpf_send_ver_msg - send virtchnl version message + * @adapter: Driver specific private structure + * + * Send virtchnl version message. Returns 0 on success, negative on failure. + */ +static int idpf_send_ver_msg(struct idpf_adapter *adapter) +{ + struct virtchnl2_version_info vvi; + + if (adapter->virt_ver_maj) { + vvi.major = cpu_to_le32(adapter->virt_ver_maj); + vvi.minor = cpu_to_le32(adapter->virt_ver_min); + } else { + vvi.major = cpu_to_le32(IDPF_VIRTCHNL_VERSION_MAJOR); + vvi.minor = cpu_to_le32(IDPF_VIRTCHNL_VERSION_MINOR); + } + + return idpf_send_mb_msg(adapter, VIRTCHNL2_OP_VERSION, sizeof(vvi), + (u8 *)&vvi); +} + +/** + * idpf_recv_ver_msg - Receive virtchnl version message + * @adapter: Driver specific private structure + * + * Receive virtchnl version message. Returns 0 on success, -EAGAIN if we need + * to send version message again, otherwise negative on failure. + */ +static int idpf_recv_ver_msg(struct idpf_adapter *adapter) +{ + struct virtchnl2_version_info vvi; + u32 major, minor; + int err; + + err = idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_VERSION, &vvi, + sizeof(vvi)); + if (err) + return err; + + major = le32_to_cpu(vvi.major); + minor = le32_to_cpu(vvi.minor); + + if (major > IDPF_VIRTCHNL_VERSION_MAJOR) { + dev_warn(&adapter->pdev->dev, + "Virtchnl major version (%d) greater than supported\n", + major); + + return -EINVAL; + } + + if (major == IDPF_VIRTCHNL_VERSION_MAJOR && + minor > IDPF_VIRTCHNL_VERSION_MINOR) + dev_warn(&adapter->pdev->dev, + "Virtchnl minor version (%d) didn't match\n", minor); + + /* If we have a mismatch, resend version to update receiver on what + * version we will use. + */ + if (!adapter->virt_ver_maj && + major != IDPF_VIRTCHNL_VERSION_MAJOR && + minor != IDPF_VIRTCHNL_VERSION_MINOR) + err = -EAGAIN; + + adapter->virt_ver_maj = major; + adapter->virt_ver_min = minor; + + return err; +} + +/** + * idpf_send_get_caps_msg - Send virtchnl get capabilities message + * @adapter: Driver specific private structure + * + * Send virtchl get capabilities message. Returns 0 on success, negative on + * failure. + */ +static int idpf_send_get_caps_msg(struct idpf_adapter *adapter) +{ + struct virtchnl2_get_capabilities caps = { }; + + caps.csum_caps = + cpu_to_le32(VIRTCHNL2_CAP_TX_CSUM_L3_IPV4 | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP | + VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP | + VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL | + VIRTCHNL2_CAP_RX_CSUM_L3_SINGLE_TUNNEL | + VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL | + VIRTCHNL2_CAP_RX_CSUM_L4_SINGLE_TUNNEL | + VIRTCHNL2_CAP_RX_CSUM_GENERIC); + + caps.seg_caps = + cpu_to_le32(VIRTCHNL2_CAP_SEG_IPV4_TCP | + VIRTCHNL2_CAP_SEG_IPV4_UDP | + VIRTCHNL2_CAP_SEG_IPV4_SCTP | + VIRTCHNL2_CAP_SEG_IPV6_TCP | + VIRTCHNL2_CAP_SEG_IPV6_UDP | + VIRTCHNL2_CAP_SEG_IPV6_SCTP | + VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL); + + caps.rss_caps = + cpu_to_le64(VIRTCHNL2_CAP_RSS_IPV4_TCP | + VIRTCHNL2_CAP_RSS_IPV4_UDP | + VIRTCHNL2_CAP_RSS_IPV4_SCTP | + VIRTCHNL2_CAP_RSS_IPV4_OTHER | + VIRTCHNL2_CAP_RSS_IPV6_TCP | + VIRTCHNL2_CAP_RSS_IPV6_UDP | + VIRTCHNL2_CAP_RSS_IPV6_SCTP | + VIRTCHNL2_CAP_RSS_IPV6_OTHER); + + caps.hsplit_caps = + cpu_to_le32(VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 | + VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6); + + caps.rsc_caps = + cpu_to_le32(VIRTCHNL2_CAP_RSC_IPV4_TCP | + VIRTCHNL2_CAP_RSC_IPV6_TCP); + + caps.other_caps = + cpu_to_le64(VIRTCHNL2_CAP_SRIOV | + VIRTCHNL2_CAP_MACFILTER | + VIRTCHNL2_CAP_SPLITQ_QSCHED | + VIRTCHNL2_CAP_PROMISC | + VIRTCHNL2_CAP_LOOPBACK); + + return idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_CAPS, sizeof(caps), + (u8 *)&caps); +} + +/** + * idpf_recv_get_caps_msg - Receive virtchnl get capabilities message + * @adapter: Driver specific private structure + * + * Receive virtchnl get capabilities message. Returns 0 on success, negative on + * failure. + */ +static int idpf_recv_get_caps_msg(struct idpf_adapter *adapter) +{ + return idpf_recv_mb_msg(adapter, VIRTCHNL2_OP_GET_CAPS, &adapter->caps, + sizeof(struct virtchnl2_get_capabilities)); +} + +/** + * idpf_vport_alloc_max_qs - Allocate max queues for a vport + * @adapter: Driver specific private structure + * @max_q: vport max queue structure + */ +int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q) +{ + struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues; + struct virtchnl2_get_capabilities *caps = &adapter->caps; + u16 default_vports = idpf_get_default_vports(adapter); + int max_rx_q, max_tx_q; + + mutex_lock(&adapter->queue_lock); + + max_rx_q = le16_to_cpu(caps->max_rx_q) / default_vports; + max_tx_q = le16_to_cpu(caps->max_tx_q) / default_vports; + if (adapter->num_alloc_vports < default_vports) { + max_q->max_rxq = min_t(u16, max_rx_q, IDPF_MAX_Q); + max_q->max_txq = min_t(u16, max_tx_q, IDPF_MAX_Q); + } else { + max_q->max_rxq = IDPF_MIN_Q; + max_q->max_txq = IDPF_MIN_Q; + } + max_q->max_bufq = max_q->max_rxq * IDPF_MAX_BUFQS_PER_RXQ_GRP; + max_q->max_complq = max_q->max_txq; + + if (avail_queues->avail_rxq < max_q->max_rxq || + avail_queues->avail_txq < max_q->max_txq || + avail_queues->avail_bufq < max_q->max_bufq || + avail_queues->avail_complq < max_q->max_complq) { + mutex_unlock(&adapter->queue_lock); + + return -EINVAL; + } + + avail_queues->avail_rxq -= max_q->max_rxq; + avail_queues->avail_txq -= max_q->max_txq; + avail_queues->avail_bufq -= max_q->max_bufq; + avail_queues->avail_complq -= max_q->max_complq; + + mutex_unlock(&adapter->queue_lock); + + return 0; +} + +/** + * idpf_vport_dealloc_max_qs - Deallocate max queues of a vport + * @adapter: Driver specific private structure + * @max_q: vport max queue structure + */ +void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q) +{ + struct idpf_avail_queue_info *avail_queues; + + mutex_lock(&adapter->queue_lock); + avail_queues = &adapter->avail_queues; + + avail_queues->avail_rxq += max_q->max_rxq; + avail_queues->avail_txq += max_q->max_txq; + avail_queues->avail_bufq += max_q->max_bufq; + avail_queues->avail_complq += max_q->max_complq; + + mutex_unlock(&adapter->queue_lock); +} + +/** + * idpf_init_avail_queues - Initialize available queues on the device + * @adapter: Driver specific private structure + */ +static void idpf_init_avail_queues(struct idpf_adapter *adapter) +{ + struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues; + struct virtchnl2_get_capabilities *caps = &adapter->caps; + + avail_queues->avail_rxq = le16_to_cpu(caps->max_rx_q); + avail_queues->avail_txq = le16_to_cpu(caps->max_tx_q); + avail_queues->avail_bufq = le16_to_cpu(caps->max_rx_bufq); + avail_queues->avail_complq = le16_to_cpu(caps->max_tx_complq); +} + +/** + * idpf_get_reg_intr_vecs - Get vector queue register offset + * @vport: virtual port structure + * @reg_vals: Register offsets to store in + * + * Returns number of registers that got populated + */ +int idpf_get_reg_intr_vecs(struct idpf_vport *vport, + struct idpf_vec_regs *reg_vals) +{ + struct virtchnl2_vector_chunks *chunks; + struct idpf_vec_regs reg_val; + u16 num_vchunks, num_vec; + int num_regs = 0, i, j; + + chunks = &vport->adapter->req_vec_chunks->vchunks; + num_vchunks = le16_to_cpu(chunks->num_vchunks); + + for (j = 0; j < num_vchunks; j++) { + struct virtchnl2_vector_chunk *chunk; + u32 dynctl_reg_spacing; + u32 itrn_reg_spacing; + + chunk = &chunks->vchunks[j]; + num_vec = le16_to_cpu(chunk->num_vectors); + reg_val.dyn_ctl_reg = le32_to_cpu(chunk->dynctl_reg_start); + reg_val.itrn_reg = le32_to_cpu(chunk->itrn_reg_start); + reg_val.itrn_index_spacing = le32_to_cpu(chunk->itrn_index_spacing); + + dynctl_reg_spacing = le32_to_cpu(chunk->dynctl_reg_spacing); + itrn_reg_spacing = le32_to_cpu(chunk->itrn_reg_spacing); + + for (i = 0; i < num_vec; i++) { + reg_vals[num_regs].dyn_ctl_reg = reg_val.dyn_ctl_reg; + reg_vals[num_regs].itrn_reg = reg_val.itrn_reg; + reg_vals[num_regs].itrn_index_spacing = + reg_val.itrn_index_spacing; + + reg_val.dyn_ctl_reg += dynctl_reg_spacing; + reg_val.itrn_reg += itrn_reg_spacing; + num_regs++; + } + } + + return num_regs; +} + +/** + * idpf_vport_get_q_reg - Get the queue registers for the vport + * @reg_vals: register values needing to be set + * @num_regs: amount we expect to fill + * @q_type: queue model + * @chunks: queue regs received over mailbox + * + * This function parses the queue register offsets from the queue register + * chunk information, with a specific queue type and stores it into the array + * passed as an argument. It returns the actual number of queue registers that + * are filled. + */ +static int idpf_vport_get_q_reg(u32 *reg_vals, int num_regs, u32 q_type, + struct virtchnl2_queue_reg_chunks *chunks) +{ + u16 num_chunks = le16_to_cpu(chunks->num_chunks); + int reg_filled = 0, i; + u32 reg_val; + + while (num_chunks--) { + struct virtchnl2_queue_reg_chunk *chunk; + u16 num_q; + + chunk = &chunks->chunks[num_chunks]; + if (le32_to_cpu(chunk->type) != q_type) + continue; + + num_q = le32_to_cpu(chunk->num_queues); + reg_val = le64_to_cpu(chunk->qtail_reg_start); + for (i = 0; i < num_q && reg_filled < num_regs ; i++) { + reg_vals[reg_filled++] = reg_val; + reg_val += le32_to_cpu(chunk->qtail_reg_spacing); + } + } + + return reg_filled; +} + +/** + * __idpf_queue_reg_init - initialize queue registers + * @vport: virtual port structure + * @reg_vals: registers we are initializing + * @num_regs: how many registers there are in total + * @q_type: queue model + * + * Return number of queues that are initialized + */ +static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, + int num_regs, u32 q_type) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_queue *q; + int i, j, k = 0; + + switch (q_type) { + case VIRTCHNL2_QUEUE_TYPE_TX: + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq && k < num_regs; j++, k++) + tx_qgrp->txqs[j]->tail = + idpf_get_reg_addr(adapter, reg_vals[k]); + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX: + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u16 num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq && k < num_regs; j++, k++) { + q = rx_qgrp->singleq.rxqs[j]; + q->tail = idpf_get_reg_addr(adapter, + reg_vals[k]); + } + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u8 num_bufqs = vport->num_bufqs_per_qgrp; + + for (j = 0; j < num_bufqs && k < num_regs; j++, k++) { + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + q->tail = idpf_get_reg_addr(adapter, + reg_vals[k]); + } + } + break; + default: + break; + } + + return k; +} + +/** + * idpf_queue_reg_init - initialize queue registers + * @vport: virtual port structure + * + * Return 0 on success, negative on failure + */ +int idpf_queue_reg_init(struct idpf_vport *vport) +{ + struct virtchnl2_create_vport *vport_params; + struct virtchnl2_queue_reg_chunks *chunks; + struct idpf_vport_config *vport_config; + u16 vport_idx = vport->idx; + int num_regs, ret = 0; + u32 *reg_vals; + + /* We may never deal with more than 256 same type of queues */ + reg_vals = kzalloc(sizeof(void *) * IDPF_LARGE_MAX_Q, GFP_KERNEL); + if (!reg_vals) + return -ENOMEM; + + vport_config = vport->adapter->vport_config[vport_idx]; + if (vport_config->req_qs_chunks) { + struct virtchnl2_add_queues *vc_aq = + (struct virtchnl2_add_queues *)vport_config->req_qs_chunks; + chunks = &vc_aq->chunks; + } else { + vport_params = vport->adapter->vport_params_recvd[vport_idx]; + chunks = &vport_params->chunks; + } + + /* Initialize Tx queue tail register address */ + num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q, + VIRTCHNL2_QUEUE_TYPE_TX, + chunks); + if (num_regs < vport->num_txq) { + ret = -EINVAL; + goto free_reg_vals; + } + + num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs, + VIRTCHNL2_QUEUE_TYPE_TX); + if (num_regs < vport->num_txq) { + ret = -EINVAL; + goto free_reg_vals; + } + + /* Initialize Rx/buffer queue tail register address based on Rx queue + * model + */ + if (idpf_is_queue_model_split(vport->rxq_model)) { + num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q, + VIRTCHNL2_QUEUE_TYPE_RX_BUFFER, + chunks); + if (num_regs < vport->num_bufq) { + ret = -EINVAL; + goto free_reg_vals; + } + + num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs, + VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); + if (num_regs < vport->num_bufq) { + ret = -EINVAL; + goto free_reg_vals; + } + } else { + num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q, + VIRTCHNL2_QUEUE_TYPE_RX, + chunks); + if (num_regs < vport->num_rxq) { + ret = -EINVAL; + goto free_reg_vals; + } + + num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs, + VIRTCHNL2_QUEUE_TYPE_RX); + if (num_regs < vport->num_rxq) { + ret = -EINVAL; + goto free_reg_vals; + } + } + +free_reg_vals: + kfree(reg_vals); + + return ret; +} + +/** + * idpf_send_create_vport_msg - Send virtchnl create vport message + * @adapter: Driver specific private structure + * @max_q: vport max queue info + * + * send virtchnl creae vport message + * + * Returns 0 on success, negative on failure + */ +int idpf_send_create_vport_msg(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q) +{ + struct virtchnl2_create_vport *vport_msg; + u16 idx = adapter->next_vport; + int err, buf_size; + + buf_size = sizeof(struct virtchnl2_create_vport); + if (!adapter->vport_params_reqd[idx]) { + adapter->vport_params_reqd[idx] = kzalloc(buf_size, + GFP_KERNEL); + if (!adapter->vport_params_reqd[idx]) + return -ENOMEM; + } + + vport_msg = adapter->vport_params_reqd[idx]; + vport_msg->vport_type = cpu_to_le16(VIRTCHNL2_VPORT_TYPE_DEFAULT); + vport_msg->vport_index = cpu_to_le16(idx); + + if (adapter->req_tx_splitq) + vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT); + else + vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE); + + if (adapter->req_rx_splitq) + vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT); + else + vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE); + + err = idpf_vport_calc_total_qs(adapter, idx, vport_msg, max_q); + if (err) { + dev_err(&adapter->pdev->dev, "Enough queues are not available"); + + return err; + } + + mutex_lock(&adapter->vc_buf_lock); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_CREATE_VPORT, buf_size, + (u8 *)vport_msg); + if (err) + goto rel_lock; + + err = idpf_wait_for_event(adapter, NULL, IDPF_VC_CREATE_VPORT, + IDPF_VC_CREATE_VPORT_ERR); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to receive create vport message"); + + goto rel_lock; + } + + if (!adapter->vport_params_recvd[idx]) { + adapter->vport_params_recvd[idx] = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, + GFP_KERNEL); + if (!adapter->vport_params_recvd[idx]) { + err = -ENOMEM; + goto rel_lock; + } + } + + vport_msg = adapter->vport_params_recvd[idx]; + memcpy(vport_msg, adapter->vc_msg, IDPF_CTLQ_MAX_BUF_LEN); + +rel_lock: + mutex_unlock(&adapter->vc_buf_lock); + + return err; +} + +/** + * idpf_check_supported_desc_ids - Verify we have required descriptor support + * @vport: virtual port structure + * + * Return 0 on success, error on failure + */ +int idpf_check_supported_desc_ids(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_create_vport *vport_msg; + u64 rx_desc_ids, tx_desc_ids; + + vport_msg = adapter->vport_params_recvd[vport->idx]; + + rx_desc_ids = le64_to_cpu(vport_msg->rx_desc_ids); + tx_desc_ids = le64_to_cpu(vport_msg->tx_desc_ids); + + if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + if (!(rx_desc_ids & VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M)) { + dev_info(&adapter->pdev->dev, "Minimum RX descriptor support not provided, using the default\n"); + vport_msg->rx_desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); + } + } else { + if (!(rx_desc_ids & VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M)) + vport->base_rxd = true; + } + + if (vport->txq_model != VIRTCHNL2_QUEUE_MODEL_SPLIT) + return 0; + + if ((tx_desc_ids & MIN_SUPPORT_TXDID) != MIN_SUPPORT_TXDID) { + dev_info(&adapter->pdev->dev, "Minimum TX descriptor support not provided, using the default\n"); + vport_msg->tx_desc_ids = cpu_to_le64(MIN_SUPPORT_TXDID); + } + + return 0; +} + +/** + * idpf_send_destroy_vport_msg - Send virtchnl destroy vport message + * @vport: virtual port data structure + * + * Send virtchnl destroy vport message. Returns 0 on success, negative on + * failure. + */ +int idpf_send_destroy_vport_msg(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_vport v_id; + int err; + + v_id.vport_id = cpu_to_le32(vport->vport_id); + + mutex_lock(&vport->vc_buf_lock); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DESTROY_VPORT, + sizeof(v_id), (u8 *)&v_id); + if (err) + goto rel_lock; + + err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DESTROY_VPORT, + IDPF_VC_DESTROY_VPORT_ERR); + +rel_lock: + mutex_unlock(&vport->vc_buf_lock); + + return err; +} + +/** + * idpf_send_enable_vport_msg - Send virtchnl enable vport message + * @vport: virtual port data structure + * + * Send enable vport virtchnl message. Returns 0 on success, negative on + * failure. + */ +int idpf_send_enable_vport_msg(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_vport v_id; + int err; + + v_id.vport_id = cpu_to_le32(vport->vport_id); + + mutex_lock(&vport->vc_buf_lock); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ENABLE_VPORT, + sizeof(v_id), (u8 *)&v_id); + if (err) + goto rel_lock; + + err = idpf_wait_for_event(adapter, vport, IDPF_VC_ENA_VPORT, + IDPF_VC_ENA_VPORT_ERR); + +rel_lock: + mutex_unlock(&vport->vc_buf_lock); + + return err; +} + +/** + * idpf_send_disable_vport_msg - Send virtchnl disable vport message + * @vport: virtual port data structure + * + * Send disable vport virtchnl message. Returns 0 on success, negative on + * failure. + */ +int idpf_send_disable_vport_msg(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_vport v_id; + int err; + + v_id.vport_id = cpu_to_le32(vport->vport_id); + + mutex_lock(&vport->vc_buf_lock); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DISABLE_VPORT, + sizeof(v_id), (u8 *)&v_id); + if (err) + goto rel_lock; + + err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DIS_VPORT, + IDPF_VC_DIS_VPORT_ERR); + +rel_lock: + mutex_unlock(&vport->vc_buf_lock); + + return err; +} + +/** + * idpf_send_config_tx_queues_msg - Send virtchnl config tx queues message + * @vport: virtual port data structure + * + * Send config tx queues virtchnl message. Returns 0 on success, negative on + * failure. + */ +static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) +{ + struct virtchnl2_config_tx_queues *ctq; + u32 config_sz, chunk_sz, buf_sz; + int totqs, num_msgs, num_chunks; + struct virtchnl2_txq_info *qi; + int err = 0, i, k = 0; + + totqs = vport->num_txq + vport->num_complq; + qi = kcalloc(totqs, sizeof(struct virtchnl2_txq_info), GFP_KERNEL); + if (!qi) + return -ENOMEM; + + /* Populate the queue info buffer with all queue context info */ + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + int j; + + for (j = 0; j < tx_qgrp->num_txq; j++, k++) { + qi[k].queue_id = + cpu_to_le32(tx_qgrp->txqs[j]->q_id); + qi[k].model = + cpu_to_le16(vport->txq_model); + qi[k].type = + cpu_to_le32(tx_qgrp->txqs[j]->q_type); + qi[k].ring_len = + cpu_to_le16(tx_qgrp->txqs[j]->desc_count); + qi[k].dma_ring_addr = + cpu_to_le64(tx_qgrp->txqs[j]->dma); + if (idpf_is_queue_model_split(vport->txq_model)) { + struct idpf_queue *q = tx_qgrp->txqs[j]; + + qi[k].tx_compl_queue_id = + cpu_to_le16(tx_qgrp->complq->q_id); + qi[k].relative_queue_id = cpu_to_le16(j); + + if (test_bit(__IDPF_Q_FLOW_SCH_EN, q->flags)) + qi[k].sched_mode = + cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW); + else + qi[k].sched_mode = + cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE); + } else { + qi[k].sched_mode = + cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE); + } + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + continue; + + qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id); + qi[k].model = cpu_to_le16(vport->txq_model); + qi[k].type = cpu_to_le32(tx_qgrp->complq->q_type); + qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma); + + k++; + } + + /* Make sure accounting agrees */ + if (k != totqs) { + err = -EINVAL; + goto error; + } + + /* Chunk up the queue contexts into multiple messages to avoid + * sending a control queue message buffer that is too large + */ + config_sz = sizeof(struct virtchnl2_config_tx_queues); + chunk_sz = sizeof(struct virtchnl2_txq_info); + + num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), + totqs); + num_msgs = DIV_ROUND_UP(totqs, num_chunks); + + buf_sz = struct_size(ctq, qinfo, num_chunks); + ctq = kzalloc(buf_sz, GFP_KERNEL); + if (!ctq) { + err = -ENOMEM; + goto error; + } + + mutex_lock(&vport->vc_buf_lock); + + for (i = 0, k = 0; i < num_msgs; i++) { + memset(ctq, 0, buf_sz); + ctq->vport_id = cpu_to_le32(vport->vport_id); + ctq->num_qinfo = cpu_to_le16(num_chunks); + memcpy(ctq->qinfo, &qi[k], chunk_sz * num_chunks); + + err = idpf_send_mb_msg(vport->adapter, + VIRTCHNL2_OP_CONFIG_TX_QUEUES, + buf_sz, (u8 *)ctq); + if (err) + goto mbx_error; + + err = idpf_wait_for_event(vport->adapter, vport, + IDPF_VC_CONFIG_TXQ, + IDPF_VC_CONFIG_TXQ_ERR); + if (err) + goto mbx_error; + + k += num_chunks; + totqs -= num_chunks; + num_chunks = min(num_chunks, totqs); + /* Recalculate buffer size */ + buf_sz = struct_size(ctq, qinfo, num_chunks); + } + +mbx_error: + mutex_unlock(&vport->vc_buf_lock); + kfree(ctq); +error: + kfree(qi); + + return err; +} + +/** + * idpf_send_config_rx_queues_msg - Send virtchnl config rx queues message + * @vport: virtual port data structure + * + * Send config rx queues virtchnl message. Returns 0 on success, negative on + * failure. + */ +static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) +{ + struct virtchnl2_config_rx_queues *crq; + u32 config_sz, chunk_sz, buf_sz; + int totqs, num_msgs, num_chunks; + struct virtchnl2_rxq_info *qi; + int err = 0, i, k = 0; + + totqs = vport->num_rxq + vport->num_bufq; + qi = kcalloc(totqs, sizeof(struct virtchnl2_rxq_info), GFP_KERNEL); + if (!qi) + return -ENOMEM; + + /* Populate the queue info buffer with all queue context info */ + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u16 num_rxq; + int j; + + if (!idpf_is_queue_model_split(vport->rxq_model)) + goto setup_rxqs; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { + struct idpf_queue *bufq = + &rx_qgrp->splitq.bufq_sets[j].bufq; + + qi[k].queue_id = cpu_to_le32(bufq->q_id); + qi[k].model = cpu_to_le16(vport->rxq_model); + qi[k].type = cpu_to_le32(bufq->q_type); + qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); + qi[k].ring_len = cpu_to_le16(bufq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(bufq->dma); + qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size); + qi[k].buffer_notif_stride = bufq->rx_buf_stride; + qi[k].rx_buffer_low_watermark = + cpu_to_le16(bufq->rx_buffer_low_watermark); + if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) + qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); + } + +setup_rxqs: + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, k++) { + struct idpf_queue *rxq; + + if (!idpf_is_queue_model_split(vport->rxq_model)) { + rxq = rx_qgrp->singleq.rxqs[j]; + goto common_qi_fields; + } + rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; + qi[k].rx_bufq1_id = + cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[0].bufq.q_id); + if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) { + qi[k].bufq2_ena = IDPF_BUFQ2_ENA; + qi[k].rx_bufq2_id = + cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[1].bufq.q_id); + } + qi[k].rx_buffer_low_watermark = + cpu_to_le16(rxq->rx_buffer_low_watermark); + if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) + qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); + +common_qi_fields: + if (rxq->rx_hsplit_en) { + qi[k].qflags |= + cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); + qi[k].hdr_buffer_size = + cpu_to_le16(rxq->rx_hbuf_size); + } + qi[k].queue_id = cpu_to_le32(rxq->q_id); + qi[k].model = cpu_to_le16(vport->rxq_model); + qi[k].type = cpu_to_le32(rxq->q_type); + qi[k].ring_len = cpu_to_le16(rxq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(rxq->dma); + qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size); + qi[k].data_buffer_size = cpu_to_le32(rxq->rx_buf_size); + qi[k].qflags |= + cpu_to_le16(VIRTCHNL2_RX_DESC_SIZE_32BYTE); + qi[k].desc_ids = cpu_to_le64(rxq->rxdids); + } + } + + /* Make sure accounting agrees */ + if (k != totqs) { + err = -EINVAL; + goto error; + } + + /* Chunk up the queue contexts into multiple messages to avoid + * sending a control queue message buffer that is too large + */ + config_sz = sizeof(struct virtchnl2_config_rx_queues); + chunk_sz = sizeof(struct virtchnl2_rxq_info); + + num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), + totqs); + num_msgs = DIV_ROUND_UP(totqs, num_chunks); + + buf_sz = struct_size(crq, qinfo, num_chunks); + crq = kzalloc(buf_sz, GFP_KERNEL); + if (!crq) { + err = -ENOMEM; + goto error; + } + + mutex_lock(&vport->vc_buf_lock); + + for (i = 0, k = 0; i < num_msgs; i++) { + memset(crq, 0, buf_sz); + crq->vport_id = cpu_to_le32(vport->vport_id); + crq->num_qinfo = cpu_to_le16(num_chunks); + memcpy(crq->qinfo, &qi[k], chunk_sz * num_chunks); + + err = idpf_send_mb_msg(vport->adapter, + VIRTCHNL2_OP_CONFIG_RX_QUEUES, + buf_sz, (u8 *)crq); + if (err) + goto mbx_error; + + err = idpf_wait_for_event(vport->adapter, vport, + IDPF_VC_CONFIG_RXQ, + IDPF_VC_CONFIG_RXQ_ERR); + if (err) + goto mbx_error; + + k += num_chunks; + totqs -= num_chunks; + num_chunks = min(num_chunks, totqs); + /* Recalculate buffer size */ + buf_sz = struct_size(crq, qinfo, num_chunks); + } + +mbx_error: + mutex_unlock(&vport->vc_buf_lock); + kfree(crq); +error: + kfree(qi); + + return err; +} + +/** + * idpf_send_ena_dis_queues_msg - Send virtchnl enable or disable + * queues message + * @vport: virtual port data structure + * @vc_op: virtchnl op code to send + * + * Send enable or disable queues virtchnl message. Returns 0 on success, + * negative on failure. + */ +static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, u32 vc_op) +{ + u32 num_msgs, num_chunks, num_txq, num_rxq, num_q; + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_del_ena_dis_queues *eq; + struct virtchnl2_queue_chunks *qcs; + struct virtchnl2_queue_chunk *qc; + u32 config_sz, chunk_sz, buf_sz; + int i, j, k = 0, err = 0; + + /* validate virtchnl op */ + switch (vc_op) { + case VIRTCHNL2_OP_ENABLE_QUEUES: + case VIRTCHNL2_OP_DISABLE_QUEUES: + break; + default: + return -EINVAL; + } + + num_txq = vport->num_txq + vport->num_complq; + num_rxq = vport->num_rxq + vport->num_bufq; + num_q = num_txq + num_rxq; + buf_sz = sizeof(struct virtchnl2_queue_chunk) * num_q; + qc = kzalloc(buf_sz, GFP_KERNEL); + if (!qc) + return -ENOMEM; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq; j++, k++) { + qc[k].type = cpu_to_le32(tx_qgrp->txqs[j]->q_type); + qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } + } + if (vport->num_txq != k) { + err = -EINVAL; + goto error; + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + goto setup_rx; + + for (i = 0; i < vport->num_txq_grp; i++, k++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + qc[k].type = cpu_to_le32(tx_qgrp->complq->q_type); + qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } + if (vport->num_complq != (k - vport->num_txq)) { + err = -EINVAL; + goto error; + } + +setup_rx: + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, k++) { + if (idpf_is_queue_model_split(vport->rxq_model)) { + qc[k].start_queue_id = + cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id); + qc[k].type = + cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_type); + } else { + qc[k].start_queue_id = + cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id); + qc[k].type = + cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_type); + } + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } + } + if (vport->num_rxq != k - (vport->num_txq + vport->num_complq)) { + err = -EINVAL; + goto error; + } + + if (!idpf_is_queue_model_split(vport->rxq_model)) + goto send_msg; + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { + struct idpf_queue *q; + + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + qc[k].type = cpu_to_le32(q->q_type); + qc[k].start_queue_id = cpu_to_le32(q->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } + } + if (vport->num_bufq != k - (vport->num_txq + + vport->num_complq + + vport->num_rxq)) { + err = -EINVAL; + goto error; + } + +send_msg: + /* Chunk up the queue info into multiple messages */ + config_sz = sizeof(struct virtchnl2_del_ena_dis_queues); + chunk_sz = sizeof(struct virtchnl2_queue_chunk); + + num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), + num_q); + num_msgs = DIV_ROUND_UP(num_q, num_chunks); + + buf_sz = struct_size(eq, chunks.chunks, num_chunks); + eq = kzalloc(buf_sz, GFP_KERNEL); + if (!eq) { + err = -ENOMEM; + goto error; + } + + mutex_lock(&vport->vc_buf_lock); + + for (i = 0, k = 0; i < num_msgs; i++) { + memset(eq, 0, buf_sz); + eq->vport_id = cpu_to_le32(vport->vport_id); + eq->chunks.num_chunks = cpu_to_le16(num_chunks); + qcs = &eq->chunks; + memcpy(qcs->chunks, &qc[k], chunk_sz * num_chunks); + + err = idpf_send_mb_msg(adapter, vc_op, buf_sz, (u8 *)eq); + if (err) + goto mbx_error; + + if (vc_op == VIRTCHNL2_OP_ENABLE_QUEUES) + err = idpf_wait_for_event(adapter, vport, + IDPF_VC_ENA_QUEUES, + IDPF_VC_ENA_QUEUES_ERR); + else + err = idpf_min_wait_for_event(adapter, vport, + IDPF_VC_DIS_QUEUES, + IDPF_VC_DIS_QUEUES_ERR); + if (err) + goto mbx_error; + + k += num_chunks; + num_q -= num_chunks; + num_chunks = min(num_chunks, num_q); + /* Recalculate buffer size */ + buf_sz = struct_size(eq, chunks.chunks, num_chunks); + } + +mbx_error: + mutex_unlock(&vport->vc_buf_lock); + kfree(eq); +error: + kfree(qc); + + return err; +} + +/** + * idpf_send_map_unmap_queue_vector_msg - Send virtchnl map or unmap queue + * vector message + * @vport: virtual port data structure + * @map: true for map and false for unmap + * + * Send map or unmap queue vector virtchnl message. Returns 0 on success, + * negative on failure. + */ +int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_queue_vector_maps *vqvm; + struct virtchnl2_queue_vector *vqv; + u32 config_sz, chunk_sz, buf_sz; + u32 num_msgs, num_chunks, num_q; + int i, j, k = 0, err = 0; + + num_q = vport->num_txq + vport->num_rxq; + + buf_sz = sizeof(struct virtchnl2_queue_vector) * num_q; + vqv = kzalloc(buf_sz, GFP_KERNEL); + if (!vqv) + return -ENOMEM; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq; j++, k++) { + vqv[k].queue_type = cpu_to_le32(tx_qgrp->txqs[j]->q_type); + vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); + + if (idpf_is_queue_model_split(vport->txq_model)) { + vqv[k].vector_id = + cpu_to_le16(tx_qgrp->complq->q_vector->v_idx); + vqv[k].itr_idx = + cpu_to_le32(tx_qgrp->complq->q_vector->tx_itr_idx); + } else { + vqv[k].vector_id = + cpu_to_le16(tx_qgrp->txqs[j]->q_vector->v_idx); + vqv[k].itr_idx = + cpu_to_le32(tx_qgrp->txqs[j]->q_vector->tx_itr_idx); + } + } + } + + if (vport->num_txq != k) { + err = -EINVAL; + goto error; + } + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u16 num_rxq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, k++) { + struct idpf_queue *rxq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + rxq = rx_qgrp->singleq.rxqs[j]; + + vqv[k].queue_type = cpu_to_le32(rxq->q_type); + vqv[k].queue_id = cpu_to_le32(rxq->q_id); + vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx); + vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx); + } + } + + if (idpf_is_queue_model_split(vport->txq_model)) { + if (vport->num_rxq != k - vport->num_complq) { + err = -EINVAL; + goto error; + } + } else { + if (vport->num_rxq != k - vport->num_txq) { + err = -EINVAL; + goto error; + } + } + + /* Chunk up the vector info into multiple messages */ + config_sz = sizeof(struct virtchnl2_queue_vector_maps); + chunk_sz = sizeof(struct virtchnl2_queue_vector); + + num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), + num_q); + num_msgs = DIV_ROUND_UP(num_q, num_chunks); + + buf_sz = struct_size(vqvm, qv_maps, num_chunks); + vqvm = kzalloc(buf_sz, GFP_KERNEL); + if (!vqvm) { + err = -ENOMEM; + goto error; + } + + mutex_lock(&vport->vc_buf_lock); + + for (i = 0, k = 0; i < num_msgs; i++) { + memset(vqvm, 0, buf_sz); + vqvm->vport_id = cpu_to_le32(vport->vport_id); + vqvm->num_qv_maps = cpu_to_le16(num_chunks); + memcpy(vqvm->qv_maps, &vqv[k], chunk_sz * num_chunks); + + if (map) { + err = idpf_send_mb_msg(adapter, + VIRTCHNL2_OP_MAP_QUEUE_VECTOR, + buf_sz, (u8 *)vqvm); + if (!err) + err = idpf_wait_for_event(adapter, vport, + IDPF_VC_MAP_IRQ, + IDPF_VC_MAP_IRQ_ERR); + } else { + err = idpf_send_mb_msg(adapter, + VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR, + buf_sz, (u8 *)vqvm); + if (!err) + err = + idpf_min_wait_for_event(adapter, vport, + IDPF_VC_UNMAP_IRQ, + IDPF_VC_UNMAP_IRQ_ERR); + } + if (err) + goto mbx_error; + + k += num_chunks; + num_q -= num_chunks; + num_chunks = min(num_chunks, num_q); + /* Recalculate buffer size */ + buf_sz = struct_size(vqvm, qv_maps, num_chunks); + } + +mbx_error: + mutex_unlock(&vport->vc_buf_lock); + kfree(vqvm); +error: + kfree(vqv); + + return err; +} + +/** + * idpf_send_enable_queues_msg - send enable queues virtchnl message + * @vport: Virtual port private data structure + * + * Will send enable queues virtchnl message. Returns 0 on success, negative on + * failure. + */ +int idpf_send_enable_queues_msg(struct idpf_vport *vport) +{ + return idpf_send_ena_dis_queues_msg(vport, VIRTCHNL2_OP_ENABLE_QUEUES); +} + +/** + * idpf_send_disable_queues_msg - send disable queues virtchnl message + * @vport: Virtual port private data structure + * + * Will send disable queues virtchnl message. Returns 0 on success, negative + * on failure. + */ +int idpf_send_disable_queues_msg(struct idpf_vport *vport) +{ + int err, i; + + err = idpf_send_ena_dis_queues_msg(vport, VIRTCHNL2_OP_DISABLE_QUEUES); + if (err) + return err; + + /* switch to poll mode as interrupts will be disabled after disable + * queues virtchnl message is sent + */ + for (i = 0; i < vport->num_txq; i++) + set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); + + /* schedule the napi to receive all the marker packets */ + for (i = 0; i < vport->num_q_vectors; i++) + napi_schedule(&vport->q_vectors[i].napi); + + return idpf_wait_for_marker_event(vport); +} + +/** + * idpf_convert_reg_to_queue_chunks - Copy queue chunk information to the right + * structure + * @dchunks: Destination chunks to store data to + * @schunks: Source chunks to copy data from + * @num_chunks: number of chunks to copy + */ +static void idpf_convert_reg_to_queue_chunks(struct virtchnl2_queue_chunk *dchunks, + struct virtchnl2_queue_reg_chunk *schunks, + u16 num_chunks) +{ + u16 i; + + for (i = 0; i < num_chunks; i++) { + dchunks[i].type = schunks[i].type; + dchunks[i].start_queue_id = schunks[i].start_queue_id; + dchunks[i].num_queues = schunks[i].num_queues; + } +} + +/** + * idpf_send_delete_queues_msg - send delete queues virtchnl message + * @vport: Virtual port private data structure + * + * Will send delete queues virtchnl message. Return 0 on success, negative on + * failure. + */ +int idpf_send_delete_queues_msg(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_create_vport *vport_params; + struct virtchnl2_queue_reg_chunks *chunks; + struct virtchnl2_del_ena_dis_queues *eq; + struct idpf_vport_config *vport_config; + u16 vport_idx = vport->idx; + int buf_size, err; + u16 num_chunks; + + vport_config = adapter->vport_config[vport_idx]; + if (vport_config->req_qs_chunks) { + struct virtchnl2_add_queues *vc_aq = + (struct virtchnl2_add_queues *)vport_config->req_qs_chunks; + chunks = &vc_aq->chunks; + } else { + vport_params = adapter->vport_params_recvd[vport_idx]; + chunks = &vport_params->chunks; + } + + num_chunks = le16_to_cpu(chunks->num_chunks); + buf_size = struct_size(eq, chunks.chunks, num_chunks); + + eq = kzalloc(buf_size, GFP_KERNEL); + if (!eq) + return -ENOMEM; + + eq->vport_id = cpu_to_le32(vport->vport_id); + eq->chunks.num_chunks = cpu_to_le16(num_chunks); + + idpf_convert_reg_to_queue_chunks(eq->chunks.chunks, chunks->chunks, + num_chunks); + + mutex_lock(&vport->vc_buf_lock); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DEL_QUEUES, + buf_size, (u8 *)eq); + if (err) + goto rel_lock; + + err = idpf_min_wait_for_event(adapter, vport, IDPF_VC_DEL_QUEUES, + IDPF_VC_DEL_QUEUES_ERR); + +rel_lock: + mutex_unlock(&vport->vc_buf_lock); + kfree(eq); + + return err; +} + +/** + * idpf_send_config_queues_msg - Send config queues virtchnl message + * @vport: Virtual port private data structure + * + * Will send config queues virtchnl message. Returns 0 on success, negative on + * failure. + */ +int idpf_send_config_queues_msg(struct idpf_vport *vport) +{ + int err; + + err = idpf_send_config_tx_queues_msg(vport); + if (err) + return err; + + return idpf_send_config_rx_queues_msg(vport); +} + +/** + * idpf_send_add_queues_msg - Send virtchnl add queues message + * @vport: Virtual port private data structure + * @num_tx_q: number of transmit queues + * @num_complq: number of transmit completion queues + * @num_rx_q: number of receive queues + * @num_rx_bufq: number of receive buffer queues + * + * Returns 0 on success, negative on failure. vport _MUST_ be const here as + * we should not change any fields within vport itself in this function. + */ +int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q, + u16 num_complq, u16 num_rx_q, u16 num_rx_bufq) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport_config *vport_config; + struct virtchnl2_add_queues aq = { }; + struct virtchnl2_add_queues *vc_msg; + u16 vport_idx = vport->idx; + int size, err; + + vport_config = adapter->vport_config[vport_idx]; + + aq.vport_id = cpu_to_le32(vport->vport_id); + aq.num_tx_q = cpu_to_le16(num_tx_q); + aq.num_tx_complq = cpu_to_le16(num_complq); + aq.num_rx_q = cpu_to_le16(num_rx_q); + aq.num_rx_bufq = cpu_to_le16(num_rx_bufq); + + mutex_lock(&((struct idpf_vport *)vport)->vc_buf_lock); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ADD_QUEUES, + sizeof(struct virtchnl2_add_queues), (u8 *)&aq); + if (err) + goto rel_lock; + + /* We want vport to be const to prevent incidental code changes making + * changes to the vport config. We're making a special exception here + * to discard const to use the virtchnl. + */ + err = idpf_wait_for_event(adapter, (struct idpf_vport *)vport, + IDPF_VC_ADD_QUEUES, IDPF_VC_ADD_QUEUES_ERR); + if (err) + goto rel_lock; + + kfree(vport_config->req_qs_chunks); + vport_config->req_qs_chunks = NULL; + + vc_msg = (struct virtchnl2_add_queues *)vport->vc_msg; + /* compare vc_msg num queues with vport num queues */ + if (le16_to_cpu(vc_msg->num_tx_q) != num_tx_q || + le16_to_cpu(vc_msg->num_rx_q) != num_rx_q || + le16_to_cpu(vc_msg->num_tx_complq) != num_complq || + le16_to_cpu(vc_msg->num_rx_bufq) != num_rx_bufq) { + err = -EINVAL; + goto rel_lock; + } + + size = struct_size(vc_msg, chunks.chunks, + le16_to_cpu(vc_msg->chunks.num_chunks)); + vport_config->req_qs_chunks = kmemdup(vc_msg, size, GFP_KERNEL); + if (!vport_config->req_qs_chunks) { + err = -ENOMEM; + goto rel_lock; + } + +rel_lock: + mutex_unlock(&((struct idpf_vport *)vport)->vc_buf_lock); + + return err; +} + +/** + * idpf_send_alloc_vectors_msg - Send virtchnl alloc vectors message + * @adapter: Driver specific private structure + * @num_vectors: number of vectors to be allocated + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors) +{ + struct virtchnl2_alloc_vectors *alloc_vec, *rcvd_vec; + struct virtchnl2_alloc_vectors ac = { }; + u16 num_vchunks; + int size, err; + + ac.num_vectors = cpu_to_le16(num_vectors); + + mutex_lock(&adapter->vc_buf_lock); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_ALLOC_VECTORS, + sizeof(ac), (u8 *)&ac); + if (err) + goto rel_lock; + + err = idpf_wait_for_event(adapter, NULL, IDPF_VC_ALLOC_VECTORS, + IDPF_VC_ALLOC_VECTORS_ERR); + if (err) + goto rel_lock; + + rcvd_vec = (struct virtchnl2_alloc_vectors *)adapter->vc_msg; + num_vchunks = le16_to_cpu(rcvd_vec->vchunks.num_vchunks); + + size = struct_size(rcvd_vec, vchunks.vchunks, num_vchunks); + if (size > sizeof(adapter->vc_msg)) { + err = -EINVAL; + goto rel_lock; + } + + kfree(adapter->req_vec_chunks); + adapter->req_vec_chunks = NULL; + adapter->req_vec_chunks = kmemdup(adapter->vc_msg, size, GFP_KERNEL); + if (!adapter->req_vec_chunks) { + err = -ENOMEM; + goto rel_lock; + } + + alloc_vec = adapter->req_vec_chunks; + if (le16_to_cpu(alloc_vec->num_vectors) < num_vectors) { + kfree(adapter->req_vec_chunks); + adapter->req_vec_chunks = NULL; + err = -EINVAL; + } + +rel_lock: + mutex_unlock(&adapter->vc_buf_lock); + + return err; +} + +/** + * idpf_send_dealloc_vectors_msg - Send virtchnl de allocate vectors message + * @adapter: Driver specific private structure + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter) +{ + struct virtchnl2_alloc_vectors *ac = adapter->req_vec_chunks; + struct virtchnl2_vector_chunks *vcs = &ac->vchunks; + int buf_size, err; + + buf_size = struct_size(vcs, vchunks, le16_to_cpu(vcs->num_vchunks)); + + mutex_lock(&adapter->vc_buf_lock); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_DEALLOC_VECTORS, buf_size, + (u8 *)vcs); + if (err) + goto rel_lock; + + err = idpf_min_wait_for_event(adapter, NULL, IDPF_VC_DEALLOC_VECTORS, + IDPF_VC_DEALLOC_VECTORS_ERR); + if (err) + goto rel_lock; + + kfree(adapter->req_vec_chunks); + adapter->req_vec_chunks = NULL; + +rel_lock: + mutex_unlock(&adapter->vc_buf_lock); + + return err; +} + +/** + * idpf_get_max_vfs - Get max number of vfs supported + * @adapter: Driver specific private structure + * + * Returns max number of VFs + */ +static int idpf_get_max_vfs(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.max_sriov_vfs); +} + +/** + * idpf_send_set_sriov_vfs_msg - Send virtchnl set sriov vfs message + * @adapter: Driver specific private structure + * @num_vfs: number of virtual functions to be created + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs) +{ + struct virtchnl2_sriov_vfs_info svi = { }; + int err; + + svi.num_vfs = cpu_to_le16(num_vfs); + + mutex_lock(&adapter->vc_buf_lock); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_SRIOV_VFS, + sizeof(svi), (u8 *)&svi); + if (err) + goto rel_lock; + + err = idpf_wait_for_event(adapter, NULL, IDPF_VC_SET_SRIOV_VFS, + IDPF_VC_SET_SRIOV_VFS_ERR); + +rel_lock: + mutex_unlock(&adapter->vc_buf_lock); + + return err; +} + +/** + * idpf_send_get_stats_msg - Send virtchnl get statistics message + * @vport: vport to get stats for + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_get_stats_msg(struct idpf_vport *vport) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + struct rtnl_link_stats64 *netstats = &np->netstats; + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_vport_stats stats_msg = { }; + struct virtchnl2_vport_stats *stats; + int err; + + /* Don't send get_stats message if the link is down */ + if (np->state <= __IDPF_VPORT_DOWN) + return 0; + + stats_msg.vport_id = cpu_to_le32(vport->vport_id); + + mutex_lock(&vport->vc_buf_lock); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_STATS, + sizeof(struct virtchnl2_vport_stats), + (u8 *)&stats_msg); + if (err) + goto rel_lock; + + err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_STATS, + IDPF_VC_GET_STATS_ERR); + if (err) + goto rel_lock; + + stats = (struct virtchnl2_vport_stats *)vport->vc_msg; + + spin_lock_bh(&np->stats_lock); + + netstats->rx_packets = le64_to_cpu(stats->rx_unicast) + + le64_to_cpu(stats->rx_multicast) + + le64_to_cpu(stats->rx_broadcast); + netstats->rx_bytes = le64_to_cpu(stats->rx_bytes); + netstats->rx_dropped = le64_to_cpu(stats->rx_discards); + netstats->rx_over_errors = le64_to_cpu(stats->rx_overflow_drop); + netstats->rx_length_errors = le64_to_cpu(stats->rx_invalid_frame_length); + + netstats->tx_packets = le64_to_cpu(stats->tx_unicast) + + le64_to_cpu(stats->tx_multicast) + + le64_to_cpu(stats->tx_broadcast); + netstats->tx_bytes = le64_to_cpu(stats->tx_bytes); + netstats->tx_errors = le64_to_cpu(stats->tx_errors); + netstats->tx_dropped = le64_to_cpu(stats->tx_discards); + + vport->port_stats.vport_stats = *stats; + + spin_unlock_bh(&np->stats_lock); + +rel_lock: + mutex_unlock(&vport->vc_buf_lock); + + return err; +} + +/** + * idpf_send_get_set_rss_lut_msg - Send virtchnl get or set rss lut message + * @vport: virtual port data structure + * @get: flag to set or get rss look up table + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_rss_lut *recv_rl; + struct idpf_rss_data *rss_data; + struct virtchnl2_rss_lut *rl; + int buf_size, lut_buf_size; + int i, err; + + rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + buf_size = struct_size(rl, lut, rss_data->rss_lut_size); + rl = kzalloc(buf_size, GFP_KERNEL); + if (!rl) + return -ENOMEM; + + rl->vport_id = cpu_to_le32(vport->vport_id); + mutex_lock(&vport->vc_buf_lock); + + if (!get) { + rl->lut_entries = cpu_to_le16(rss_data->rss_lut_size); + for (i = 0; i < rss_data->rss_lut_size; i++) + rl->lut[i] = cpu_to_le32(rss_data->rss_lut[i]); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_RSS_LUT, + buf_size, (u8 *)rl); + if (err) + goto free_mem; + + err = idpf_wait_for_event(adapter, vport, IDPF_VC_SET_RSS_LUT, + IDPF_VC_SET_RSS_LUT_ERR); + + goto free_mem; + } + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_RSS_LUT, + buf_size, (u8 *)rl); + if (err) + goto free_mem; + + err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_RSS_LUT, + IDPF_VC_GET_RSS_LUT_ERR); + if (err) + goto free_mem; + + recv_rl = (struct virtchnl2_rss_lut *)vport->vc_msg; + if (rss_data->rss_lut_size == le16_to_cpu(recv_rl->lut_entries)) + goto do_memcpy; + + rss_data->rss_lut_size = le16_to_cpu(recv_rl->lut_entries); + kfree(rss_data->rss_lut); + + lut_buf_size = rss_data->rss_lut_size * sizeof(u32); + rss_data->rss_lut = kzalloc(lut_buf_size, GFP_KERNEL); + if (!rss_data->rss_lut) { + rss_data->rss_lut_size = 0; + err = -ENOMEM; + goto free_mem; + } + +do_memcpy: + memcpy(rss_data->rss_lut, vport->vc_msg, rss_data->rss_lut_size); +free_mem: + mutex_unlock(&vport->vc_buf_lock); + kfree(rl); + + return err; +} + +/** + * idpf_send_get_set_rss_key_msg - Send virtchnl get or set rss key message + * @vport: virtual port data structure + * @get: flag to set or get rss look up table + * + * Returns 0 on success, negative on failure + */ +int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_rss_key *recv_rk; + struct idpf_rss_data *rss_data; + struct virtchnl2_rss_key *rk; + int i, buf_size, err; + + rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + buf_size = struct_size(rk, key_flex, rss_data->rss_key_size); + rk = kzalloc(buf_size, GFP_KERNEL); + if (!rk) + return -ENOMEM; + + rk->vport_id = cpu_to_le32(vport->vport_id); + mutex_lock(&vport->vc_buf_lock); + + if (get) { + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_RSS_KEY, + buf_size, (u8 *)rk); + if (err) + goto error; + + err = idpf_wait_for_event(adapter, vport, IDPF_VC_GET_RSS_KEY, + IDPF_VC_GET_RSS_KEY_ERR); + if (err) + goto error; + + recv_rk = (struct virtchnl2_rss_key *)vport->vc_msg; + if (rss_data->rss_key_size != + le16_to_cpu(recv_rk->key_len)) { + rss_data->rss_key_size = + min_t(u16, NETDEV_RSS_KEY_LEN, + le16_to_cpu(recv_rk->key_len)); + kfree(rss_data->rss_key); + rss_data->rss_key = kzalloc(rss_data->rss_key_size, + GFP_KERNEL); + if (!rss_data->rss_key) { + rss_data->rss_key_size = 0; + err = -ENOMEM; + goto error; + } + } + memcpy(rss_data->rss_key, recv_rk->key_flex, + rss_data->rss_key_size); + } else { + rk->key_len = cpu_to_le16(rss_data->rss_key_size); + for (i = 0; i < rss_data->rss_key_size; i++) + rk->key_flex[i] = rss_data->rss_key[i]; + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_SET_RSS_KEY, + buf_size, (u8 *)rk); + if (err) + goto error; + + err = idpf_wait_for_event(adapter, vport, IDPF_VC_SET_RSS_KEY, + IDPF_VC_SET_RSS_KEY_ERR); + } + +error: + mutex_unlock(&vport->vc_buf_lock); + kfree(rk); + + return err; +} + +/** + * idpf_fill_ptype_lookup - Fill L3 specific fields in ptype lookup table + * @ptype: ptype lookup table + * @pstate: state machine for ptype lookup table + * @ipv4: ipv4 or ipv6 + * @frag: fragmentation allowed + * + */ +static void idpf_fill_ptype_lookup(struct idpf_rx_ptype_decoded *ptype, + struct idpf_ptype_state *pstate, + bool ipv4, bool frag) +{ + if (!pstate->outer_ip || !pstate->outer_frag) { + ptype->outer_ip = IDPF_RX_PTYPE_OUTER_IP; + pstate->outer_ip = true; + + if (ipv4) + ptype->outer_ip_ver = IDPF_RX_PTYPE_OUTER_IPV4; + else + ptype->outer_ip_ver = IDPF_RX_PTYPE_OUTER_IPV6; + + if (frag) { + ptype->outer_frag = IDPF_RX_PTYPE_FRAG; + pstate->outer_frag = true; + } + } else { + ptype->tunnel_type = IDPF_RX_PTYPE_TUNNEL_IP_IP; + pstate->tunnel_state = IDPF_PTYPE_TUNNEL_IP; + + if (ipv4) + ptype->tunnel_end_prot = + IDPF_RX_PTYPE_TUNNEL_END_IPV4; + else + ptype->tunnel_end_prot = + IDPF_RX_PTYPE_TUNNEL_END_IPV6; + + if (frag) + ptype->tunnel_end_frag = IDPF_RX_PTYPE_FRAG; + } +} + +/** + * idpf_send_get_rx_ptype_msg - Send virtchnl for ptype info + * @vport: virtual port data structure + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport) +{ + struct idpf_rx_ptype_decoded *ptype_lkup = vport->rx_ptype_lkup; + struct virtchnl2_get_ptype_info get_ptype_info; + int max_ptype, ptypes_recvd = 0, ptype_offset; + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_get_ptype_info *ptype_info; + u16 next_ptype_id = 0; + int err = 0, i, j, k; + + if (idpf_is_queue_model_split(vport->rxq_model)) + max_ptype = IDPF_RX_MAX_PTYPE; + else + max_ptype = IDPF_RX_MAX_BASE_PTYPE; + + memset(vport->rx_ptype_lkup, 0, sizeof(vport->rx_ptype_lkup)); + + ptype_info = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); + if (!ptype_info) + return -ENOMEM; + + mutex_lock(&adapter->vc_buf_lock); + + while (next_ptype_id < max_ptype) { + get_ptype_info.start_ptype_id = cpu_to_le16(next_ptype_id); + + if ((next_ptype_id + IDPF_RX_MAX_PTYPES_PER_BUF) > max_ptype) + get_ptype_info.num_ptypes = + cpu_to_le16(max_ptype - next_ptype_id); + else + get_ptype_info.num_ptypes = + cpu_to_le16(IDPF_RX_MAX_PTYPES_PER_BUF); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_GET_PTYPE_INFO, + sizeof(struct virtchnl2_get_ptype_info), + (u8 *)&get_ptype_info); + if (err) + goto vc_buf_unlock; + + err = idpf_wait_for_event(adapter, NULL, IDPF_VC_GET_PTYPE_INFO, + IDPF_VC_GET_PTYPE_INFO_ERR); + if (err) + goto vc_buf_unlock; + + memcpy(ptype_info, adapter->vc_msg, IDPF_CTLQ_MAX_BUF_LEN); + + ptypes_recvd += le16_to_cpu(ptype_info->num_ptypes); + if (ptypes_recvd > max_ptype) { + err = -EINVAL; + goto vc_buf_unlock; + } + + next_ptype_id = le16_to_cpu(get_ptype_info.start_ptype_id) + + le16_to_cpu(get_ptype_info.num_ptypes); + + ptype_offset = IDPF_RX_PTYPE_HDR_SZ; + + for (i = 0; i < le16_to_cpu(ptype_info->num_ptypes); i++) { + struct idpf_ptype_state pstate = { }; + struct virtchnl2_ptype *ptype; + u16 id; + + ptype = (struct virtchnl2_ptype *) + ((u8 *)ptype_info + ptype_offset); + + ptype_offset += IDPF_GET_PTYPE_SIZE(ptype); + if (ptype_offset > IDPF_CTLQ_MAX_BUF_LEN) { + err = -EINVAL; + goto vc_buf_unlock; + } + + /* 0xFFFF indicates end of ptypes */ + if (le16_to_cpu(ptype->ptype_id_10) == + IDPF_INVALID_PTYPE_ID) { + err = 0; + goto vc_buf_unlock; + } + + if (idpf_is_queue_model_split(vport->rxq_model)) + k = le16_to_cpu(ptype->ptype_id_10); + else + k = ptype->ptype_id_8; + + if (ptype->proto_id_count) + ptype_lkup[k].known = 1; + + for (j = 0; j < ptype->proto_id_count; j++) { + id = le16_to_cpu(ptype->proto_id[j]); + switch (id) { + case VIRTCHNL2_PROTO_HDR_GRE: + if (pstate.tunnel_state == + IDPF_PTYPE_TUNNEL_IP) { + ptype_lkup[k].tunnel_type = + IDPF_RX_PTYPE_TUNNEL_IP_GRENAT; + pstate.tunnel_state |= + IDPF_PTYPE_TUNNEL_IP_GRENAT; + } + break; + case VIRTCHNL2_PROTO_HDR_MAC: + ptype_lkup[k].outer_ip = + IDPF_RX_PTYPE_OUTER_L2; + if (pstate.tunnel_state == + IDPF_TUN_IP_GRE) { + ptype_lkup[k].tunnel_type = + IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC; + pstate.tunnel_state |= + IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC; + } + break; + case VIRTCHNL2_PROTO_HDR_IPV4: + idpf_fill_ptype_lookup(&ptype_lkup[k], + &pstate, true, + false); + break; + case VIRTCHNL2_PROTO_HDR_IPV6: + idpf_fill_ptype_lookup(&ptype_lkup[k], + &pstate, false, + false); + break; + case VIRTCHNL2_PROTO_HDR_IPV4_FRAG: + idpf_fill_ptype_lookup(&ptype_lkup[k], + &pstate, true, + true); + break; + case VIRTCHNL2_PROTO_HDR_IPV6_FRAG: + idpf_fill_ptype_lookup(&ptype_lkup[k], + &pstate, false, + true); + break; + case VIRTCHNL2_PROTO_HDR_UDP: + ptype_lkup[k].inner_prot = + IDPF_RX_PTYPE_INNER_PROT_UDP; + break; + case VIRTCHNL2_PROTO_HDR_TCP: + ptype_lkup[k].inner_prot = + IDPF_RX_PTYPE_INNER_PROT_TCP; + break; + case VIRTCHNL2_PROTO_HDR_SCTP: + ptype_lkup[k].inner_prot = + IDPF_RX_PTYPE_INNER_PROT_SCTP; + break; + case VIRTCHNL2_PROTO_HDR_ICMP: + ptype_lkup[k].inner_prot = + IDPF_RX_PTYPE_INNER_PROT_ICMP; + break; + case VIRTCHNL2_PROTO_HDR_PAY: + ptype_lkup[k].payload_layer = + IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2; + break; + case VIRTCHNL2_PROTO_HDR_ICMPV6: + case VIRTCHNL2_PROTO_HDR_IPV6_EH: + case VIRTCHNL2_PROTO_HDR_PRE_MAC: + case VIRTCHNL2_PROTO_HDR_POST_MAC: + case VIRTCHNL2_PROTO_HDR_ETHERTYPE: + case VIRTCHNL2_PROTO_HDR_SVLAN: + case VIRTCHNL2_PROTO_HDR_CVLAN: + case VIRTCHNL2_PROTO_HDR_MPLS: + case VIRTCHNL2_PROTO_HDR_MMPLS: + case VIRTCHNL2_PROTO_HDR_PTP: + case VIRTCHNL2_PROTO_HDR_CTRL: + case VIRTCHNL2_PROTO_HDR_LLDP: + case VIRTCHNL2_PROTO_HDR_ARP: + case VIRTCHNL2_PROTO_HDR_ECP: + case VIRTCHNL2_PROTO_HDR_EAPOL: + case VIRTCHNL2_PROTO_HDR_PPPOD: + case VIRTCHNL2_PROTO_HDR_PPPOE: + case VIRTCHNL2_PROTO_HDR_IGMP: + case VIRTCHNL2_PROTO_HDR_AH: + case VIRTCHNL2_PROTO_HDR_ESP: + case VIRTCHNL2_PROTO_HDR_IKE: + case VIRTCHNL2_PROTO_HDR_NATT_KEEP: + case VIRTCHNL2_PROTO_HDR_L2TPV2: + case VIRTCHNL2_PROTO_HDR_L2TPV2_CONTROL: + case VIRTCHNL2_PROTO_HDR_L2TPV3: + case VIRTCHNL2_PROTO_HDR_GTP: + case VIRTCHNL2_PROTO_HDR_GTP_EH: + case VIRTCHNL2_PROTO_HDR_GTPCV2: + case VIRTCHNL2_PROTO_HDR_GTPC_TEID: + case VIRTCHNL2_PROTO_HDR_GTPU: + case VIRTCHNL2_PROTO_HDR_GTPU_UL: + case VIRTCHNL2_PROTO_HDR_GTPU_DL: + case VIRTCHNL2_PROTO_HDR_ECPRI: + case VIRTCHNL2_PROTO_HDR_VRRP: + case VIRTCHNL2_PROTO_HDR_OSPF: + case VIRTCHNL2_PROTO_HDR_TUN: + case VIRTCHNL2_PROTO_HDR_NVGRE: + case VIRTCHNL2_PROTO_HDR_VXLAN: + case VIRTCHNL2_PROTO_HDR_VXLAN_GPE: + case VIRTCHNL2_PROTO_HDR_GENEVE: + case VIRTCHNL2_PROTO_HDR_NSH: + case VIRTCHNL2_PROTO_HDR_QUIC: + case VIRTCHNL2_PROTO_HDR_PFCP: + case VIRTCHNL2_PROTO_HDR_PFCP_NODE: + case VIRTCHNL2_PROTO_HDR_PFCP_SESSION: + case VIRTCHNL2_PROTO_HDR_RTP: + case VIRTCHNL2_PROTO_HDR_NO_PROTO: + break; + default: + break; + } + } + } + } + +vc_buf_unlock: + mutex_unlock(&adapter->vc_buf_lock); + kfree(ptype_info); + + return err; +} + +/** + * idpf_send_ena_dis_loopback_msg - Send virtchnl enable/disable loopback + * message + * @vport: virtual port data structure + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport) +{ + struct virtchnl2_loopback loopback; + int err; + + loopback.vport_id = cpu_to_le32(vport->vport_id); + loopback.enable = idpf_is_feature_ena(vport, NETIF_F_LOOPBACK); + + mutex_lock(&vport->vc_buf_lock); + + err = idpf_send_mb_msg(vport->adapter, VIRTCHNL2_OP_LOOPBACK, + sizeof(loopback), (u8 *)&loopback); + if (err) + goto rel_lock; + + err = idpf_wait_for_event(vport->adapter, vport, + IDPF_VC_LOOPBACK_STATE, + IDPF_VC_LOOPBACK_STATE_ERR); + +rel_lock: + mutex_unlock(&vport->vc_buf_lock); + + return err; +} + +/** + * idpf_find_ctlq - Given a type and id, find ctlq info + * @hw: hardware struct + * @type: type of ctrlq to find + * @id: ctlq id to find + * + * Returns pointer to found ctlq info struct, NULL otherwise. + */ +static struct idpf_ctlq_info *idpf_find_ctlq(struct idpf_hw *hw, + enum idpf_ctlq_type type, int id) +{ + struct idpf_ctlq_info *cq, *tmp; + + list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list) + if (cq->q_id == id && cq->cq_type == type) + return cq; + + return NULL; +} + +/** + * idpf_init_dflt_mbx - Setup default mailbox parameters and make request + * @adapter: adapter info struct + * + * Returns 0 on success, negative otherwise + */ +int idpf_init_dflt_mbx(struct idpf_adapter *adapter) +{ + struct idpf_ctlq_create_info ctlq_info[] = { + { + .type = IDPF_CTLQ_TYPE_MAILBOX_TX, + .id = IDPF_DFLT_MBX_ID, + .len = IDPF_DFLT_MBX_Q_LEN, + .buf_size = IDPF_CTLQ_MAX_BUF_LEN + }, + { + .type = IDPF_CTLQ_TYPE_MAILBOX_RX, + .id = IDPF_DFLT_MBX_ID, + .len = IDPF_DFLT_MBX_Q_LEN, + .buf_size = IDPF_CTLQ_MAX_BUF_LEN + } + }; + struct idpf_hw *hw = &adapter->hw; + int err; + + adapter->dev_ops.reg_ops.ctlq_reg_init(ctlq_info); + + err = idpf_ctlq_init(hw, IDPF_NUM_DFLT_MBX_Q, ctlq_info); + if (err) + return err; + + hw->asq = idpf_find_ctlq(hw, IDPF_CTLQ_TYPE_MAILBOX_TX, + IDPF_DFLT_MBX_ID); + hw->arq = idpf_find_ctlq(hw, IDPF_CTLQ_TYPE_MAILBOX_RX, + IDPF_DFLT_MBX_ID); + + if (!hw->asq || !hw->arq) { + idpf_ctlq_deinit(hw); + + return -ENOENT; + } + + adapter->state = __IDPF_STARTUP; + + return 0; +} + +/** + * idpf_deinit_dflt_mbx - Free up ctlqs setup + * @adapter: Driver specific private data structure + */ +void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter) +{ + if (adapter->hw.arq && adapter->hw.asq) { + idpf_mb_clean(adapter); + idpf_ctlq_deinit(&adapter->hw); + } + adapter->hw.arq = NULL; + adapter->hw.asq = NULL; +} + +/** + * idpf_vport_params_buf_rel - Release memory for MailBox resources + * @adapter: Driver specific private data structure + * + * Will release memory to hold the vport parameters received on MailBox + */ +static void idpf_vport_params_buf_rel(struct idpf_adapter *adapter) +{ + kfree(adapter->vport_params_recvd); + adapter->vport_params_recvd = NULL; + kfree(adapter->vport_params_reqd); + adapter->vport_params_reqd = NULL; + kfree(adapter->vport_ids); + adapter->vport_ids = NULL; +} + +/** + * idpf_vport_params_buf_alloc - Allocate memory for MailBox resources + * @adapter: Driver specific private data structure + * + * Will alloc memory to hold the vport parameters received on MailBox + */ +static int idpf_vport_params_buf_alloc(struct idpf_adapter *adapter) +{ + u16 num_max_vports = idpf_get_max_vports(adapter); + + adapter->vport_params_reqd = kcalloc(num_max_vports, + sizeof(*adapter->vport_params_reqd), + GFP_KERNEL); + if (!adapter->vport_params_reqd) + return -ENOMEM; + + adapter->vport_params_recvd = kcalloc(num_max_vports, + sizeof(*adapter->vport_params_recvd), + GFP_KERNEL); + if (!adapter->vport_params_recvd) + goto err_mem; + + adapter->vport_ids = kcalloc(num_max_vports, sizeof(u32), GFP_KERNEL); + if (!adapter->vport_ids) + goto err_mem; + + if (adapter->vport_config) + return 0; + + adapter->vport_config = kcalloc(num_max_vports, + sizeof(*adapter->vport_config), + GFP_KERNEL); + if (!adapter->vport_config) + goto err_mem; + + return 0; + +err_mem: + idpf_vport_params_buf_rel(adapter); + + return -ENOMEM; +} + +/** + * idpf_vc_core_init - Initialize state machine and get driver specific + * resources + * @adapter: Driver specific private structure + * + * This function will initialize the state machine and request all necessary + * resources required by the device driver. Once the state machine is + * initialized, allocate memory to store vport specific information and also + * requests required interrupts. + * + * Returns 0 on success, -EAGAIN function will get called again, + * otherwise negative on failure. + */ +int idpf_vc_core_init(struct idpf_adapter *adapter) +{ + int task_delay = 30; + u16 num_max_vports; + int err = 0; + + while (adapter->state != __IDPF_INIT_SW) { + switch (adapter->state) { + case __IDPF_STARTUP: + if (idpf_send_ver_msg(adapter)) + goto init_failed; + adapter->state = __IDPF_VER_CHECK; + goto restart; + case __IDPF_VER_CHECK: + err = idpf_recv_ver_msg(adapter); + if (err == -EIO) { + return err; + } else if (err == -EAGAIN) { + adapter->state = __IDPF_STARTUP; + goto restart; + } else if (err) { + goto init_failed; + } + if (idpf_send_get_caps_msg(adapter)) + goto init_failed; + adapter->state = __IDPF_GET_CAPS; + goto restart; + case __IDPF_GET_CAPS: + if (idpf_recv_get_caps_msg(adapter)) + goto init_failed; + adapter->state = __IDPF_INIT_SW; + break; + default: + dev_err(&adapter->pdev->dev, "Device is in bad state: %d\n", + adapter->state); + goto init_failed; + } + break; +restart: + /* Give enough time before proceeding further with + * state machine + */ + msleep(task_delay); + } + + pci_sriov_set_totalvfs(adapter->pdev, idpf_get_max_vfs(adapter)); + num_max_vports = idpf_get_max_vports(adapter); + adapter->max_vports = num_max_vports; + adapter->vports = kcalloc(num_max_vports, sizeof(*adapter->vports), + GFP_KERNEL); + if (!adapter->vports) + return -ENOMEM; + + if (!adapter->netdevs) { + adapter->netdevs = kcalloc(num_max_vports, + sizeof(struct net_device *), + GFP_KERNEL); + if (!adapter->netdevs) { + err = -ENOMEM; + goto err_netdev_alloc; + } + } + + err = idpf_vport_params_buf_alloc(adapter); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to alloc vport params buffer: %d\n", + err); + goto err_netdev_alloc; + } + + /* Start the mailbox task before requesting vectors. This will ensure + * vector information response from mailbox is handled + */ + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); + + queue_delayed_work(adapter->serv_wq, &adapter->serv_task, + msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07))); + + err = idpf_intr_req(adapter); + if (err) { + dev_err(&adapter->pdev->dev, "failed to enable interrupt vectors: %d\n", + err); + goto err_intr_req; + } + + idpf_init_avail_queues(adapter); + + /* Skew the delay for init tasks for each function based on fn number + * to prevent every function from making the same call simultaneously. + */ + queue_delayed_work(adapter->init_wq, &adapter->init_task, + msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07))); + + goto no_err; + +err_intr_req: + cancel_delayed_work_sync(&adapter->serv_task); + idpf_vport_params_buf_rel(adapter); +err_netdev_alloc: + kfree(adapter->vports); + adapter->vports = NULL; +no_err: + return err; + +init_failed: + /* Don't retry if we're trying to go down, just bail. */ + if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) + return err; + + if (++adapter->mb_wait_count > IDPF_MB_MAX_ERR) { + dev_err(&adapter->pdev->dev, "Failed to establish mailbox communications with hardware\n"); + + return -EFAULT; + } + /* If it reached here, it is possible that mailbox queue initialization + * register writes might not have taken effect. Retry to initialize + * the mailbox again + */ + adapter->state = __IDPF_STARTUP; + idpf_deinit_dflt_mbx(adapter); + set_bit(IDPF_HR_DRV_LOAD, adapter->flags); + queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task, + msecs_to_jiffies(task_delay)); + + return -EAGAIN; +} + +/** + * idpf_vc_core_deinit - Device deinit routine + * @adapter: Driver specific private structure + * + */ +void idpf_vc_core_deinit(struct idpf_adapter *adapter) +{ + int i; + + idpf_deinit_task(adapter); + idpf_intr_rel(adapter); + /* Set all bits as we dont know on which vc_state the vhnl_wq is + * waiting on and wakeup the virtchnl workqueue even if it is waiting + * for the response as we are going down + */ + for (i = 0; i < IDPF_VC_NBITS; i++) + set_bit(i, adapter->vc_state); + wake_up(&adapter->vchnl_wq); + + cancel_delayed_work_sync(&adapter->serv_task); + cancel_delayed_work_sync(&adapter->mbx_task); + + idpf_vport_params_buf_rel(adapter); + + /* Clear all the bits */ + for (i = 0; i < IDPF_VC_NBITS; i++) + clear_bit(i, adapter->vc_state); + + kfree(adapter->vports); + adapter->vports = NULL; +} + +/** + * idpf_vport_alloc_vec_indexes - Get relative vector indexes + * @vport: virtual port data struct + * + * This function requests the vector information required for the vport and + * stores the vector indexes received from the 'global vector distribution' + * in the vport's queue vectors array. + * + * Return 0 on success, error on failure + */ +int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport) +{ + struct idpf_vector_info vec_info; + int num_alloc_vecs; + + vec_info.num_curr_vecs = vport->num_q_vectors; + vec_info.num_req_vecs = max(vport->num_txq, vport->num_rxq); + vec_info.default_vport = vport->default_vport; + vec_info.index = vport->idx; + + num_alloc_vecs = idpf_req_rel_vector_indexes(vport->adapter, + vport->q_vector_idxs, + &vec_info); + if (num_alloc_vecs <= 0) { + dev_err(&vport->adapter->pdev->dev, "Vector distribution failed: %d\n", + num_alloc_vecs); + return -EINVAL; + } + + vport->num_q_vectors = num_alloc_vecs; + + return 0; +} + +/** + * idpf_vport_init - Initialize virtual port + * @vport: virtual port to be initialized + * @max_q: vport max queue info + * + * Will initialize vport with the info received through MB earlier + */ +void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_create_vport *vport_msg; + struct idpf_vport_config *vport_config; + u16 tx_itr[] = {2, 8, 64, 128, 256}; + u16 rx_itr[] = {2, 8, 32, 96, 128}; + struct idpf_rss_data *rss_data; + u16 idx = vport->idx; + + vport_config = adapter->vport_config[idx]; + rss_data = &vport_config->user_config.rss_data; + vport_msg = adapter->vport_params_recvd[idx]; + + vport_config->max_q.max_txq = max_q->max_txq; + vport_config->max_q.max_rxq = max_q->max_rxq; + vport_config->max_q.max_complq = max_q->max_complq; + vport_config->max_q.max_bufq = max_q->max_bufq; + + vport->txq_model = le16_to_cpu(vport_msg->txq_model); + vport->rxq_model = le16_to_cpu(vport_msg->rxq_model); + vport->vport_type = le16_to_cpu(vport_msg->vport_type); + vport->vport_id = le32_to_cpu(vport_msg->vport_id); + + rss_data->rss_key_size = min_t(u16, NETDEV_RSS_KEY_LEN, + le16_to_cpu(vport_msg->rss_key_size)); + rss_data->rss_lut_size = le16_to_cpu(vport_msg->rss_lut_size); + + ether_addr_copy(vport->default_mac_addr, vport_msg->default_mac_addr); + vport->max_mtu = le16_to_cpu(vport_msg->max_mtu) - IDPF_PACKET_HDR_PAD; + + /* Initialize Tx and Rx profiles for Dynamic Interrupt Moderation */ + memcpy(vport->rx_itr_profile, rx_itr, IDPF_DIM_PROFILE_SLOTS); + memcpy(vport->tx_itr_profile, tx_itr, IDPF_DIM_PROFILE_SLOTS); + + idpf_vport_init_num_qs(vport, vport_msg); + idpf_vport_calc_num_q_desc(vport); + idpf_vport_calc_num_q_groups(vport); + idpf_vport_alloc_vec_indexes(vport); + + vport->crc_enable = adapter->crc_enable; +} + +/** + * idpf_get_vec_ids - Initialize vector id from Mailbox parameters + * @adapter: adapter structure to get the mailbox vector id + * @vecids: Array of vector ids + * @num_vecids: number of vector ids + * @chunks: vector ids received over mailbox + * + * Will initialize the mailbox vector id which is received from the + * get capabilities and data queue vector ids with ids received as + * mailbox parameters. + * Returns number of ids filled + */ +int idpf_get_vec_ids(struct idpf_adapter *adapter, + u16 *vecids, int num_vecids, + struct virtchnl2_vector_chunks *chunks) +{ + u16 num_chunks = le16_to_cpu(chunks->num_vchunks); + int num_vecid_filled = 0; + int i, j; + + vecids[num_vecid_filled] = adapter->mb_vector.v_idx; + num_vecid_filled++; + + for (j = 0; j < num_chunks; j++) { + struct virtchnl2_vector_chunk *chunk; + u16 start_vecid, num_vec; + + chunk = &chunks->vchunks[j]; + num_vec = le16_to_cpu(chunk->num_vectors); + start_vecid = le16_to_cpu(chunk->start_vector_id); + + for (i = 0; i < num_vec; i++) { + if ((num_vecid_filled + i) < num_vecids) { + vecids[num_vecid_filled + i] = start_vecid; + start_vecid++; + } else { + break; + } + } + num_vecid_filled = num_vecid_filled + i; + } + + return num_vecid_filled; +} + +/** + * idpf_vport_get_queue_ids - Initialize queue id from Mailbox parameters + * @qids: Array of queue ids + * @num_qids: number of queue ids + * @q_type: queue model + * @chunks: queue ids received over mailbox + * + * Will initialize all queue ids with ids received as mailbox parameters + * Returns number of ids filled + */ +static int idpf_vport_get_queue_ids(u32 *qids, int num_qids, u16 q_type, + struct virtchnl2_queue_reg_chunks *chunks) +{ + u16 num_chunks = le16_to_cpu(chunks->num_chunks); + u32 num_q_id_filled = 0, i; + u32 start_q_id, num_q; + + while (num_chunks--) { + struct virtchnl2_queue_reg_chunk *chunk; + + chunk = &chunks->chunks[num_chunks]; + if (le32_to_cpu(chunk->type) != q_type) + continue; + + num_q = le32_to_cpu(chunk->num_queues); + start_q_id = le32_to_cpu(chunk->start_queue_id); + + for (i = 0; i < num_q; i++) { + if ((num_q_id_filled + i) < num_qids) { + qids[num_q_id_filled + i] = start_q_id; + start_q_id++; + } else { + break; + } + } + num_q_id_filled = num_q_id_filled + i; + } + + return num_q_id_filled; +} + +/** + * __idpf_vport_queue_ids_init - Initialize queue ids from Mailbox parameters + * @vport: virtual port for which the queues ids are initialized + * @qids: queue ids + * @num_qids: number of queue ids + * @q_type: type of queue + * + * Will initialize all queue ids with ids received as mailbox + * parameters. Returns number of queue ids initialized. + */ +static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + const u32 *qids, + int num_qids, + u32 q_type) +{ + struct idpf_queue *q; + int i, j, k = 0; + + switch (q_type) { + case VIRTCHNL2_QUEUE_TYPE_TX: + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) { + tx_qgrp->txqs[j]->q_id = qids[k]; + tx_qgrp->txqs[j]->q_type = + VIRTCHNL2_QUEUE_TYPE_TX; + } + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX: + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u16 num_rxq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq && k < num_qids; j++, k++) { + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + q->q_id = qids[k]; + q->q_type = VIRTCHNL2_QUEUE_TYPE_RX; + } + } + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + for (i = 0; i < vport->num_txq_grp && k < num_qids; i++, k++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + tx_qgrp->complq->q_id = qids[k]; + tx_qgrp->complq->q_type = + VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u8 num_bufqs = vport->num_bufqs_per_qgrp; + + for (j = 0; j < num_bufqs && k < num_qids; j++, k++) { + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + q->q_id = qids[k]; + q->q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + } + } + break; + default: + break; + } + + return k; +} + +/** + * idpf_vport_queue_ids_init - Initialize queue ids from Mailbox parameters + * @vport: virtual port for which the queues ids are initialized + * + * Will initialize all queue ids with ids received as mailbox parameters. + * Returns 0 on success, negative if all the queues are not initialized. + */ +int idpf_vport_queue_ids_init(struct idpf_vport *vport) +{ + struct virtchnl2_create_vport *vport_params; + struct virtchnl2_queue_reg_chunks *chunks; + struct idpf_vport_config *vport_config; + u16 vport_idx = vport->idx; + int num_ids, err = 0; + u16 q_type; + u32 *qids; + + vport_config = vport->adapter->vport_config[vport_idx]; + if (vport_config->req_qs_chunks) { + struct virtchnl2_add_queues *vc_aq = + (struct virtchnl2_add_queues *)vport_config->req_qs_chunks; + chunks = &vc_aq->chunks; + } else { + vport_params = vport->adapter->vport_params_recvd[vport_idx]; + chunks = &vport_params->chunks; + } + + qids = kcalloc(IDPF_MAX_QIDS, sizeof(u32), GFP_KERNEL); + if (!qids) + return -ENOMEM; + + num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, + VIRTCHNL2_QUEUE_TYPE_TX, + chunks); + if (num_ids < vport->num_txq) { + err = -EINVAL; + goto mem_rel; + } + num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, + VIRTCHNL2_QUEUE_TYPE_TX); + if (num_ids < vport->num_txq) { + err = -EINVAL; + goto mem_rel; + } + + num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, + VIRTCHNL2_QUEUE_TYPE_RX, + chunks); + if (num_ids < vport->num_rxq) { + err = -EINVAL; + goto mem_rel; + } + num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, + VIRTCHNL2_QUEUE_TYPE_RX); + if (num_ids < vport->num_rxq) { + err = -EINVAL; + goto mem_rel; + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + goto check_rxq; + + q_type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, q_type, chunks); + if (num_ids < vport->num_complq) { + err = -EINVAL; + goto mem_rel; + } + num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, q_type); + if (num_ids < vport->num_complq) { + err = -EINVAL; + goto mem_rel; + } + +check_rxq: + if (!idpf_is_queue_model_split(vport->rxq_model)) + goto mem_rel; + + q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, q_type, chunks); + if (num_ids < vport->num_bufq) { + err = -EINVAL; + goto mem_rel; + } + num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, q_type); + if (num_ids < vport->num_bufq) + err = -EINVAL; + +mem_rel: + kfree(qids); + + return err; +} + +/** + * idpf_vport_adjust_qs - Adjust to new requested queues + * @vport: virtual port data struct + * + * Renegotiate queues. Returns 0 on success, negative on failure. + */ +int idpf_vport_adjust_qs(struct idpf_vport *vport) +{ + struct virtchnl2_create_vport vport_msg; + int err; + + vport_msg.txq_model = cpu_to_le16(vport->txq_model); + vport_msg.rxq_model = cpu_to_le16(vport->rxq_model); + err = idpf_vport_calc_total_qs(vport->adapter, vport->idx, &vport_msg, + NULL); + if (err) + return err; + + idpf_vport_init_num_qs(vport, &vport_msg); + idpf_vport_calc_num_q_groups(vport); + + return 0; +} + +/** + * idpf_is_capability_ena - Default implementation of capability checking + * @adapter: Private data struct + * @all: all or one flag + * @field: caps field to check for flags + * @flag: flag to check + * + * Return true if all capabilities are supported, false otherwise + */ +bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, + enum idpf_cap_field field, u64 flag) +{ + u8 *caps = (u8 *)&adapter->caps; + u32 *cap_field; + + if (!caps) + return false; + + if (field == IDPF_BASE_CAPS) + return false; + + cap_field = (u32 *)(caps + field); + + if (all) + return (*cap_field & flag) == flag; + else + return !!(*cap_field & flag); +} + +/** + * idpf_get_vport_id: Get vport id + * @vport: virtual port structure + * + * Return vport id from the adapter persistent data + */ +u32 idpf_get_vport_id(struct idpf_vport *vport) +{ + struct virtchnl2_create_vport *vport_msg; + + vport_msg = vport->adapter->vport_params_recvd[vport->idx]; + + return le32_to_cpu(vport_msg->vport_id); +} + +/** + * idpf_add_del_mac_filters - Add/del mac filters + * @vport: Virtual port data structure + * @np: Netdev private structure + * @add: Add or delete flag + * @async: Don't wait for return message + * + * Returns 0 on success, error on failure. + **/ +int idpf_add_del_mac_filters(struct idpf_vport *vport, + struct idpf_netdev_priv *np, + bool add, bool async) +{ + struct virtchnl2_mac_addr_list *ma_list = NULL; + struct idpf_adapter *adapter = np->adapter; + struct idpf_vport_config *vport_config; + enum idpf_vport_config_flags mac_flag; + struct pci_dev *pdev = adapter->pdev; + enum idpf_vport_vc_state vc, vc_err; + struct virtchnl2_mac_addr *mac_addr; + struct idpf_mac_filter *f, *tmp; + u32 num_msgs, total_filters = 0; + int i = 0, k, err = 0; + u32 vop; + + vport_config = adapter->vport_config[np->vport_idx]; + spin_lock_bh(&vport_config->mac_filter_list_lock); + + /* Find the number of newly added filters */ + list_for_each_entry(f, &vport_config->user_config.mac_filter_list, + list) { + if (add && f->add) + total_filters++; + else if (!add && f->remove) + total_filters++; + } + + if (!total_filters) { + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return 0; + } + + /* Fill all the new filters into virtchannel message */ + mac_addr = kcalloc(total_filters, sizeof(struct virtchnl2_mac_addr), + GFP_ATOMIC); + if (!mac_addr) { + err = -ENOMEM; + spin_unlock_bh(&vport_config->mac_filter_list_lock); + goto error; + } + + list_for_each_entry_safe(f, tmp, &vport_config->user_config.mac_filter_list, + list) { + if (add && f->add) { + ether_addr_copy(mac_addr[i].addr, f->macaddr); + i++; + f->add = false; + if (i == total_filters) + break; + } + if (!add && f->remove) { + ether_addr_copy(mac_addr[i].addr, f->macaddr); + i++; + f->remove = false; + if (i == total_filters) + break; + } + } + + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + if (add) { + vop = VIRTCHNL2_OP_ADD_MAC_ADDR; + vc = IDPF_VC_ADD_MAC_ADDR; + vc_err = IDPF_VC_ADD_MAC_ADDR_ERR; + mac_flag = IDPF_VPORT_ADD_MAC_REQ; + } else { + vop = VIRTCHNL2_OP_DEL_MAC_ADDR; + vc = IDPF_VC_DEL_MAC_ADDR; + vc_err = IDPF_VC_DEL_MAC_ADDR_ERR; + mac_flag = IDPF_VPORT_DEL_MAC_REQ; + } + + /* Chunk up the filters into multiple messages to avoid + * sending a control queue message buffer that is too large + */ + num_msgs = DIV_ROUND_UP(total_filters, IDPF_NUM_FILTERS_PER_MSG); + + if (!async) + mutex_lock(&vport->vc_buf_lock); + + for (i = 0, k = 0; i < num_msgs; i++) { + u32 entries_size, buf_size, num_entries; + + num_entries = min_t(u32, total_filters, + IDPF_NUM_FILTERS_PER_MSG); + entries_size = sizeof(struct virtchnl2_mac_addr) * num_entries; + buf_size = struct_size(ma_list, mac_addr_list, num_entries); + + if (!ma_list || num_entries != IDPF_NUM_FILTERS_PER_MSG) { + kfree(ma_list); + ma_list = kzalloc(buf_size, GFP_ATOMIC); + if (!ma_list) { + err = -ENOMEM; + goto list_prep_error; + } + } else { + memset(ma_list, 0, buf_size); + } + + ma_list->vport_id = cpu_to_le32(np->vport_id); + ma_list->num_mac_addr = cpu_to_le16(num_entries); + memcpy(ma_list->mac_addr_list, &mac_addr[k], entries_size); + + if (async) + set_bit(mac_flag, vport_config->flags); + + err = idpf_send_mb_msg(adapter, vop, buf_size, (u8 *)ma_list); + if (err) + goto mbx_error; + + if (!async) { + err = idpf_wait_for_event(adapter, vport, vc, vc_err); + if (err) + goto mbx_error; + } + + k += num_entries; + total_filters -= num_entries; + } + +mbx_error: + if (!async) + mutex_unlock(&vport->vc_buf_lock); + kfree(ma_list); +list_prep_error: + kfree(mac_addr); +error: + if (err) + dev_err(&pdev->dev, "Failed to add or del mac filters %d", err); + + return err; +} + +/** + * idpf_set_promiscuous - set promiscuous and send message to mailbox + * @adapter: Driver specific private structure + * @config_data: Vport specific config data + * @vport_id: Vport identifier + * + * Request to enable promiscuous mode for the vport. Message is sent + * asynchronously and won't wait for response. Returns 0 on success, negative + * on failure; + */ +int idpf_set_promiscuous(struct idpf_adapter *adapter, + struct idpf_vport_user_config_data *config_data, + u32 vport_id) +{ + struct virtchnl2_promisc_info vpi; + u16 flags = 0; + int err; + + if (test_bit(__IDPF_PROMISC_UC, config_data->user_flags)) + flags |= VIRTCHNL2_UNICAST_PROMISC; + if (test_bit(__IDPF_PROMISC_MC, config_data->user_flags)) + flags |= VIRTCHNL2_MULTICAST_PROMISC; + + vpi.vport_id = cpu_to_le32(vport_id); + vpi.flags = cpu_to_le16(flags); + + err = idpf_send_mb_msg(adapter, VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE, + sizeof(struct virtchnl2_promisc_info), + (u8 *)&vpi); + + return err; +} diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h new file mode 100644 index 000000000000..07e72c72d156 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h @@ -0,0 +1,1273 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _VIRTCHNL2_H_ +#define _VIRTCHNL2_H_ + +/* All opcodes associated with virtchnl2 are prefixed with virtchnl2 or + * VIRTCHNL2. Any future opcodes, offloads/capabilities, structures, + * and defines must be prefixed with virtchnl2 or VIRTCHNL2 to avoid confusion. + * + * PF/VF uses the virtchnl2 interface defined in this header file to communicate + * with device Control Plane (CP). Driver and the CP may run on different + * platforms with different endianness. To avoid byte order discrepancies, + * all the structures in this header follow little-endian format. + * + * This is an interface definition file where existing enums and their values + * must remain unchanged over time, so we specify explicit values for all enums. + */ + +#include "virtchnl2_lan_desc.h" + +/* This macro is used to generate compilation errors if a structure + * is not exactly the correct length. + */ +#define VIRTCHNL2_CHECK_STRUCT_LEN(n, X) \ + static_assert((n) == sizeof(struct X)) + +/* New major set of opcodes introduced and so leaving room for + * old misc opcodes to be added in future. Also these opcodes may only + * be used if both the PF and VF have successfully negotiated the + * VIRTCHNL version as 2.0 during VIRTCHNL2_OP_VERSION exchange. + */ +enum virtchnl2_op { + VIRTCHNL2_OP_UNKNOWN = 0, + VIRTCHNL2_OP_VERSION = 1, + VIRTCHNL2_OP_GET_CAPS = 500, + VIRTCHNL2_OP_CREATE_VPORT = 501, + VIRTCHNL2_OP_DESTROY_VPORT = 502, + VIRTCHNL2_OP_ENABLE_VPORT = 503, + VIRTCHNL2_OP_DISABLE_VPORT = 504, + VIRTCHNL2_OP_CONFIG_TX_QUEUES = 505, + VIRTCHNL2_OP_CONFIG_RX_QUEUES = 506, + VIRTCHNL2_OP_ENABLE_QUEUES = 507, + VIRTCHNL2_OP_DISABLE_QUEUES = 508, + VIRTCHNL2_OP_ADD_QUEUES = 509, + VIRTCHNL2_OP_DEL_QUEUES = 510, + VIRTCHNL2_OP_MAP_QUEUE_VECTOR = 511, + VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR = 512, + VIRTCHNL2_OP_GET_RSS_KEY = 513, + VIRTCHNL2_OP_SET_RSS_KEY = 514, + VIRTCHNL2_OP_GET_RSS_LUT = 515, + VIRTCHNL2_OP_SET_RSS_LUT = 516, + VIRTCHNL2_OP_GET_RSS_HASH = 517, + VIRTCHNL2_OP_SET_RSS_HASH = 518, + VIRTCHNL2_OP_SET_SRIOV_VFS = 519, + VIRTCHNL2_OP_ALLOC_VECTORS = 520, + VIRTCHNL2_OP_DEALLOC_VECTORS = 521, + VIRTCHNL2_OP_EVENT = 522, + VIRTCHNL2_OP_GET_STATS = 523, + VIRTCHNL2_OP_RESET_VF = 524, + VIRTCHNL2_OP_GET_EDT_CAPS = 525, + VIRTCHNL2_OP_GET_PTYPE_INFO = 526, + /* Opcode 527 and 528 are reserved for VIRTCHNL2_OP_GET_PTYPE_ID and + * VIRTCHNL2_OP_GET_PTYPE_INFO_RAW. + * Opcodes 529, 530, 531, 532 and 533 are reserved. + */ + VIRTCHNL2_OP_LOOPBACK = 534, + VIRTCHNL2_OP_ADD_MAC_ADDR = 535, + VIRTCHNL2_OP_DEL_MAC_ADDR = 536, + VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE = 537, +}; + +/** + * enum virtchnl2_vport_type - Type of virtual port. + * @VIRTCHNL2_VPORT_TYPE_DEFAULT: Default virtual port type. + */ +enum virtchnl2_vport_type { + VIRTCHNL2_VPORT_TYPE_DEFAULT = 0, +}; + +/** + * enum virtchnl2_queue_model - Type of queue model. + * @VIRTCHNL2_QUEUE_MODEL_SINGLE: Single queue model. + * @VIRTCHNL2_QUEUE_MODEL_SPLIT: Split queue model. + * + * In the single queue model, the same transmit descriptor queue is used by + * software to post descriptors to hardware and by hardware to post completed + * descriptors to software. + * Likewise, the same receive descriptor queue is used by hardware to post + * completions to software and by software to post buffers to hardware. + * + * In the split queue model, hardware uses transmit completion queues to post + * descriptor/buffer completions to software, while software uses transmit + * descriptor queues to post descriptors to hardware. + * Likewise, hardware posts descriptor completions to the receive descriptor + * queue, while software uses receive buffer queues to post buffers to hardware. + */ +enum virtchnl2_queue_model { + VIRTCHNL2_QUEUE_MODEL_SINGLE = 0, + VIRTCHNL2_QUEUE_MODEL_SPLIT = 1, +}; + +/* Checksum offload capability flags */ +enum virtchnl2_cap_txrx_csum { + VIRTCHNL2_CAP_TX_CSUM_L3_IPV4 = BIT(0), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP = BIT(1), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP = BIT(2), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP = BIT(3), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP = BIT(4), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP = BIT(5), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP = BIT(6), + VIRTCHNL2_CAP_TX_CSUM_GENERIC = BIT(7), + VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 = BIT(8), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP = BIT(9), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP = BIT(10), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP = BIT(11), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP = BIT(12), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP = BIT(13), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP = BIT(14), + VIRTCHNL2_CAP_RX_CSUM_GENERIC = BIT(15), + VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL = BIT(16), + VIRTCHNL2_CAP_TX_CSUM_L3_DOUBLE_TUNNEL = BIT(17), + VIRTCHNL2_CAP_RX_CSUM_L3_SINGLE_TUNNEL = BIT(18), + VIRTCHNL2_CAP_RX_CSUM_L3_DOUBLE_TUNNEL = BIT(19), + VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL = BIT(20), + VIRTCHNL2_CAP_TX_CSUM_L4_DOUBLE_TUNNEL = BIT(21), + VIRTCHNL2_CAP_RX_CSUM_L4_SINGLE_TUNNEL = BIT(22), + VIRTCHNL2_CAP_RX_CSUM_L4_DOUBLE_TUNNEL = BIT(23), +}; + +/* Segmentation offload capability flags */ +enum virtchnl2_cap_seg { + VIRTCHNL2_CAP_SEG_IPV4_TCP = BIT(0), + VIRTCHNL2_CAP_SEG_IPV4_UDP = BIT(1), + VIRTCHNL2_CAP_SEG_IPV4_SCTP = BIT(2), + VIRTCHNL2_CAP_SEG_IPV6_TCP = BIT(3), + VIRTCHNL2_CAP_SEG_IPV6_UDP = BIT(4), + VIRTCHNL2_CAP_SEG_IPV6_SCTP = BIT(5), + VIRTCHNL2_CAP_SEG_GENERIC = BIT(6), + VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL = BIT(7), + VIRTCHNL2_CAP_SEG_TX_DOUBLE_TUNNEL = BIT(8), +}; + +/* Receive Side Scaling Flow type capability flags */ +enum virtchnl2_cap_rss { + VIRTCHNL2_CAP_RSS_IPV4_TCP = BIT(0), + VIRTCHNL2_CAP_RSS_IPV4_UDP = BIT(1), + VIRTCHNL2_CAP_RSS_IPV4_SCTP = BIT(2), + VIRTCHNL2_CAP_RSS_IPV4_OTHER = BIT(3), + VIRTCHNL2_CAP_RSS_IPV6_TCP = BIT(4), + VIRTCHNL2_CAP_RSS_IPV6_UDP = BIT(5), + VIRTCHNL2_CAP_RSS_IPV6_SCTP = BIT(6), + VIRTCHNL2_CAP_RSS_IPV6_OTHER = BIT(7), + VIRTCHNL2_CAP_RSS_IPV4_AH = BIT(8), + VIRTCHNL2_CAP_RSS_IPV4_ESP = BIT(9), + VIRTCHNL2_CAP_RSS_IPV4_AH_ESP = BIT(10), + VIRTCHNL2_CAP_RSS_IPV6_AH = BIT(11), + VIRTCHNL2_CAP_RSS_IPV6_ESP = BIT(12), + VIRTCHNL2_CAP_RSS_IPV6_AH_ESP = BIT(13), +}; + +/* Header split capability flags */ +enum virtchnl2_cap_rx_hsplit_at { + /* for prepended metadata */ + VIRTCHNL2_CAP_RX_HSPLIT_AT_L2 = BIT(0), + /* all VLANs go into header buffer */ + VIRTCHNL2_CAP_RX_HSPLIT_AT_L3 = BIT(1), + VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 = BIT(2), + VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6 = BIT(3), +}; + +/* Receive Side Coalescing offload capability flags */ +enum virtchnl2_cap_rsc { + VIRTCHNL2_CAP_RSC_IPV4_TCP = BIT(0), + VIRTCHNL2_CAP_RSC_IPV4_SCTP = BIT(1), + VIRTCHNL2_CAP_RSC_IPV6_TCP = BIT(2), + VIRTCHNL2_CAP_RSC_IPV6_SCTP = BIT(3), +}; + +/* Other capability flags */ +enum virtchnl2_cap_other { + VIRTCHNL2_CAP_RDMA = BIT_ULL(0), + VIRTCHNL2_CAP_SRIOV = BIT_ULL(1), + VIRTCHNL2_CAP_MACFILTER = BIT_ULL(2), + VIRTCHNL2_CAP_FLOW_DIRECTOR = BIT_ULL(3), + /* Queue based scheduling using split queue model */ + VIRTCHNL2_CAP_SPLITQ_QSCHED = BIT_ULL(4), + VIRTCHNL2_CAP_CRC = BIT_ULL(5), + VIRTCHNL2_CAP_ADQ = BIT_ULL(6), + VIRTCHNL2_CAP_WB_ON_ITR = BIT_ULL(7), + VIRTCHNL2_CAP_PROMISC = BIT_ULL(8), + VIRTCHNL2_CAP_LINK_SPEED = BIT_ULL(9), + VIRTCHNL2_CAP_INLINE_IPSEC = BIT_ULL(10), + VIRTCHNL2_CAP_LARGE_NUM_QUEUES = BIT_ULL(11), + VIRTCHNL2_CAP_VLAN = BIT_ULL(12), + VIRTCHNL2_CAP_PTP = BIT_ULL(13), + /* EDT: Earliest Departure Time capability used for Timing Wheel */ + VIRTCHNL2_CAP_EDT = BIT_ULL(14), + VIRTCHNL2_CAP_ADV_RSS = BIT_ULL(15), + VIRTCHNL2_CAP_FDIR = BIT_ULL(16), + VIRTCHNL2_CAP_RX_FLEX_DESC = BIT_ULL(17), + VIRTCHNL2_CAP_PTYPE = BIT_ULL(18), + VIRTCHNL2_CAP_LOOPBACK = BIT_ULL(19), + /* Other capability 20 is reserved */ + + /* this must be the last capability */ + VIRTCHNL2_CAP_OEM = BIT_ULL(63), +}; + +/* underlying device type */ +enum virtchl2_device_type { + VIRTCHNL2_MEV_DEVICE = 0, +}; + +/** + * enum virtchnl2_txq_sched_mode - Transmit Queue Scheduling Modes. + * @VIRTCHNL2_TXQ_SCHED_MODE_QUEUE: Queue mode is the legacy mode i.e. inorder + * completions where descriptors and buffers + * are completed at the same time. + * @VIRTCHNL2_TXQ_SCHED_MODE_FLOW: Flow scheduling mode allows for out of order + * packet processing where descriptors are + * cleaned in order, but buffers can be + * completed out of order. + */ +enum virtchnl2_txq_sched_mode { + VIRTCHNL2_TXQ_SCHED_MODE_QUEUE = 0, + VIRTCHNL2_TXQ_SCHED_MODE_FLOW = 1, +}; + +/** + * enum virtchnl2_rxq_flags - Receive Queue Feature flags. + * @VIRTCHNL2_RXQ_RSC: Rx queue RSC flag. + * @VIRTCHNL2_RXQ_HDR_SPLIT: Rx queue header split flag. + * @VIRTCHNL2_RXQ_IMMEDIATE_WRITE_BACK: When set, packet descriptors are flushed + * by hardware immediately after processing + * each packet. + * @VIRTCHNL2_RX_DESC_SIZE_16BYTE: Rx queue 16 byte descriptor size. + * @VIRTCHNL2_RX_DESC_SIZE_32BYTE: Rx queue 32 byte descriptor size. + */ +enum virtchnl2_rxq_flags { + VIRTCHNL2_RXQ_RSC = BIT(0), + VIRTCHNL2_RXQ_HDR_SPLIT = BIT(1), + VIRTCHNL2_RXQ_IMMEDIATE_WRITE_BACK = BIT(2), + VIRTCHNL2_RX_DESC_SIZE_16BYTE = BIT(3), + VIRTCHNL2_RX_DESC_SIZE_32BYTE = BIT(4), +}; + +/* Type of RSS algorithm */ +enum virtchnl2_rss_alg { + VIRTCHNL2_RSS_ALG_TOEPLITZ_ASYMMETRIC = 0, + VIRTCHNL2_RSS_ALG_R_ASYMMETRIC = 1, + VIRTCHNL2_RSS_ALG_TOEPLITZ_SYMMETRIC = 2, + VIRTCHNL2_RSS_ALG_XOR_SYMMETRIC = 3, +}; + +/* Type of event */ +enum virtchnl2_event_codes { + VIRTCHNL2_EVENT_UNKNOWN = 0, + VIRTCHNL2_EVENT_LINK_CHANGE = 1, + /* Event type 2, 3 are reserved */ +}; + +/* Transmit and Receive queue types are valid in legacy as well as split queue + * models. With Split Queue model, 2 additional types are introduced - + * TX_COMPLETION and RX_BUFFER. In split queue model, receive corresponds to + * the queue where hardware posts completions. + */ +enum virtchnl2_queue_type { + VIRTCHNL2_QUEUE_TYPE_TX = 0, + VIRTCHNL2_QUEUE_TYPE_RX = 1, + VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION = 2, + VIRTCHNL2_QUEUE_TYPE_RX_BUFFER = 3, + VIRTCHNL2_QUEUE_TYPE_CONFIG_TX = 4, + VIRTCHNL2_QUEUE_TYPE_CONFIG_RX = 5, + /* Queue types 6, 7, 8, 9 are reserved */ + VIRTCHNL2_QUEUE_TYPE_MBX_TX = 10, + VIRTCHNL2_QUEUE_TYPE_MBX_RX = 11, +}; + +/* Interrupt throttling rate index */ +enum virtchnl2_itr_idx { + VIRTCHNL2_ITR_IDX_0 = 0, + VIRTCHNL2_ITR_IDX_1 = 1, +}; + +/** + * enum virtchnl2_mac_addr_type - MAC address types. + * @VIRTCHNL2_MAC_ADDR_PRIMARY: PF/VF driver should set this type for the + * primary/device unicast MAC address filter for + * VIRTCHNL2_OP_ADD_MAC_ADDR and + * VIRTCHNL2_OP_DEL_MAC_ADDR. This allows for the + * underlying control plane function to accurately + * track the MAC address and for VM/function reset. + * + * @VIRTCHNL2_MAC_ADDR_EXTRA: PF/VF driver should set this type for any extra + * unicast and/or multicast filters that are being + * added/deleted via VIRTCHNL2_OP_ADD_MAC_ADDR or + * VIRTCHNL2_OP_DEL_MAC_ADDR. + */ +enum virtchnl2_mac_addr_type { + VIRTCHNL2_MAC_ADDR_PRIMARY = 1, + VIRTCHNL2_MAC_ADDR_EXTRA = 2, +}; + +/* Flags used for promiscuous mode */ +enum virtchnl2_promisc_flags { + VIRTCHNL2_UNICAST_PROMISC = BIT(0), + VIRTCHNL2_MULTICAST_PROMISC = BIT(1), +}; + +/* Protocol header type within a packet segment. A segment consists of one or + * more protocol headers that make up a logical group of protocol headers. Each + * logical group of protocol headers encapsulates or is encapsulated using/by + * tunneling or encapsulation protocols for network virtualization. + */ +enum virtchnl2_proto_hdr_type { + /* VIRTCHNL2_PROTO_HDR_ANY is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_ANY = 0, + VIRTCHNL2_PROTO_HDR_PRE_MAC = 1, + /* VIRTCHNL2_PROTO_HDR_MAC is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_MAC = 2, + VIRTCHNL2_PROTO_HDR_POST_MAC = 3, + VIRTCHNL2_PROTO_HDR_ETHERTYPE = 4, + VIRTCHNL2_PROTO_HDR_VLAN = 5, + VIRTCHNL2_PROTO_HDR_SVLAN = 6, + VIRTCHNL2_PROTO_HDR_CVLAN = 7, + VIRTCHNL2_PROTO_HDR_MPLS = 8, + VIRTCHNL2_PROTO_HDR_UMPLS = 9, + VIRTCHNL2_PROTO_HDR_MMPLS = 10, + VIRTCHNL2_PROTO_HDR_PTP = 11, + VIRTCHNL2_PROTO_HDR_CTRL = 12, + VIRTCHNL2_PROTO_HDR_LLDP = 13, + VIRTCHNL2_PROTO_HDR_ARP = 14, + VIRTCHNL2_PROTO_HDR_ECP = 15, + VIRTCHNL2_PROTO_HDR_EAPOL = 16, + VIRTCHNL2_PROTO_HDR_PPPOD = 17, + VIRTCHNL2_PROTO_HDR_PPPOE = 18, + /* VIRTCHNL2_PROTO_HDR_IPV4 is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_IPV4 = 19, + /* IPv4 and IPv6 Fragment header types are only associated to + * VIRTCHNL2_PROTO_HDR_IPV4 and VIRTCHNL2_PROTO_HDR_IPV6 respectively, + * cannot be used independently. + */ + /* VIRTCHNL2_PROTO_HDR_IPV4_FRAG is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_IPV4_FRAG = 20, + /* VIRTCHNL2_PROTO_HDR_IPV6 is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_IPV6 = 21, + /* VIRTCHNL2_PROTO_HDR_IPV6_FRAG is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_IPV6_FRAG = 22, + VIRTCHNL2_PROTO_HDR_IPV6_EH = 23, + /* VIRTCHNL2_PROTO_HDR_UDP is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_UDP = 24, + /* VIRTCHNL2_PROTO_HDR_TCP is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_TCP = 25, + /* VIRTCHNL2_PROTO_HDR_SCTP is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_SCTP = 26, + /* VIRTCHNL2_PROTO_HDR_ICMP is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_ICMP = 27, + /* VIRTCHNL2_PROTO_HDR_ICMPV6 is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_ICMPV6 = 28, + VIRTCHNL2_PROTO_HDR_IGMP = 29, + VIRTCHNL2_PROTO_HDR_AH = 30, + VIRTCHNL2_PROTO_HDR_ESP = 31, + VIRTCHNL2_PROTO_HDR_IKE = 32, + VIRTCHNL2_PROTO_HDR_NATT_KEEP = 33, + /* VIRTCHNL2_PROTO_HDR_PAY is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_PAY = 34, + VIRTCHNL2_PROTO_HDR_L2TPV2 = 35, + VIRTCHNL2_PROTO_HDR_L2TPV2_CONTROL = 36, + VIRTCHNL2_PROTO_HDR_L2TPV3 = 37, + VIRTCHNL2_PROTO_HDR_GTP = 38, + VIRTCHNL2_PROTO_HDR_GTP_EH = 39, + VIRTCHNL2_PROTO_HDR_GTPCV2 = 40, + VIRTCHNL2_PROTO_HDR_GTPC_TEID = 41, + VIRTCHNL2_PROTO_HDR_GTPU = 42, + VIRTCHNL2_PROTO_HDR_GTPU_UL = 43, + VIRTCHNL2_PROTO_HDR_GTPU_DL = 44, + VIRTCHNL2_PROTO_HDR_ECPRI = 45, + VIRTCHNL2_PROTO_HDR_VRRP = 46, + VIRTCHNL2_PROTO_HDR_OSPF = 47, + /* VIRTCHNL2_PROTO_HDR_TUN is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_TUN = 48, + VIRTCHNL2_PROTO_HDR_GRE = 49, + VIRTCHNL2_PROTO_HDR_NVGRE = 50, + VIRTCHNL2_PROTO_HDR_VXLAN = 51, + VIRTCHNL2_PROTO_HDR_VXLAN_GPE = 52, + VIRTCHNL2_PROTO_HDR_GENEVE = 53, + VIRTCHNL2_PROTO_HDR_NSH = 54, + VIRTCHNL2_PROTO_HDR_QUIC = 55, + VIRTCHNL2_PROTO_HDR_PFCP = 56, + VIRTCHNL2_PROTO_HDR_PFCP_NODE = 57, + VIRTCHNL2_PROTO_HDR_PFCP_SESSION = 58, + VIRTCHNL2_PROTO_HDR_RTP = 59, + VIRTCHNL2_PROTO_HDR_ROCE = 60, + VIRTCHNL2_PROTO_HDR_ROCEV1 = 61, + VIRTCHNL2_PROTO_HDR_ROCEV2 = 62, + /* Protocol ids up to 32767 are reserved. + * 32768 - 65534 are used for user defined protocol ids. + * VIRTCHNL2_PROTO_HDR_NO_PROTO is a mandatory protocol id. + */ + VIRTCHNL2_PROTO_HDR_NO_PROTO = 65535, +}; + +enum virtchl2_version { + VIRTCHNL2_VERSION_MINOR_0 = 0, + VIRTCHNL2_VERSION_MAJOR_2 = 2, +}; + +/** + * struct virtchnl2_edt_caps - Get EDT granularity and time horizon. + * @tstamp_granularity_ns: Timestamp granularity in nanoseconds. + * @time_horizon_ns: Total time window in nanoseconds. + * + * Associated with VIRTCHNL2_OP_GET_EDT_CAPS. + */ +struct virtchnl2_edt_caps { + __le64 tstamp_granularity_ns; + __le64 time_horizon_ns; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_edt_caps); + +/** + * struct virtchnl2_version_info - Version information. + * @major: Major version. + * @minor: Minor version. + * + * PF/VF posts its version number to the CP. CP responds with its version number + * in the same format, along with a return code. + * If there is a major version mismatch, then the PF/VF cannot operate. + * If there is a minor version mismatch, then the PF/VF can operate but should + * add a warning to the system log. + * + * This version opcode MUST always be specified as == 1, regardless of other + * changes in the API. The CP must always respond to this message without + * error regardless of version mismatch. + * + * Associated with VIRTCHNL2_OP_VERSION. + */ +struct virtchnl2_version_info { + __le32 major; + __le32 minor; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_version_info); + +/** + * struct virtchnl2_get_capabilities - Capabilities info. + * @csum_caps: See enum virtchnl2_cap_txrx_csum. + * @seg_caps: See enum virtchnl2_cap_seg. + * @hsplit_caps: See enum virtchnl2_cap_rx_hsplit_at. + * @rsc_caps: See enum virtchnl2_cap_rsc. + * @rss_caps: See enum virtchnl2_cap_rss. + * @other_caps: See enum virtchnl2_cap_other. + * @mailbox_dyn_ctl: DYN_CTL register offset and vector id for mailbox + * provided by CP. + * @mailbox_vector_id: Mailbox vector id. + * @num_allocated_vectors: Maximum number of allocated vectors for the device. + * @max_rx_q: Maximum number of supported Rx queues. + * @max_tx_q: Maximum number of supported Tx queues. + * @max_rx_bufq: Maximum number of supported buffer queues. + * @max_tx_complq: Maximum number of supported completion queues. + * @max_sriov_vfs: The PF sends the maximum VFs it is requesting. The CP + * responds with the maximum VFs granted. + * @max_vports: Maximum number of vports that can be supported. + * @default_num_vports: Default number of vports driver should allocate on load. + * @max_tx_hdr_size: Max header length hardware can parse/checksum, in bytes. + * @max_sg_bufs_per_tx_pkt: Max number of scatter gather buffers that can be + * sent per transmit packet without needing to be + * linearized. + * @pad: Padding. + * @reserved: Reserved. + * @device_type: See enum virtchl2_device_type. + * @min_sso_packet_len: Min packet length supported by device for single + * segment offload. + * @max_hdr_buf_per_lso: Max number of header buffers that can be used for + * an LSO. + * @pad1: Padding for future extensions. + * + * Dataplane driver sends this message to CP to negotiate capabilities and + * provides a virtchnl2_get_capabilities structure with its desired + * capabilities, max_sriov_vfs and num_allocated_vectors. + * CP responds with a virtchnl2_get_capabilities structure updated + * with allowed capabilities and the other fields as below. + * If PF sets max_sriov_vfs as 0, CP will respond with max number of VFs + * that can be created by this PF. For any other value 'n', CP responds + * with max_sriov_vfs set to min(n, x) where x is the max number of VFs + * allowed by CP's policy. max_sriov_vfs is not applicable for VFs. + * If dataplane driver sets num_allocated_vectors as 0, CP will respond with 1 + * which is default vector associated with the default mailbox. For any other + * value 'n', CP responds with a value <= n based on the CP's policy of + * max number of vectors for a PF. + * CP will respond with the vector ID of mailbox allocated to the PF in + * mailbox_vector_id and the number of itr index registers in itr_idx_map. + * It also responds with default number of vports that the dataplane driver + * should comeup with in default_num_vports and maximum number of vports that + * can be supported in max_vports. + * + * Associated with VIRTCHNL2_OP_GET_CAPS. + */ +struct virtchnl2_get_capabilities { + __le32 csum_caps; + __le32 seg_caps; + __le32 hsplit_caps; + __le32 rsc_caps; + __le64 rss_caps; + __le64 other_caps; + __le32 mailbox_dyn_ctl; + __le16 mailbox_vector_id; + __le16 num_allocated_vectors; + __le16 max_rx_q; + __le16 max_tx_q; + __le16 max_rx_bufq; + __le16 max_tx_complq; + __le16 max_sriov_vfs; + __le16 max_vports; + __le16 default_num_vports; + __le16 max_tx_hdr_size; + u8 max_sg_bufs_per_tx_pkt; + u8 pad[3]; + u8 reserved[4]; + __le32 device_type; + u8 min_sso_packet_len; + u8 max_hdr_buf_per_lso; + u8 pad1[10]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(80, virtchnl2_get_capabilities); + +/** + * struct virtchnl2_queue_reg_chunk - Single queue chunk. + * @type: See enum virtchnl2_queue_type. + * @start_queue_id: Start Queue ID. + * @num_queues: Number of queues in the chunk. + * @pad: Padding. + * @qtail_reg_start: Queue tail register offset. + * @qtail_reg_spacing: Queue tail register spacing. + * @pad1: Padding for future extensions. + */ +struct virtchnl2_queue_reg_chunk { + __le32 type; + __le32 start_queue_id; + __le32 num_queues; + __le32 pad; + __le64 qtail_reg_start; + __le32 qtail_reg_spacing; + u8 pad1[4]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_queue_reg_chunk); + +/** + * struct virtchnl2_queue_reg_chunks - Specify several chunks of contiguous + * queues. + * @num_chunks: Number of chunks. + * @pad: Padding. + * @chunks: Chunks of queue info. + */ +struct virtchnl2_queue_reg_chunks { + __le16 num_chunks; + u8 pad[6]; + struct virtchnl2_queue_reg_chunk chunks[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_reg_chunks); + +/** + * struct virtchnl2_create_vport - Create vport config info. + * @vport_type: See enum virtchnl2_vport_type. + * @txq_model: See virtchnl2_queue_model. + * @rxq_model: See virtchnl2_queue_model. + * @num_tx_q: Number of Tx queues. + * @num_tx_complq: Valid only if txq_model is split queue. + * @num_rx_q: Number of Rx queues. + * @num_rx_bufq: Valid only if rxq_model is split queue. + * @default_rx_q: Relative receive queue index to be used as default. + * @vport_index: Used to align PF and CP in case of default multiple vports, + * it is filled by the PF and CP returns the same value, to + * enable the driver to support multiple asynchronous parallel + * CREATE_VPORT requests and associate a response to a specific + * request. + * @max_mtu: Max MTU. CP populates this field on response. + * @vport_id: Vport id. CP populates this field on response. + * @default_mac_addr: Default MAC address. + * @pad: Padding. + * @rx_desc_ids: See VIRTCHNL2_RX_DESC_IDS definitions. + * @tx_desc_ids: See VIRTCHNL2_TX_DESC_IDS definitions. + * @pad1: Padding. + * @rss_algorithm: RSS algorithm. + * @rss_key_size: RSS key size. + * @rss_lut_size: RSS LUT size. + * @rx_split_pos: See enum virtchnl2_cap_rx_hsplit_at. + * @pad2: Padding. + * @chunks: Chunks of contiguous queues. + * + * PF sends this message to CP to create a vport by filling in required + * fields of virtchnl2_create_vport structure. + * CP responds with the updated virtchnl2_create_vport structure containing the + * necessary fields followed by chunks which in turn will have an array of + * num_chunks entries of virtchnl2_queue_chunk structures. + * + * Associated with VIRTCHNL2_OP_CREATE_VPORT. + */ +struct virtchnl2_create_vport { + __le16 vport_type; + __le16 txq_model; + __le16 rxq_model; + __le16 num_tx_q; + __le16 num_tx_complq; + __le16 num_rx_q; + __le16 num_rx_bufq; + __le16 default_rx_q; + __le16 vport_index; + /* CP populates the following fields on response */ + __le16 max_mtu; + __le32 vport_id; + u8 default_mac_addr[ETH_ALEN]; + __le16 pad; + __le64 rx_desc_ids; + __le64 tx_desc_ids; + u8 pad1[72]; + __le32 rss_algorithm; + __le16 rss_key_size; + __le16 rss_lut_size; + __le32 rx_split_pos; + u8 pad2[20]; + struct virtchnl2_queue_reg_chunks chunks; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(160, virtchnl2_create_vport); + +/** + * struct virtchnl2_vport - Vport ID info. + * @vport_id: Vport id. + * @pad: Padding for future extensions. + * + * PF sends this message to CP to destroy, enable or disable a vport by filling + * in the vport_id in virtchnl2_vport structure. + * CP responds with the status of the requested operation. + * + * Associated with VIRTCHNL2_OP_DESTROY_VPORT, VIRTCHNL2_OP_ENABLE_VPORT, + * VIRTCHNL2_OP_DISABLE_VPORT. + */ +struct virtchnl2_vport { + __le32 vport_id; + u8 pad[4]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_vport); + +/** + * struct virtchnl2_txq_info - Transmit queue config info + * @dma_ring_addr: DMA address. + * @type: See enum virtchnl2_queue_type. + * @queue_id: Queue ID. + * @relative_queue_id: Valid only if queue model is split and type is transmit + * queue. Used in many to one mapping of transmit queues to + * completion queue. + * @model: See enum virtchnl2_queue_model. + * @sched_mode: See enum virtchnl2_txq_sched_mode. + * @qflags: TX queue feature flags. + * @ring_len: Ring length. + * @tx_compl_queue_id: Valid only if queue model is split and type is transmit + * queue. + * @peer_type: Valid only if queue type is VIRTCHNL2_QUEUE_TYPE_MAILBOX_TX + * @peer_rx_queue_id: Valid only if queue type is CONFIG_TX and used to deliver + * messages for the respective CONFIG_TX queue. + * @pad: Padding. + * @egress_pasid: Egress PASID info. + * @egress_hdr_pasid: Egress HDR passid. + * @egress_buf_pasid: Egress buf passid. + * @pad1: Padding for future extensions. + */ +struct virtchnl2_txq_info { + __le64 dma_ring_addr; + __le32 type; + __le32 queue_id; + __le16 relative_queue_id; + __le16 model; + __le16 sched_mode; + __le16 qflags; + __le16 ring_len; + __le16 tx_compl_queue_id; + __le16 peer_type; + __le16 peer_rx_queue_id; + u8 pad[4]; + __le32 egress_pasid; + __le32 egress_hdr_pasid; + __le32 egress_buf_pasid; + u8 pad1[8]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(56, virtchnl2_txq_info); + +/** + * struct virtchnl2_config_tx_queues - TX queue config. + * @vport_id: Vport id. + * @num_qinfo: Number of virtchnl2_txq_info structs. + * @pad: Padding. + * @qinfo: Tx queues config info. + * + * PF sends this message to set up parameters for one or more transmit queues. + * This message contains an array of num_qinfo instances of virtchnl2_txq_info + * structures. CP configures requested queues and returns a status code. If + * num_qinfo specified is greater than the number of queues associated with the + * vport, an error is returned and no queues are configured. + * + * Associated with VIRTCHNL2_OP_CONFIG_TX_QUEUES. + */ +struct virtchnl2_config_tx_queues { + __le32 vport_id; + __le16 num_qinfo; + u8 pad[10]; + struct virtchnl2_txq_info qinfo[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_config_tx_queues); + +/** + * struct virtchnl2_rxq_info - Receive queue config info. + * @desc_ids: See VIRTCHNL2_RX_DESC_IDS definitions. + * @dma_ring_addr: See VIRTCHNL2_RX_DESC_IDS definitions. + * @type: See enum virtchnl2_queue_type. + * @queue_id: Queue id. + * @model: See enum virtchnl2_queue_model. + * @hdr_buffer_size: Header buffer size. + * @data_buffer_size: Data buffer size. + * @max_pkt_size: Max packet size. + * @ring_len: Ring length. + * @buffer_notif_stride: Buffer notification stride in units of 32-descriptors. + * This field must be a power of 2. + * @pad: Padding. + * @dma_head_wb_addr: Applicable only for receive buffer queues. + * @qflags: Applicable only for receive completion queues. + * See enum virtchnl2_rxq_flags. + * @rx_buffer_low_watermark: Rx buffer low watermark. + * @rx_bufq1_id: Buffer queue index of the first buffer queue associated with + * the Rx queue. Valid only in split queue model. + * @rx_bufq2_id: Buffer queue index of the second buffer queue associated with + * the Rx queue. Valid only in split queue model. + * @bufq2_ena: It indicates if there is a second buffer, rx_bufq2_id is valid + * only if this field is set. + * @pad1: Padding. + * @ingress_pasid: Ingress PASID. + * @ingress_hdr_pasid: Ingress PASID header. + * @ingress_buf_pasid: Ingress PASID buffer. + * @pad2: Padding for future extensions. + */ +struct virtchnl2_rxq_info { + __le64 desc_ids; + __le64 dma_ring_addr; + __le32 type; + __le32 queue_id; + __le16 model; + __le16 hdr_buffer_size; + __le32 data_buffer_size; + __le32 max_pkt_size; + __le16 ring_len; + u8 buffer_notif_stride; + u8 pad; + __le64 dma_head_wb_addr; + __le16 qflags; + __le16 rx_buffer_low_watermark; + __le16 rx_bufq1_id; + __le16 rx_bufq2_id; + u8 bufq2_ena; + u8 pad1[3]; + __le32 ingress_pasid; + __le32 ingress_hdr_pasid; + __le32 ingress_buf_pasid; + u8 pad2[16]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(88, virtchnl2_rxq_info); + +/** + * struct virtchnl2_config_rx_queues - Rx queues config. + * @vport_id: Vport id. + * @num_qinfo: Number of instances. + * @pad: Padding. + * @qinfo: Rx queues config info. + * + * PF sends this message to set up parameters for one or more receive queues. + * This message contains an array of num_qinfo instances of virtchnl2_rxq_info + * structures. CP configures requested queues and returns a status code. + * If the number of queues specified is greater than the number of queues + * associated with the vport, an error is returned and no queues are configured. + * + * Associated with VIRTCHNL2_OP_CONFIG_RX_QUEUES. + */ +struct virtchnl2_config_rx_queues { + __le32 vport_id; + __le16 num_qinfo; + u8 pad[18]; + struct virtchnl2_rxq_info qinfo[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_config_rx_queues); + +/** + * struct virtchnl2_add_queues - data for VIRTCHNL2_OP_ADD_QUEUES. + * @vport_id: Vport id. + * @num_tx_q: Number of Tx qieues. + * @num_tx_complq: Number of Tx completion queues. + * @num_rx_q: Number of Rx queues. + * @num_rx_bufq: Number of Rx buffer queues. + * @pad: Padding. + * @chunks: Chunks of contiguous queues. + * + * PF sends this message to request additional transmit/receive queues beyond + * the ones that were assigned via CREATE_VPORT request. virtchnl2_add_queues + * structure is used to specify the number of each type of queues. + * CP responds with the same structure with the actual number of queues assigned + * followed by num_chunks of virtchnl2_queue_chunk structures. + * + * Associated with VIRTCHNL2_OP_ADD_QUEUES. + */ +struct virtchnl2_add_queues { + __le32 vport_id; + __le16 num_tx_q; + __le16 num_tx_complq; + __le16 num_rx_q; + __le16 num_rx_bufq; + u8 pad[4]; + struct virtchnl2_queue_reg_chunks chunks; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_add_queues); + +/** + * struct virtchnl2_vector_chunk - Structure to specify a chunk of contiguous + * interrupt vectors. + * @start_vector_id: Start vector id. + * @start_evv_id: Start EVV id. + * @num_vectors: Number of vectors. + * @pad: Padding. + * @dynctl_reg_start: DYN_CTL register offset. + * @dynctl_reg_spacing: register spacing between DYN_CTL registers of 2 + * consecutive vectors. + * @itrn_reg_start: ITRN register offset. + * @itrn_reg_spacing: Register spacing between dynctl registers of 2 + * consecutive vectors. + * @itrn_index_spacing: Register spacing between itrn registers of the same + * vector where n=0..2. + * @pad1: Padding for future extensions. + * + * Register offsets and spacing provided by CP. + * Dynamic control registers are used for enabling/disabling/re-enabling + * interrupts and updating interrupt rates in the hotpath. Any changes + * to interrupt rates in the dynamic control registers will be reflected + * in the interrupt throttling rate registers. + * itrn registers are used to update interrupt rates for specific + * interrupt indices without modifying the state of the interrupt. + */ +struct virtchnl2_vector_chunk { + __le16 start_vector_id; + __le16 start_evv_id; + __le16 num_vectors; + __le16 pad; + __le32 dynctl_reg_start; + __le32 dynctl_reg_spacing; + __le32 itrn_reg_start; + __le32 itrn_reg_spacing; + __le32 itrn_index_spacing; + u8 pad1[4]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_vector_chunk); + +/** + * struct virtchnl2_vector_chunks - chunks of contiguous interrupt vectors. + * @num_vchunks: number of vector chunks. + * @pad: Padding. + * @vchunks: Chunks of contiguous vector info. + * + * PF sends virtchnl2_vector_chunks struct to specify the vectors it is giving + * away. CP performs requested action and returns status. + * + * Associated with VIRTCHNL2_OP_DEALLOC_VECTORS. + */ +struct virtchnl2_vector_chunks { + __le16 num_vchunks; + u8 pad[14]; + struct virtchnl2_vector_chunk vchunks[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_vector_chunks); + +/** + * struct virtchnl2_alloc_vectors - vector allocation info. + * @num_vectors: Number of vectors. + * @pad: Padding. + * @vchunks: Chunks of contiguous vector info. + * + * PF sends this message to request additional interrupt vectors beyond the + * ones that were assigned via GET_CAPS request. virtchnl2_alloc_vectors + * structure is used to specify the number of vectors requested. CP responds + * with the same structure with the actual number of vectors assigned followed + * by virtchnl2_vector_chunks structure identifying the vector ids. + * + * Associated with VIRTCHNL2_OP_ALLOC_VECTORS. + */ +struct virtchnl2_alloc_vectors { + __le16 num_vectors; + u8 pad[14]; + struct virtchnl2_vector_chunks vchunks; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_alloc_vectors); + +/** + * struct virtchnl2_rss_lut - RSS LUT info. + * @vport_id: Vport id. + * @lut_entries_start: Start of LUT entries. + * @lut_entries: Number of LUT entrties. + * @pad: Padding. + * @lut: RSS lookup table. + * + * PF sends this message to get or set RSS lookup table. Only supported if + * both PF and CP drivers set the VIRTCHNL2_CAP_RSS bit during configuration + * negotiation. + * + * Associated with VIRTCHNL2_OP_GET_RSS_LUT and VIRTCHNL2_OP_SET_RSS_LUT. + */ +struct virtchnl2_rss_lut { + __le32 vport_id; + __le16 lut_entries_start; + __le16 lut_entries; + u8 pad[4]; + __le32 lut[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(12, virtchnl2_rss_lut); + +/** + * struct virtchnl2_rss_hash - RSS hash info. + * @ptype_groups: Packet type groups bitmap. + * @vport_id: Vport id. + * @pad: Padding for future extensions. + * + * PF sends these messages to get and set the hash filter enable bits for RSS. + * By default, the CP sets these to all possible traffic types that the + * hardware supports. The PF can query this value if it wants to change the + * traffic types that are hashed by the hardware. + * Only supported if both PF and CP drivers set the VIRTCHNL2_CAP_RSS bit + * during configuration negotiation. + * + * Associated with VIRTCHNL2_OP_GET_RSS_HASH and VIRTCHNL2_OP_SET_RSS_HASH + */ +struct virtchnl2_rss_hash { + __le64 ptype_groups; + __le32 vport_id; + u8 pad[4]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_rss_hash); + +/** + * struct virtchnl2_sriov_vfs_info - VFs info. + * @num_vfs: Number of VFs. + * @pad: Padding for future extensions. + * + * This message is used to set number of SRIOV VFs to be created. The actual + * allocation of resources for the VFs in terms of vport, queues and interrupts + * is done by CP. When this call completes, the IDPF driver calls + * pci_enable_sriov to let the OS instantiate the SRIOV PCIE devices. + * The number of VFs set to 0 will destroy all the VFs of this function. + * + * Associated with VIRTCHNL2_OP_SET_SRIOV_VFS. + */ +struct virtchnl2_sriov_vfs_info { + __le16 num_vfs; + __le16 pad; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(4, virtchnl2_sriov_vfs_info); + +/** + * struct virtchnl2_ptype - Packet type info. + * @ptype_id_10: 10-bit packet type. + * @ptype_id_8: 8-bit packet type. + * @proto_id_count: Number of protocol ids the packet supports, maximum of 32 + * protocol ids are supported. + * @pad: Padding. + * @proto_id: proto_id_count decides the allocation of protocol id array. + * See enum virtchnl2_proto_hdr_type. + * + * Based on the descriptor type the PF supports, CP fills ptype_id_10 or + * ptype_id_8 for flex and base descriptor respectively. If ptype_id_10 value + * is set to 0xFFFF, PF should consider this ptype as dummy one and it is the + * last ptype. + */ +struct virtchnl2_ptype { + __le16 ptype_id_10; + u8 ptype_id_8; + u8 proto_id_count; + __le16 pad; + __le16 proto_id[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); + +/** + * struct virtchnl2_get_ptype_info - Packet type info. + * @start_ptype_id: Starting ptype ID. + * @num_ptypes: Number of packet types from start_ptype_id. + * @pad: Padding for future extensions. + * + * The total number of supported packet types is based on the descriptor type. + * For the flex descriptor, it is 1024 (10-bit ptype), and for the base + * descriptor, it is 256 (8-bit ptype). Send this message to the CP by + * populating the 'start_ptype_id' and the 'num_ptypes'. CP responds with the + * 'start_ptype_id', 'num_ptypes', and the array of ptype (virtchnl2_ptype) that + * are added at the end of the 'virtchnl2_get_ptype_info' message (Note: There + * is no specific field for the ptypes but are added at the end of the + * ptype info message. PF/VF is expected to extract the ptypes accordingly. + * Reason for doing this is because compiler doesn't allow nested flexible + * array fields). + * + * If all the ptypes don't fit into one mailbox buffer, CP splits the + * ptype info into multiple messages, where each message will have its own + * 'start_ptype_id', 'num_ptypes', and the ptype array itself. When CP is done + * updating all the ptype information extracted from the package (the number of + * ptypes extracted might be less than what PF/VF expects), it will append a + * dummy ptype (which has 'ptype_id_10' of 'struct virtchnl2_ptype' as 0xFFFF) + * to the ptype array. + * + * PF/VF is expected to receive multiple VIRTCHNL2_OP_GET_PTYPE_INFO messages. + * + * Associated with VIRTCHNL2_OP_GET_PTYPE_INFO. + */ +struct virtchnl2_get_ptype_info { + __le16 start_ptype_id; + __le16 num_ptypes; + __le32 pad; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_get_ptype_info); + +/** + * struct virtchnl2_vport_stats - Vport statistics. + * @vport_id: Vport id. + * @pad: Padding. + * @rx_bytes: Received bytes. + * @rx_unicast: Received unicast packets. + * @rx_multicast: Received multicast packets. + * @rx_broadcast: Received broadcast packets. + * @rx_discards: Discarded packets on receive. + * @rx_errors: Receive errors. + * @rx_unknown_protocol: Unlnown protocol. + * @tx_bytes: Transmitted bytes. + * @tx_unicast: Transmitted unicast packets. + * @tx_multicast: Transmitted multicast packets. + * @tx_broadcast: Transmitted broadcast packets. + * @tx_discards: Discarded packets on transmit. + * @tx_errors: Transmit errors. + * @rx_invalid_frame_length: Packets with invalid frame length. + * @rx_overflow_drop: Packets dropped on buffer overflow. + * + * PF/VF sends this message to CP to get the update stats by specifying the + * vport_id. CP responds with stats in struct virtchnl2_vport_stats. + * + * Associated with VIRTCHNL2_OP_GET_STATS. + */ +struct virtchnl2_vport_stats { + __le32 vport_id; + u8 pad[4]; + __le64 rx_bytes; + __le64 rx_unicast; + __le64 rx_multicast; + __le64 rx_broadcast; + __le64 rx_discards; + __le64 rx_errors; + __le64 rx_unknown_protocol; + __le64 tx_bytes; + __le64 tx_unicast; + __le64 tx_multicast; + __le64 tx_broadcast; + __le64 tx_discards; + __le64 tx_errors; + __le64 rx_invalid_frame_length; + __le64 rx_overflow_drop; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(128, virtchnl2_vport_stats); + +/** + * struct virtchnl2_event - Event info. + * @event: Event opcode. See enum virtchnl2_event_codes. + * @link_speed: Link_speed provided in Mbps. + * @vport_id: Vport ID. + * @link_status: Link status. + * @pad: Padding. + * @reserved: Reserved. + * + * CP sends this message to inform the PF/VF driver of events that may affect + * it. No direct response is expected from the driver, though it may generate + * other messages in response to this one. + * + * Associated with VIRTCHNL2_OP_EVENT. + */ +struct virtchnl2_event { + __le32 event; + __le32 link_speed; + __le32 vport_id; + u8 link_status; + u8 pad; + __le16 reserved; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_event); + +/** + * struct virtchnl2_rss_key - RSS key info. + * @vport_id: Vport id. + * @key_len: Length of RSS key. + * @pad: Padding. + * @key_flex: RSS hash key, packed bytes. + * PF/VF sends this message to get or set RSS key. Only supported if both + * PF/VF and CP drivers set the VIRTCHNL2_CAP_RSS bit during configuration + * negotiation. + * + * Associated with VIRTCHNL2_OP_GET_RSS_KEY and VIRTCHNL2_OP_SET_RSS_KEY. + */ +struct virtchnl2_rss_key { + __le32 vport_id; + __le16 key_len; + u8 pad; + __DECLARE_FLEX_ARRAY(u8, key_flex); +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_rss_key); + +/** + * struct virtchnl2_queue_chunk - chunk of contiguous queues + * @type: See enum virtchnl2_queue_type. + * @start_queue_id: Starting queue id. + * @num_queues: Number of queues. + * @pad: Padding for future extensions. + */ +struct virtchnl2_queue_chunk { + __le32 type; + __le32 start_queue_id; + __le32 num_queues; + u8 pad[4]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_chunk); + +/* struct virtchnl2_queue_chunks - chunks of contiguous queues + * @num_chunks: Number of chunks. + * @pad: Padding. + * @chunks: Chunks of contiguous queues info. + */ +struct virtchnl2_queue_chunks { + __le16 num_chunks; + u8 pad[6]; + struct virtchnl2_queue_chunk chunks[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_chunks); + +/** + * struct virtchnl2_del_ena_dis_queues - Enable/disable queues info. + * @vport_id: Vport id. + * @pad: Padding. + * @chunks: Chunks of contiguous queues info. + * + * PF sends these messages to enable, disable or delete queues specified in + * chunks. PF sends virtchnl2_del_ena_dis_queues struct to specify the queues + * to be enabled/disabled/deleted. Also applicable to single queue receive or + * transmit. CP performs requested action and returns status. + * + * Associated with VIRTCHNL2_OP_ENABLE_QUEUES, VIRTCHNL2_OP_DISABLE_QUEUES and + * VIRTCHNL2_OP_DISABLE_QUEUES. + */ +struct virtchnl2_del_ena_dis_queues { + __le32 vport_id; + u8 pad[4]; + struct virtchnl2_queue_chunks chunks; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_del_ena_dis_queues); + +/** + * struct virtchnl2_queue_vector - Queue to vector mapping. + * @queue_id: Queue id. + * @vector_id: Vector id. + * @pad: Padding. + * @itr_idx: See enum virtchnl2_itr_idx. + * @queue_type: See enum virtchnl2_queue_type. + * @pad1: Padding for future extensions. + */ +struct virtchnl2_queue_vector { + __le32 queue_id; + __le16 vector_id; + u8 pad[2]; + __le32 itr_idx; + __le32 queue_type; + u8 pad1[8]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_queue_vector); + +/** + * struct virtchnl2_queue_vector_maps - Map/unmap queues info. + * @vport_id: Vport id. + * @num_qv_maps: Number of queue vector maps. + * @pad: Padding. + * @qv_maps: Queue to vector maps. + * + * PF sends this message to map or unmap queues to vectors and interrupt + * throttling rate index registers. External data buffer contains + * virtchnl2_queue_vector_maps structure that contains num_qv_maps of + * virtchnl2_queue_vector structures. CP maps the requested queue vector maps + * after validating the queue and vector ids and returns a status code. + * + * Associated with VIRTCHNL2_OP_MAP_QUEUE_VECTOR and + * VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR. + */ +struct virtchnl2_queue_vector_maps { + __le32 vport_id; + __le16 num_qv_maps; + u8 pad[10]; + struct virtchnl2_queue_vector qv_maps[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_vector_maps); + +/** + * struct virtchnl2_loopback - Loopback info. + * @vport_id: Vport id. + * @enable: Enable/disable. + * @pad: Padding for future extensions. + * + * PF/VF sends this message to transition to/from the loopback state. Setting + * the 'enable' to 1 enables the loopback state and setting 'enable' to 0 + * disables it. CP configures the state to loopback and returns status. + * + * Associated with VIRTCHNL2_OP_LOOPBACK. + */ +struct virtchnl2_loopback { + __le32 vport_id; + u8 enable; + u8 pad[3]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_loopback); + +/* struct virtchnl2_mac_addr - MAC address info. + * @addr: MAC address. + * @type: MAC type. See enum virtchnl2_mac_addr_type. + * @pad: Padding for future extensions. + */ +struct virtchnl2_mac_addr { + u8 addr[ETH_ALEN]; + u8 type; + u8 pad; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_mac_addr); + +/** + * struct virtchnl2_mac_addr_list - List of MAC addresses. + * @vport_id: Vport id. + * @num_mac_addr: Number of MAC addresses. + * @pad: Padding. + * @mac_addr_list: List with MAC address info. + * + * PF/VF driver uses this structure to send list of MAC addresses to be + * added/deleted to the CP where as CP performs the action and returns the + * status. + * + * Associated with VIRTCHNL2_OP_ADD_MAC_ADDR and VIRTCHNL2_OP_DEL_MAC_ADDR. + */ +struct virtchnl2_mac_addr_list { + __le32 vport_id; + __le16 num_mac_addr; + u8 pad[2]; + struct virtchnl2_mac_addr mac_addr_list[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_mac_addr_list); + +/** + * struct virtchnl2_promisc_info - Promisc type info. + * @vport_id: Vport id. + * @flags: See enum virtchnl2_promisc_flags. + * @pad: Padding for future extensions. + * + * PF/VF sends vport id and flags to the CP where as CP performs the action + * and returns the status. + * + * Associated with VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE. + */ +struct virtchnl2_promisc_info { + __le32 vport_id; + /* See VIRTCHNL2_PROMISC_FLAGS definitions */ + __le16 flags; + u8 pad[2]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_promisc_info); + +#endif /* _VIRTCHNL_2_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h b/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h new file mode 100644 index 000000000000..f1b577f1c452 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h @@ -0,0 +1,451 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _VIRTCHNL2_LAN_DESC_H_ +#define _VIRTCHNL2_LAN_DESC_H_ + +#include <linux/bits.h> + +/* This is an interface definition file where existing enums and their values + * must remain unchanged over time, so we specify explicit values for all enums. + */ + +/* Transmit descriptor ID flags + */ +enum virtchnl2_tx_desc_ids { + VIRTCHNL2_TXDID_DATA = BIT(0), + VIRTCHNL2_TXDID_CTX = BIT(1), + /* TXDID bit 2 is reserved + * TXDID bit 3 is free for future use + * TXDID bit 4 is reserved + */ + VIRTCHNL2_TXDID_FLEX_TSO_CTX = BIT(5), + /* TXDID bit 6 is reserved */ + VIRTCHNL2_TXDID_FLEX_L2TAG1_L2TAG2 = BIT(7), + /* TXDID bits 8 and 9 are free for future use + * TXDID bit 10 is reserved + * TXDID bit 11 is free for future use + */ + VIRTCHNL2_TXDID_FLEX_FLOW_SCHED = BIT(12), + /* TXDID bits 13 and 14 are free for future use */ + VIRTCHNL2_TXDID_DESC_DONE = BIT(15), +}; + +/* Receive descriptor IDs */ +enum virtchnl2_rx_desc_ids { + VIRTCHNL2_RXDID_1_32B_BASE = 1, + /* FLEX_SQ_NIC and FLEX_SPLITQ share desc ids because they can be + * differentiated based on queue model; e.g. single queue model can + * only use FLEX_SQ_NIC and split queue model can only use FLEX_SPLITQ + * for DID 2. + */ + VIRTCHNL2_RXDID_2_FLEX_SPLITQ = 2, + VIRTCHNL2_RXDID_2_FLEX_SQ_NIC = VIRTCHNL2_RXDID_2_FLEX_SPLITQ, + /* 3 through 6 are reserved */ + VIRTCHNL2_RXDID_7_HW_RSVD = 7, + /* 8 through 15 are free */ +}; + +/* Receive descriptor ID bitmasks */ +#define VIRTCHNL2_RXDID_M(bit) BIT_ULL(VIRTCHNL2_RXDID_##bit) + +enum virtchnl2_rx_desc_id_bitmasks { + VIRTCHNL2_RXDID_1_32B_BASE_M = VIRTCHNL2_RXDID_M(1_32B_BASE), + VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M = VIRTCHNL2_RXDID_M(2_FLEX_SPLITQ), + VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M = VIRTCHNL2_RXDID_M(2_FLEX_SQ_NIC), + VIRTCHNL2_RXDID_7_HW_RSVD_M = VIRTCHNL2_RXDID_M(7_HW_RSVD), +}; + +/* For splitq virtchnl2_rx_flex_desc_adv_nic_3 desc members */ +#define VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M GENMASK(3, 0) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_UMBCAST_M GENMASK(7, 6) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M GENMASK(9, 0) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_S 12 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_S) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_M GENMASK(15, 13) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M GENMASK(13, 0) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S 14 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S 15 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M GENMASK(9, 0) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S 10 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S 11 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_S 12 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_M GENMASK(14, 12) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S 15 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S) + +/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */ +enum virtchl2_rx_flex_desc_adv_status_error_0_qw1_bits { + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_DD_M = BIT(0), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M = BIT(1), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_HBO_M = BIT(2), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_M = BIT(3), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_M = BIT(4), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_M = BIT(5), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_M = BIT(6), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_M = BIT(7), +}; + +/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */ +enum virtchnl2_rx_flex_desc_adv_status_error_0_qw0_bits { + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_LPBK_M = BIT(0), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M = BIT(1), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RXE_M = BIT(2), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_CRCP_M = BIT(3), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RSS_VALID_M = BIT(4), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L2TAG1P_M = BIT(5), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XTRMD0_VALID_M = BIT(6), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XTRMD1_VALID_M = BIT(7), +}; + +/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */ +enum virtchnl2_rx_flex_desc_adv_status_error_1_bits { + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_RSVD_M = GENMASK(1, 0), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_ATRAEFAIL_M = BIT(2), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_L2TAG2P_M = BIT(3), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD2_VALID_M = BIT(4), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD3_VALID_M = BIT(5), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD4_VALID_M = BIT(6), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD5_VALID_M = BIT(7), +}; + +/* For singleq (flex) virtchnl2_rx_flex_desc fields + * For virtchnl2_rx_flex_desc.ptype_flex_flags0 member + */ +#define VIRTCHNL2_RX_FLEX_DESC_PTYPE_M GENMASK(9, 0) + +/* For virtchnl2_rx_flex_desc.pkt_len member */ +#define VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M GENMASK(13, 0) + +/* Bitmasks for singleq (flex) virtchnl2_rx_flex_desc */ +enum virtchnl2_rx_flex_desc_status_error_0_bits { + VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_M = BIT(0), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_EOF_M = BIT(1), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_HBO_M = BIT(2), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M = BIT(3), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M = BIT(4), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M = BIT(5), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M = BIT(6), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M = BIT(7), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_LPBK_M = BIT(8), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M = BIT(9), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_RXE_M = BIT(10), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_CRCP_M = BIT(11), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M = BIT(12), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_L2TAG1P_M = BIT(13), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_M = BIT(14), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_M = BIT(15), +}; + +/* Bitmasks for singleq (flex) virtchnl2_rx_flex_desc */ +enum virtchnl2_rx_flex_desc_status_error_1_bits { + VIRTCHNL2_RX_FLEX_DESC_STATUS1_CPM_M = GENMASK(3, 0), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M = BIT(4), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_CRYPTO_M = BIT(5), + /* [10:6] reserved */ + VIRTCHNL2_RX_FLEX_DESC_STATUS1_L2TAG2P_M = BIT(11), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD2_VALID_M = BIT(12), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD3_VALID_M = BIT(13), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_M = BIT(14), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_M = BIT(15), +}; + +/* For virtchnl2_rx_flex_desc.ts_low member */ +#define VIRTCHNL2_RX_FLEX_TSTAMP_VALID BIT(0) + +/* For singleq (non flex) virtchnl2_singleq_base_rx_desc legacy desc members */ +#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M GENMASK_ULL(51, 38) +#define VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M GENMASK_ULL(37, 30) +#define VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M GENMASK_ULL(26, 19) +#define VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M GENMASK_ULL(18, 0) + +/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */ +enum virtchnl2_rx_base_desc_status_bits { + VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M = BIT(0), + VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M = BIT(1), + VIRTCHNL2_RX_BASE_DESC_STATUS_L2TAG1P_M = BIT(2), + VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M = BIT(3), + VIRTCHNL2_RX_BASE_DESC_STATUS_CRCP_M = BIT(4), + VIRTCHNL2_RX_BASE_DESC_STATUS_RSVD_M = GENMASK(7, 5), + VIRTCHNL2_RX_BASE_DESC_STATUS_EXT_UDP_0_M = BIT(8), + VIRTCHNL2_RX_BASE_DESC_STATUS_UMBCAST_M = GENMASK(10, 9), + VIRTCHNL2_RX_BASE_DESC_STATUS_FLM_M = BIT(11), + VIRTCHNL2_RX_BASE_DESC_STATUS_FLTSTAT_M = GENMASK(13, 12), + VIRTCHNL2_RX_BASE_DESC_STATUS_LPBK_M = BIT(14), + VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M = BIT(15), + VIRTCHNL2_RX_BASE_DESC_STATUS_RSVD1_M = GENMASK(17, 16), + VIRTCHNL2_RX_BASE_DESC_STATUS_INT_UDP_0_M = BIT(18), +}; + +/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */ +enum virtchnl2_rx_base_desc_error_bits { + VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M = BIT(0), + VIRTCHNL2_RX_BASE_DESC_ERROR_ATRAEFAIL_M = BIT(1), + VIRTCHNL2_RX_BASE_DESC_ERROR_HBO_M = BIT(2), + VIRTCHNL2_RX_BASE_DESC_ERROR_L3L4E_M = GENMASK(5, 3), + VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M = BIT(3), + VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M = BIT(4), + VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M = BIT(5), + VIRTCHNL2_RX_BASE_DESC_ERROR_OVERSIZE_M = BIT(6), + VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M = BIT(7), +}; + +/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */ +#define VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M GENMASK(13, 12) + +/** + * struct virtchnl2_splitq_rx_buf_desc - SplitQ RX buffer descriptor format + * @qword0: RX buffer struct. + * @qword0.buf_id: Buffer identifier. + * @qword0.rsvd0: Reserved. + * @qword0.rsvd1: Reserved. + * @pkt_addr: Packet buffer address. + * @hdr_addr: Header buffer address. + * @rsvd2: Rerserved. + * + * Receive Descriptors + * SplitQ buffer + * | 16| 0| + * ---------------------------------------------------------------- + * | RSV | Buffer ID | + * ---------------------------------------------------------------- + * | Rx packet buffer address | + * ---------------------------------------------------------------- + * | Rx header buffer address | + * ---------------------------------------------------------------- + * | RSV | + * ---------------------------------------------------------------- + * | 0| + */ +struct virtchnl2_splitq_rx_buf_desc { + struct { + __le16 buf_id; + __le16 rsvd0; + __le32 rsvd1; + } qword0; + __le64 pkt_addr; + __le64 hdr_addr; + __le64 rsvd2; +}; + +/** + * struct virtchnl2_singleq_rx_buf_desc - SingleQ RX buffer descriptor format. + * @pkt_addr: Packet buffer address. + * @hdr_addr: Header buffer address. + * @rsvd1: Reserved. + * @rsvd2: Reserved. + * + * SingleQ buffer + * | 0| + * ---------------------------------------------------------------- + * | Rx packet buffer address | + * ---------------------------------------------------------------- + * | Rx header buffer address | + * ---------------------------------------------------------------- + * | RSV | + * ---------------------------------------------------------------- + * | RSV | + * ---------------------------------------------------------------- + * | 0| + */ +struct virtchnl2_singleq_rx_buf_desc { + __le64 pkt_addr; + __le64 hdr_addr; + __le64 rsvd1; + __le64 rsvd2; +}; + +/** + * struct virtchnl2_singleq_base_rx_desc - RX descriptor writeback format. + * @qword0: First quad word struct. + * @qword0.lo_dword: Lower dual word struct. + * @qword0.lo_dword.mirroring_status: Mirrored packet status. + * @qword0.lo_dword.l2tag1: Stripped L2 tag from the received packet. + * @qword0.hi_dword: High dual word union. + * @qword0.hi_dword.rss: RSS hash. + * @qword0.hi_dword.fd_id: Flow director filter id. + * @qword1: Second quad word struct. + * @qword1.status_error_ptype_len: Status/error/PTYPE/length. + * @qword2: Third quad word struct. + * @qword2.ext_status: Extended status. + * @qword2.rsvd: Reserved. + * @qword2.l2tag2_1: Extracted L2 tag 2 from the packet. + * @qword2.l2tag2_2: Reserved. + * @qword3: Fourth quad word struct. + * @qword3.reserved: Reserved. + * @qword3.fd_id: Flow director filter id. + * + * Profile ID 0x1, SingleQ, base writeback format + */ +struct virtchnl2_singleq_base_rx_desc { + struct { + struct { + __le16 mirroring_status; + __le16 l2tag1; + } lo_dword; + union { + __le32 rss; + __le32 fd_id; + } hi_dword; + } qword0; + struct { + __le64 status_error_ptype_len; + } qword1; + struct { + __le16 ext_status; + __le16 rsvd; + __le16 l2tag2_1; + __le16 l2tag2_2; + } qword2; + struct { + __le32 reserved; + __le32 fd_id; + } qword3; +}; + +/** + * struct virtchnl2_rx_flex_desc_nic - RX descriptor writeback format. + * + * @rxdid: Descriptor builder profile id. + * @mir_id_umb_cast: umb_cast=[7:6], mirror=[5:0] + * @ptype_flex_flags0: ff0=[15:10], ptype=[9:0] + * @pkt_len: Packet length, [15:14] are reserved. + * @hdr_len_sph_flex_flags1: ff1/ext=[15:12], sph=[11], header=[10:0]. + * @status_error0: Status/Error section 0. + * @l2tag1: Stripped L2 tag from the received packet + * @rss_hash: RSS hash. + * @status_error1: Status/Error section 1. + * @flexi_flags2: Flexible flags section 2. + * @ts_low: Lower word of timestamp value. + * @l2tag2_1st: First L2TAG2. + * @l2tag2_2nd: Second L2TAG2. + * @flow_id: Flow id. + * @flex_ts: Timestamp and flexible flow id union. + * @flex_ts.ts_high: Timestamp higher word of the timestamp value. + * @flex_ts.flex.rsvd: Reserved. + * @flex_ts.flex.flow_id_ipv6: IPv6 flow id. + * + * Profile ID 0x2, SingleQ, flex writeback format + */ +struct virtchnl2_rx_flex_desc_nic { + /* Qword 0 */ + u8 rxdid; + u8 mir_id_umb_cast; + __le16 ptype_flex_flags0; + __le16 pkt_len; + __le16 hdr_len_sph_flex_flags1; + /* Qword 1 */ + __le16 status_error0; + __le16 l2tag1; + __le32 rss_hash; + /* Qword 2 */ + __le16 status_error1; + u8 flexi_flags2; + u8 ts_low; + __le16 l2tag2_1st; + __le16 l2tag2_2nd; + /* Qword 3 */ + __le32 flow_id; + union { + struct { + __le16 rsvd; + __le16 flow_id_ipv6; + } flex; + __le32 ts_high; + } flex_ts; +}; + +/** + * struct virtchnl2_rx_flex_desc_adv_nic_3 - RX descriptor writeback format. + * @rxdid_ucast: ucast=[7:6], rsvd=[5:4], profile_id=[3:0]. + * @status_err0_qw0: Status/Error section 0 in quad word 0. + * @ptype_err_fflags0: ff0=[15:12], udp_len_err=[11], ip_hdr_err=[10], + * ptype=[9:0]. + * @pktlen_gen_bufq_id: bufq_id=[15] only in splitq, gen=[14] only in splitq, + * plen=[13:0]. + * @hdrlen_flags: miss_prepend=[15], trunc_mirr=[14], int_udp_0=[13], + * ext_udp0=[12], sph=[11] only in splitq, rsc=[10] + * only in splitq, header=[9:0]. + * @status_err0_qw1: Status/Error section 0 in quad word 1. + * @status_err1: Status/Error section 1. + * @fflags1: Flexible flags section 1. + * @ts_low: Lower word of timestamp value. + * @buf_id: Buffer identifier. Only in splitq mode. + * @misc: Union. + * @misc.raw_cs: Raw checksum. + * @misc.l2tag1: Stripped L2 tag from the received packet + * @misc.rscseglen: + * @hash1: Lower bits of Rx hash value. + * @ff2_mirrid_hash2: Union. + * @ff2_mirrid_hash2.fflags2: Flexible flags section 2. + * @ff2_mirrid_hash2.mirrorid: Mirror id. + * @ff2_mirrid_hash2.rscseglen: RSC segment length. + * @hash3: Upper bits of Rx hash value. + * @l2tag2: Extracted L2 tag 2 from the packet. + * @fmd4: Flexible metadata container 4. + * @l2tag1: Stripped L2 tag from the received packet + * @fmd6: Flexible metadata container 6. + * @ts_high: Timestamp higher word of the timestamp value. + * + * Profile ID 0x2, SplitQ, flex writeback format + * + * Flex-field 0: BufferID + * Flex-field 1: Raw checksum/L2TAG1/RSC Seg Len (determined by HW) + * Flex-field 2: Hash[15:0] + * Flex-flags 2: Hash[23:16] + * Flex-field 3: L2TAG2 + * Flex-field 5: L2TAG1 + * Flex-field 7: Timestamp (upper 32 bits) + */ +struct virtchnl2_rx_flex_desc_adv_nic_3 { + /* Qword 0 */ + u8 rxdid_ucast; + u8 status_err0_qw0; + __le16 ptype_err_fflags0; + __le16 pktlen_gen_bufq_id; + __le16 hdrlen_flags; + /* Qword 1 */ + u8 status_err0_qw1; + u8 status_err1; + u8 fflags1; + u8 ts_low; + __le16 buf_id; + union { + __le16 raw_cs; + __le16 l2tag1; + __le16 rscseglen; + } misc; + /* Qword 2 */ + __le16 hash1; + union { + u8 fflags2; + u8 mirrorid; + u8 hash2; + } ff2_mirrid_hash2; + u8 hash3; + __le16 l2tag2; + __le16 fmd4; + /* Qword 3 */ + __le16 l2tag1; + __le16 fmd6; + __le32 ts_high; +}; + +/* Common union for accessing descriptor format structs */ +union virtchnl2_rx_desc { + struct virtchnl2_singleq_base_rx_desc base_wb; + struct virtchnl2_rx_flex_desc_nic flex_nic_wb; + struct virtchnl2_rx_flex_desc_adv_nic_3 flex_adv_nic_3_wb; +}; + +#endif /* _VIRTCHNL_LAN_DESC_H_ */ diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 5f6ae11212ae..81cf3361a1e5 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1380,13 +1380,11 @@ static int korina_probe(struct platform_device *pdev) return rc; } -static int korina_remove(struct platform_device *pdev) +static void korina_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); unregister_netdev(dev); - - return 0; } #ifdef CONFIG_OF @@ -1405,7 +1403,7 @@ static struct platform_driver korina_driver = { .of_match_table = of_match_ptr(korina_match), }, .probe = korina_probe, - .remove = korina_remove, + .remove_new = korina_remove, }; module_platform_driver(korina_driver); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index f5961bdcc480..1d5b7bb6380f 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -721,8 +721,7 @@ err_out: return err; } -static int -ltq_etop_remove(struct platform_device *pdev) +static void ltq_etop_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); @@ -732,11 +731,10 @@ ltq_etop_remove(struct platform_device *pdev) ltq_etop_mdio_cleanup(dev); unregister_netdev(dev); } - return 0; } static struct platform_driver ltq_mii_driver = { - .remove = ltq_etop_remove, + .remove_new = ltq_etop_remove, .driver = { .name = "ltq_etop", }, diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 8d646c7f8c82..8bd4def3622e 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -641,7 +641,7 @@ err_uninit_dma: return err; } -static int xrx200_remove(struct platform_device *pdev) +static void xrx200_remove(struct platform_device *pdev) { struct xrx200_priv *priv = platform_get_drvdata(pdev); struct net_device *net_dev = priv->net_dev; @@ -659,8 +659,6 @@ static int xrx200_remove(struct platform_device *pdev) /* shut down hardware */ xrx200_hw_cleanup(priv); - - return 0; } static const struct of_device_id xrx200_match[] = { @@ -671,7 +669,7 @@ MODULE_DEVICE_TABLE(of, xrx200_match); static struct platform_driver xrx200_driver = { .probe = xrx200_probe, - .remove = xrx200_remove, + .remove_new = xrx200_remove, .driver = { .name = "lantiq,xrx200-net", .of_match_table = xrx200_match, diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c index ffa96059079c..5182fe737c37 100644 --- a/drivers/net/ethernet/litex/litex_liteeth.c +++ b/drivers/net/ethernet/litex/litex_liteeth.c @@ -294,13 +294,11 @@ static int liteeth_probe(struct platform_device *pdev) return 0; } -static int liteeth_remove(struct platform_device *pdev) +static void liteeth_remove(struct platform_device *pdev) { struct net_device *netdev = platform_get_drvdata(pdev); unregister_netdev(netdev); - - return 0; } static const struct of_device_id liteeth_of_match[] = { @@ -311,7 +309,7 @@ MODULE_DEVICE_TABLE(of, liteeth_of_match); static struct platform_driver liteeth_driver = { .probe = liteeth_probe, - .remove = liteeth_remove, + .remove_new = liteeth_remove, .driver = { .name = DRV_NAME, .of_match_table = liteeth_of_match, diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 3b129a1c3381..f0bdc06d253d 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2892,19 +2892,18 @@ err_put_clk: return ret; } -static int mv643xx_eth_shared_remove(struct platform_device *pdev) +static void mv643xx_eth_shared_remove(struct platform_device *pdev) { struct mv643xx_eth_shared_private *msp = platform_get_drvdata(pdev); mv643xx_eth_shared_of_remove(); if (!IS_ERR(msp->clk)) clk_disable_unprepare(msp->clk); - return 0; } static struct platform_driver mv643xx_eth_shared_driver = { .probe = mv643xx_eth_shared_probe, - .remove = mv643xx_eth_shared_remove, + .remove_new = mv643xx_eth_shared_remove, .driver = { .name = MV643XX_ETH_SHARED_NAME, .of_match_table = of_match_ptr(mv643xx_eth_shared_ids), @@ -3279,7 +3278,7 @@ out: return err; } -static int mv643xx_eth_remove(struct platform_device *pdev) +static void mv643xx_eth_remove(struct platform_device *pdev) { struct mv643xx_eth_private *mp = platform_get_drvdata(pdev); struct net_device *dev = mp->dev; @@ -3293,8 +3292,6 @@ static int mv643xx_eth_remove(struct platform_device *pdev) clk_disable_unprepare(mp->clk); free_netdev(mp->dev); - - return 0; } static void mv643xx_eth_shutdown(struct platform_device *pdev) @@ -3311,7 +3308,7 @@ static void mv643xx_eth_shutdown(struct platform_device *pdev) static struct platform_driver mv643xx_eth_driver = { .probe = mv643xx_eth_probe, - .remove = mv643xx_eth_remove, + .remove_new = mv643xx_eth_remove, .shutdown = mv643xx_eth_shutdown, .driver = { .name = MV643XX_ETH_NAME, diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 674913184ebf..89f26402f8fb 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -388,7 +388,7 @@ out_clk: return ret; } -static int orion_mdio_remove(struct platform_device *pdev) +static void orion_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); struct orion_mdio_dev *dev = bus->priv; @@ -404,8 +404,6 @@ static int orion_mdio_remove(struct platform_device *pdev) clk_disable_unprepare(dev->clk[i]); clk_put(dev->clk[i]); } - - return 0; } static const struct of_device_id orion_mdio_match[] = { @@ -426,7 +424,7 @@ MODULE_DEVICE_TABLE(acpi, orion_mdio_acpi_match); static struct platform_driver orion_mdio_driver = { .probe = orion_mdio_probe, - .remove = orion_mdio_remove, + .remove_new = orion_mdio_remove, .driver = { .name = "orion-mdio", .of_match_table = orion_mdio_match, diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index d483b8c00ec0..61a430e5bc91 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5725,7 +5725,7 @@ err_free_irq: } /* Device removal routine */ -static int mvneta_remove(struct platform_device *pdev) +static void mvneta_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct mvneta_port *pp = netdev_priv(dev); @@ -5744,8 +5744,6 @@ static int mvneta_remove(struct platform_device *pdev) 1 << pp->id); mvneta_bm_put(pp->bm_priv); } - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -5871,7 +5869,7 @@ MODULE_DEVICE_TABLE(of, mvneta_match); static struct platform_driver mvneta_driver = { .probe = mvneta_probe, - .remove = mvneta_remove, + .remove_new = mvneta_remove, .driver = { .name = MVNETA_DRIVER_NAME, .of_match_table = mvneta_match, diff --git a/drivers/net/ethernet/marvell/mvneta_bm.c b/drivers/net/ethernet/marvell/mvneta_bm.c index 46c942ef2287..3f46a0fed048 100644 --- a/drivers/net/ethernet/marvell/mvneta_bm.c +++ b/drivers/net/ethernet/marvell/mvneta_bm.c @@ -457,7 +457,7 @@ err_clk: return err; } -static int mvneta_bm_remove(struct platform_device *pdev) +static void mvneta_bm_remove(struct platform_device *pdev) { struct mvneta_bm *priv = platform_get_drvdata(pdev); u8 all_ports_map = 0xff; @@ -475,8 +475,6 @@ static int mvneta_bm_remove(struct platform_device *pdev) mvneta_bm_write(priv, MVNETA_BM_COMMAND_REG, MVNETA_BM_STOP_MASK); clk_disable_unprepare(priv->clk); - - return 0; } static const struct of_device_id mvneta_bm_match[] = { @@ -487,7 +485,7 @@ MODULE_DEVICE_TABLE(of, mvneta_bm_match); static struct platform_driver mvneta_bm_driver = { .probe = mvneta_bm_probe, - .remove = mvneta_bm_remove, + .remove_new = mvneta_bm_remove, .driver = { .name = MVNETA_BM_DRIVER_NAME, .of_match_table = mvneta_bm_match, diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 21c3f9b015c8..463e89d3f17a 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7662,7 +7662,7 @@ err_pp_clk: return err; } -static int mvpp2_remove(struct platform_device *pdev) +static void mvpp2_remove(struct platform_device *pdev) { struct mvpp2 *priv = platform_get_drvdata(pdev); struct fwnode_handle *fwnode = pdev->dev.fwnode; @@ -7700,15 +7700,13 @@ static int mvpp2_remove(struct platform_device *pdev) } if (is_acpi_node(port_fwnode)) - return 0; + return; clk_disable_unprepare(priv->axi_clk); clk_disable_unprepare(priv->mg_core_clk); clk_disable_unprepare(priv->mg_clk); clk_disable_unprepare(priv->pp_clk); clk_disable_unprepare(priv->gop_clk); - - return 0; } static const struct of_device_id mvpp2_match[] = { @@ -7734,7 +7732,7 @@ MODULE_DEVICE_TABLE(acpi, mvpp2_acpi_match); static struct platform_driver mvpp2_driver = { .probe = mvpp2_probe, - .remove = mvpp2_remove, + .remove_new = mvpp2_remove, .driver = { .name = MVPP2_DRIVER_NAME, .of_match_table = mvpp2_match, diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c index 90c3a419932d..d4ee2454675b 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c @@ -16,9 +16,6 @@ #define CTRL_MBOX_MAX_PF 128 #define CTRL_MBOX_SZ ((size_t)(0x400000 / CTRL_MBOX_MAX_PF)) -#define FW_HB_INTERVAL_IN_SECS 1 -#define FW_HB_MISS_COUNT 10 - /* Names of Hardware non-queue generic interrupts */ static char *cn93_non_ioq_msix_names[] = { "epf_ire_rint", @@ -250,12 +247,11 @@ static void octep_init_config_cn93_pf(struct octep_device *oct) link = PCI_DEVFN(PCI_SLOT(oct->pdev->devfn), link); } conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr + - (0x400000ull * 7) + + CN93_PEM_BAR4_INDEX_OFFSET + (link * CTRL_MBOX_SZ); - conf->hb_interval = FW_HB_INTERVAL_IN_SECS; - conf->max_hb_miss_cnt = FW_HB_MISS_COUNT; - + conf->fw_info.hb_interval = OCTEP_DEFAULT_FW_HB_INTERVAL; + conf->fw_info.hb_miss_count = OCTEP_DEFAULT_FW_HB_MISS_COUNT; } /* Setup registers for a hardware Tx Queue */ @@ -373,34 +369,40 @@ static void octep_setup_mbox_regs_cn93_pf(struct octep_device *oct, int q_no) mbox->mbox_read_reg = oct->mmio[0].hw_addr + CN93_SDP_R_MBOX_VF_PF_DATA(q_no); } -/* Process non-ioq interrupts required to keep pf interface running. - * OEI_RINT is needed for control mailbox - */ -static bool octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct) -{ - bool handled = false; - u64 reg0; - - /* Check for OEI INTR */ - reg0 = octep_read_csr64(oct, CN93_SDP_EPF_OEI_RINT); - if (reg0) { - dev_info(&oct->pdev->dev, - "Received OEI_RINT intr: 0x%llx\n", - reg0); - octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg0); - if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX) +/* Poll OEI events like heartbeat */ +static void octep_poll_oei_cn93_pf(struct octep_device *oct) +{ + u64 reg; + + reg = octep_read_csr64(oct, CN93_SDP_EPF_OEI_RINT); + if (reg) { + octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT, reg); + if (reg & CN93_SDP_EPF_OEI_RINT_DATA_BIT_MBOX) queue_work(octep_wq, &oct->ctrl_mbox_task); - else if (reg0 & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT) + else if (reg & CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT) atomic_set(&oct->hb_miss_cnt, 0); - - handled = true; } +} + +/* OEI interrupt handler */ +static irqreturn_t octep_oei_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + + octep_poll_oei_cn93_pf(oct); + return IRQ_HANDLED; +} - return handled; +/* Process non-ioq interrupts required to keep pf interface running. + * OEI_RINT is needed for control mailbox + */ +static void octep_poll_non_ioq_interrupts_cn93_pf(struct octep_device *oct) +{ + octep_poll_oei_cn93_pf(oct); } -/* Interrupts handler for all non-queue generic interrupts. */ -static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev) +/* Interrupt handler for input ring error interrupts. */ +static irqreturn_t octep_ire_intr_handler_cn93_pf(void *dev) { struct octep_device *oct = (struct octep_device *)dev; struct pci_dev *pdev = oct->pdev; @@ -425,8 +427,17 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev) reg_val); } } - goto irq_handled; } + return IRQ_HANDLED; +} + +/* Interrupt handler for output ring error interrupts. */ +static irqreturn_t octep_ore_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + int i = 0; /* Check for ORERR INTR */ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_ORERR_RINT); @@ -444,9 +455,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev) reg_val); } } - - goto irq_handled; } + return IRQ_HANDLED; +} + +/* Interrupt handler for vf input ring error interrupts. */ +static irqreturn_t octep_vfire_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; /* Check for VFIRE INTR */ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_VFIRE_RINT(0)); @@ -454,8 +472,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev) dev_info(&pdev->dev, "Received VFIRE_RINT intr: 0x%llx\n", reg_val); octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT(0), reg_val); - goto irq_handled; } + return IRQ_HANDLED; +} + +/* Interrupt handler for vf output ring error interrupts. */ +static irqreturn_t octep_vfore_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; /* Check for VFORE INTR */ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_VFORE_RINT(0)); @@ -463,19 +489,30 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev) dev_info(&pdev->dev, "Received VFORE_RINT intr: 0x%llx\n", reg_val); octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT(0), reg_val); - goto irq_handled; } + return IRQ_HANDLED; +} - /* Check for MBOX INTR and OEI INTR */ - if (octep_poll_non_ioq_interrupts_cn93_pf(oct)) - goto irq_handled; +/* Interrupt handler for dpi dma related interrupts. */ +static irqreturn_t octep_dma_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + u64 reg_val = 0; /* Check for DMA INTR */ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_DMA_RINT); if (reg_val) { octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT, reg_val); - goto irq_handled; } + return IRQ_HANDLED; +} + +/* Interrupt handler for dpi dma transaction error interrupts for VFs */ +static irqreturn_t octep_dma_vf_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; /* Check for DMA VF INTR */ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT(0)); @@ -483,8 +520,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev) dev_info(&pdev->dev, "Received DMA_VF_RINT intr: 0x%llx\n", reg_val); octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT(0), reg_val); - goto irq_handled; } + return IRQ_HANDLED; +} + +/* Interrupt handler for pp transaction error interrupts for VFs */ +static irqreturn_t octep_pp_vf_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; /* Check for PPVF INTR */ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_PP_VF_RINT(0)); @@ -492,8 +537,16 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev) dev_info(&pdev->dev, "Received PP_VF_RINT intr: 0x%llx\n", reg_val); octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT(0), reg_val); - goto irq_handled; } + return IRQ_HANDLED; +} + +/* Interrupt handler for mac related interrupts. */ +static irqreturn_t octep_misc_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; /* Check for MISC INTR */ reg_val = octep_read_csr64(oct, CN93_SDP_EPF_MISC_RINT); @@ -501,11 +554,17 @@ static irqreturn_t octep_non_ioq_intr_handler_cn93_pf(void *dev) dev_info(&pdev->dev, "Received MISC_RINT intr: 0x%llx\n", reg_val); octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT, reg_val); - goto irq_handled; } + return IRQ_HANDLED; +} + +/* Interrupts handler for all reserved interrupts. */ +static irqreturn_t octep_rsvd_intr_handler_cn93_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; dev_info(&pdev->dev, "Reserved interrupts raised; Ignore\n"); -irq_handled: return IRQ_HANDLED; } @@ -569,8 +628,15 @@ static void octep_enable_interrupts_cn93_pf(struct octep_device *oct) octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1S, intr_mask); octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1S, intr_mask); octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT_ENA_W1S, -1ULL); + + octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT_ENA_W1S(0), -1ULL); + octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT_ENA_W1S(0), -1ULL); + octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1S, intr_mask); octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1S, intr_mask); + + octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT_ENA_W1S(0), -1ULL); + octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT_ENA_W1S(0), -1ULL); } /* Disable all interrupts */ @@ -588,8 +654,15 @@ static void octep_disable_interrupts_cn93_pf(struct octep_device *oct) octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask); octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask); octep_write_csr64(oct, CN93_SDP_EPF_OEI_RINT_ENA_W1C, -1ULL); + + octep_write_csr64(oct, CN93_SDP_EPF_VFIRE_RINT_ENA_W1C(0), -1ULL); + octep_write_csr64(oct, CN93_SDP_EPF_VFORE_RINT_ENA_W1C(0), -1ULL); + octep_write_csr64(oct, CN93_SDP_EPF_MISC_RINT_ENA_W1C, intr_mask); octep_write_csr64(oct, CN93_SDP_EPF_DMA_RINT_ENA_W1C, intr_mask); + + octep_write_csr64(oct, CN93_SDP_EPF_DMA_VF_RINT_ENA_W1C(0), -1ULL); + octep_write_csr64(oct, CN93_SDP_EPF_PP_VF_RINT_ENA_W1C(0), -1ULL); } /* Get new Octeon Read Index: index of descriptor that Octeon reads next. */ @@ -722,7 +795,16 @@ void octep_device_setup_cn93_pf(struct octep_device *oct) oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cn93_pf; oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cn93_pf; - oct->hw_ops.non_ioq_intr_handler = octep_non_ioq_intr_handler_cn93_pf; + oct->hw_ops.oei_intr_handler = octep_oei_intr_handler_cn93_pf; + oct->hw_ops.ire_intr_handler = octep_ire_intr_handler_cn93_pf; + oct->hw_ops.ore_intr_handler = octep_ore_intr_handler_cn93_pf; + oct->hw_ops.vfire_intr_handler = octep_vfire_intr_handler_cn93_pf; + oct->hw_ops.vfore_intr_handler = octep_vfore_intr_handler_cn93_pf; + oct->hw_ops.dma_intr_handler = octep_dma_intr_handler_cn93_pf; + oct->hw_ops.dma_vf_intr_handler = octep_dma_vf_intr_handler_cn93_pf; + oct->hw_ops.pp_vf_intr_handler = octep_pp_vf_intr_handler_cn93_pf; + oct->hw_ops.misc_intr_handler = octep_misc_intr_handler_cn93_pf; + oct->hw_ops.rsvd_intr_handler = octep_rsvd_intr_handler_cn93_pf; oct->hw_ops.ioq_intr_handler = octep_ioq_intr_handler_cn93_pf; oct->hw_ops.soft_reset = octep_soft_reset_cn93_pf; oct->hw_ops.reinit_regs = octep_reinit_regs_cn93_pf; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h index df7cd39d9fce..1622a6ebf036 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h @@ -49,6 +49,11 @@ /* Default MTU */ #define OCTEP_DEFAULT_MTU 1500 +/* pf heartbeat interval in milliseconds */ +#define OCTEP_DEFAULT_FW_HB_INTERVAL 1000 +/* pf heartbeat miss count */ +#define OCTEP_DEFAULT_FW_HB_MISS_COUNT 20 + /* Macros to get octeon config params */ #define CFG_GET_IQ_CFG(cfg) ((cfg)->iq) #define CFG_GET_IQ_NUM_DESC(cfg) ((cfg)->iq.num_descs) @@ -181,6 +186,16 @@ struct octep_ctrl_mbox_config { void __iomem *barmem_addr; }; +/* Info from firmware */ +struct octep_fw_info { + /* interface pkind */ + u16 pkind; + /* heartbeat interval in milliseconds */ + u16 hb_interval; + /* heartbeat miss count */ + u16 hb_miss_count; +}; + /* Data Structure to hold configuration limits and active config */ struct octep_config { /* Input Queue attributes. */ @@ -201,10 +216,7 @@ struct octep_config { /* ctrl mbox config */ struct octep_ctrl_mbox_config ctrl_mbox_cfg; - /* Configured maximum heartbeat miss count */ - u32 max_hb_miss_cnt; - - /* Configured firmware heartbeat interval in secs */ - u32 hb_interval; + /* fw info */ + struct octep_fw_info fw_info; }; #endif /* _OCTEP_CONFIG_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c index 17bfd5cdf462..0594607a2585 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c @@ -26,7 +26,7 @@ static atomic_t ctrl_net_msg_id; /* Control plane version in which OCTEP_CTRL_NET_H2F_CMD was added */ static const u32 octep_ctrl_net_h2f_cmd_versions[OCTEP_CTRL_NET_H2F_CMD_MAX] = { - [OCTEP_CTRL_NET_H2F_CMD_INVALID ... OCTEP_CTRL_NET_H2F_CMD_LINK_INFO] = + [OCTEP_CTRL_NET_H2F_CMD_INVALID ... OCTEP_CTRL_NET_H2F_CMD_GET_INFO] = OCTEP_CP_VERSION(1, 0, 0) }; @@ -353,6 +353,28 @@ void octep_ctrl_net_recv_fw_messages(struct octep_device *oct) } } +int octep_ctrl_net_get_info(struct octep_device *oct, int vfid, + struct octep_fw_info *info) +{ + struct octep_ctrl_net_wait_data d = {0}; + struct octep_ctrl_net_h2f_resp *resp; + struct octep_ctrl_net_h2f_req *req; + int err; + + req = &d.data.req; + init_send_req(&d.msg, req, 0, vfid); + req->hdr.s.cmd = OCTEP_CTRL_NET_H2F_CMD_GET_INFO; + req->link_info.cmd = OCTEP_CTRL_NET_CMD_GET; + err = octep_send_mbox_req(oct, &d, true); + if (err < 0) + return err; + + resp = &d.data.resp; + memcpy(info, &resp->info.fw_info, sizeof(struct octep_fw_info)); + + return 0; +} + int octep_ctrl_net_uninit(struct octep_device *oct) { struct octep_ctrl_net_wait_data *pos, *n; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h index 1c2ef4ee31d9..b330f370131b 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.h @@ -41,6 +41,7 @@ enum octep_ctrl_net_h2f_cmd { OCTEP_CTRL_NET_H2F_CMD_LINK_STATUS, OCTEP_CTRL_NET_H2F_CMD_RX_STATE, OCTEP_CTRL_NET_H2F_CMD_LINK_INFO, + OCTEP_CTRL_NET_H2F_CMD_GET_INFO, OCTEP_CTRL_NET_H2F_CMD_MAX }; @@ -161,6 +162,11 @@ struct octep_ctrl_net_h2f_resp_cmd_state { u16 state; }; +/* get info request */ +struct octep_ctrl_net_h2f_resp_cmd_get_info { + struct octep_fw_info fw_info; +}; + /* Host to fw response data */ struct octep_ctrl_net_h2f_resp { union octep_ctrl_net_resp_hdr hdr; @@ -171,6 +177,7 @@ struct octep_ctrl_net_h2f_resp { struct octep_ctrl_net_h2f_resp_cmd_state link; struct octep_ctrl_net_h2f_resp_cmd_state rx; struct octep_ctrl_net_link_info link_info; + struct octep_ctrl_net_h2f_resp_cmd_get_info info; }; } __packed; @@ -330,6 +337,17 @@ int octep_ctrl_net_set_link_info(struct octep_device *oct, */ void octep_ctrl_net_recv_fw_messages(struct octep_device *oct); +/** Get info from firmware. + * + * @param oct: non-null pointer to struct octep_device. + * @param vfid: Index of virtual function. + * @param info: non-null pointer to struct octep_fw_info. + * + * return value: 0 on success, -errno on failure. + */ +int octep_ctrl_net_get_info(struct octep_device *oct, int vfid, + struct octep_fw_info *info); + /** Uninitialize data for ctrl net. * * @param oct: non-null pointer to struct octep_device. diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 5b46ca47c8e5..552970c7dec0 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -155,18 +155,153 @@ static void octep_disable_msix(struct octep_device *oct) } /** - * octep_non_ioq_intr_handler() - common handler for all generic interrupts. + * octep_oei_intr_handler() - common handler for output endpoint interrupts. * * @irq: Interrupt number. * @data: interrupt data. * - * this is common handler for all non-queue (generic) interrupts. + * this is common handler for all output endpoint interrupts. + */ +static irqreturn_t octep_oei_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.oei_intr_handler(oct); +} + +/** + * octep_ire_intr_handler() - common handler for input ring error interrupts. + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for input ring error interrupts. + */ +static irqreturn_t octep_ire_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.ire_intr_handler(oct); +} + +/** + * octep_ore_intr_handler() - common handler for output ring error interrupts. + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for output ring error interrupts. + */ +static irqreturn_t octep_ore_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.ore_intr_handler(oct); +} + +/** + * octep_vfire_intr_handler() - common handler for vf input ring error interrupts. + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for vf input ring error interrupts. + */ +static irqreturn_t octep_vfire_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.vfire_intr_handler(oct); +} + +/** + * octep_vfore_intr_handler() - common handler for vf output ring error interrupts. + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for vf output ring error interrupts. + */ +static irqreturn_t octep_vfore_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.vfore_intr_handler(oct); +} + +/** + * octep_dma_intr_handler() - common handler for dpi dma related interrupts. + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for dpi dma related interrupts. + */ +static irqreturn_t octep_dma_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.dma_intr_handler(oct); +} + +/** + * octep_dma_vf_intr_handler() - common handler for dpi dma transaction error interrupts for VFs. + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for dpi dma transaction error interrupts for VFs. */ -static irqreturn_t octep_non_ioq_intr_handler(int irq, void *data) +static irqreturn_t octep_dma_vf_intr_handler(int irq, void *data) { struct octep_device *oct = data; - return oct->hw_ops.non_ioq_intr_handler(oct); + return oct->hw_ops.dma_vf_intr_handler(oct); +} + +/** + * octep_pp_vf_intr_handler() - common handler for pp transaction error interrupts for VFs. + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for pp transaction error interrupts for VFs. + */ +static irqreturn_t octep_pp_vf_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.pp_vf_intr_handler(oct); +} + +/** + * octep_misc_intr_handler() - common handler for mac related interrupts. + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for mac related interrupts. + */ +static irqreturn_t octep_misc_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.misc_intr_handler(oct); +} + +/** + * octep_rsvd_intr_handler() - common handler for reserved interrupts (future use). + * + * @irq: Interrupt number. + * @data: interrupt data. + * + * this is common handler for all reserved interrupts. + */ +static irqreturn_t octep_rsvd_intr_handler(int irq, void *data) +{ + struct octep_device *oct = data; + + return oct->hw_ops.rsvd_intr_handler(oct); } /** @@ -222,9 +357,57 @@ static int octep_request_irqs(struct octep_device *oct) snprintf(irq_name, OCTEP_MSIX_NAME_SIZE, "%s-%s", netdev->name, non_ioq_msix_names[i]); - ret = request_irq(msix_entry->vector, - octep_non_ioq_intr_handler, 0, - irq_name, oct); + if (!strncmp(non_ioq_msix_names[i], "epf_oei_rint", + strlen("epf_oei_rint"))) { + ret = request_irq(msix_entry->vector, + octep_oei_intr_handler, 0, + irq_name, oct); + } else if (!strncmp(non_ioq_msix_names[i], "epf_ire_rint", + strlen("epf_ire_rint"))) { + ret = request_irq(msix_entry->vector, + octep_ire_intr_handler, 0, + irq_name, oct); + } else if (!strncmp(non_ioq_msix_names[i], "epf_ore_rint", + strlen("epf_ore_rint"))) { + ret = request_irq(msix_entry->vector, + octep_ore_intr_handler, 0, + irq_name, oct); + } else if (!strncmp(non_ioq_msix_names[i], "epf_vfire_rint", + strlen("epf_vfire_rint"))) { + ret = request_irq(msix_entry->vector, + octep_vfire_intr_handler, 0, + irq_name, oct); + } else if (!strncmp(non_ioq_msix_names[i], "epf_vfore_rint", + strlen("epf_vfore_rint"))) { + ret = request_irq(msix_entry->vector, + octep_vfore_intr_handler, 0, + irq_name, oct); + } else if (!strncmp(non_ioq_msix_names[i], "epf_dma_rint", + strlen("epf_dma_rint"))) { + ret = request_irq(msix_entry->vector, + octep_dma_intr_handler, 0, + irq_name, oct); + } else if (!strncmp(non_ioq_msix_names[i], "epf_dma_vf_rint", + strlen("epf_dma_vf_rint"))) { + ret = request_irq(msix_entry->vector, + octep_dma_vf_intr_handler, 0, + irq_name, oct); + } else if (!strncmp(non_ioq_msix_names[i], "epf_pp_vf_rint", + strlen("epf_pp_vf_rint"))) { + ret = request_irq(msix_entry->vector, + octep_pp_vf_intr_handler, 0, + irq_name, oct); + } else if (!strncmp(non_ioq_msix_names[i], "epf_misc_rint", + strlen("epf_misc_rint"))) { + ret = request_irq(msix_entry->vector, + octep_misc_intr_handler, 0, + irq_name, oct); + } else { + ret = request_irq(msix_entry->vector, + octep_rsvd_intr_handler, 0, + irq_name, oct); + } + if (ret) { netdev_err(netdev, "request_irq failed for %s; err=%d", @@ -917,9 +1100,9 @@ static void octep_hb_timeout_task(struct work_struct *work) int miss_cnt; miss_cnt = atomic_inc_return(&oct->hb_miss_cnt); - if (miss_cnt < oct->conf->max_hb_miss_cnt) { + if (miss_cnt < oct->conf->fw_info.hb_miss_count) { queue_delayed_work(octep_wq, &oct->hb_task, - msecs_to_jiffies(oct->conf->hb_interval * 1000)); + msecs_to_jiffies(oct->conf->fw_info.hb_interval)); return; } @@ -1012,8 +1195,7 @@ int octep_device_setup(struct octep_device *oct) atomic_set(&oct->hb_miss_cnt, 0); INIT_DELAYED_WORK(&oct->hb_task, octep_hb_timeout_task); - queue_delayed_work(octep_wq, &oct->hb_task, - msecs_to_jiffies(oct->conf->hb_interval * 1000)); + return 0; unsupported_dev: @@ -1142,6 +1324,15 @@ static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(&pdev->dev, "Device setup failed\n"); goto err_octep_config; } + + octep_ctrl_net_get_info(octep_dev, OCTEP_CTRL_NET_INVALID_VFID, + &octep_dev->conf->fw_info); + dev_info(&octep_dev->pdev->dev, "Heartbeat interval %u msecs Heartbeat miss count %u\n", + octep_dev->conf->fw_info.hb_interval, + octep_dev->conf->fw_info.hb_miss_count); + queue_delayed_work(octep_wq, &octep_dev->hb_task, + msecs_to_jiffies(octep_dev->conf->fw_info.hb_interval)); + INIT_WORK(&octep_dev->tx_timeout_task, octep_tx_timeout_task); INIT_WORK(&octep_dev->ctrl_mbox_task, octep_ctrl_mbox_task); INIT_DELAYED_WORK(&octep_dev->intr_poll_task, octep_intr_poll_task); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h index e0907a719133..6df902ebb7f3 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h @@ -65,7 +65,16 @@ struct octep_hw_ops { void (*setup_oq_regs)(struct octep_device *oct, int q); void (*setup_mbox_regs)(struct octep_device *oct, int mbox); - irqreturn_t (*non_ioq_intr_handler)(void *ioq_vector); + irqreturn_t (*oei_intr_handler)(void *ioq_vector); + irqreturn_t (*ire_intr_handler)(void *ioq_vector); + irqreturn_t (*ore_intr_handler)(void *ioq_vector); + irqreturn_t (*vfire_intr_handler)(void *ioq_vector); + irqreturn_t (*vfore_intr_handler)(void *ioq_vector); + irqreturn_t (*dma_intr_handler)(void *ioq_vector); + irqreturn_t (*dma_vf_intr_handler)(void *ioq_vector); + irqreturn_t (*pp_vf_intr_handler)(void *ioq_vector); + irqreturn_t (*misc_intr_handler)(void *ioq_vector); + irqreturn_t (*rsvd_intr_handler)(void *ioq_vector); irqreturn_t (*ioq_intr_handler)(void *ioq_vector); int (*soft_reset)(struct octep_device *oct); void (*reinit_regs)(struct octep_device *oct); @@ -73,7 +82,7 @@ struct octep_hw_ops { void (*enable_interrupts)(struct octep_device *oct); void (*disable_interrupts)(struct octep_device *oct); - bool (*poll_non_ioq_interrupts)(struct octep_device *oct); + void (*poll_non_ioq_interrupts)(struct octep_device *oct); void (*enable_io_queues)(struct octep_device *oct); void (*disable_io_queues)(struct octep_device *oct); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h index b25c3093dc7b..0a43983e9101 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h @@ -370,4 +370,8 @@ /* bit 1 for firmware heartbeat interrupt */ #define CN93_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT BIT_ULL(1) +#define CN93_PEM_BAR4_INDEX 7 +#define CN93_PEM_BAR4_INDEX_SIZE 0x400000ULL +#define CN93_PEM_BAR4_INDEX_OFFSET (CN93_PEM_BAR4_INDEX * CN93_PEM_BAR4_INDEX_SIZE) + #endif /* _OCTEP_REGS_CN9K_PF_H_ */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 6b5b06c2b4e9..c5bd7747fc0e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1574,7 +1574,7 @@ enum ptp_op { PTP_OP_GET_CLOCK = 1, PTP_OP_GET_TSTMP = 2, PTP_OP_SET_THRESH = 3, - PTP_OP_EXTTS_ON = 4, + PTP_OP_PPS_ON = 4, PTP_OP_ADJTIME = 5, PTP_OP_SET_CLOCK = 6, }; @@ -1584,7 +1584,8 @@ struct ptp_req { u8 op; s64 scaled_ppm; u64 thresh; - int extts_on; + u64 period; + int pps_on; s64 delta; u64 clk; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c index ffbd22797163..bcc96eed2481 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c @@ -46,6 +46,7 @@ #define PTP_PPS_HI_INCR 0xF60ULL #define PTP_PPS_LO_INCR 0xF68ULL +#define PTP_PPS_THRESH_LO 0xF50ULL #define PTP_PPS_THRESH_HI 0xF58ULL #define PTP_CLOCK_LO 0xF08ULL @@ -411,29 +412,12 @@ void ptp_start(struct rvu *rvu, u64 sclk, u32 ext_clk_freq, u32 extts) } clock_cfg |= PTP_CLOCK_CFG_PTP_EN; - clock_cfg |= PTP_CLOCK_CFG_PPS_EN | PTP_CLOCK_CFG_PPS_INV; writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG); clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG); clock_cfg &= ~PTP_CLOCK_CFG_ATOMIC_OP_MASK; clock_cfg |= (ATOMIC_SET << 26); writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG); - /* Set 50% duty cycle for 1Hz output */ - writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_HI_INCR); - writeq(0x1dcd650000000000, ptp->reg_base + PTP_PPS_LO_INCR); - if (cn10k_ptp_errata(ptp)) { - /* The ptp_clock_hi rollsover to zero once clock cycle before it - * reaches one second boundary. so, program the pps_lo_incr in - * such a way that the pps threshold value comparison at one - * second boundary will succeed and pps edge changes. After each - * one second boundary, the hrtimer handler will be invoked and - * reprograms the pps threshold value. - */ - ptp->clock_period = NSEC_PER_SEC / ptp->clock_rate; - writeq((0x1dcd6500ULL - ptp->clock_period) << 32, - ptp->reg_base + PTP_PPS_LO_INCR); - } - if (cn10k_ptp_errata(ptp)) clock_comp = ptp_calc_adjusted_comp(ptp->clock_rate); else @@ -465,20 +449,68 @@ static int ptp_set_thresh(struct ptp *ptp, u64 thresh) return 0; } -static int ptp_extts_on(struct ptp *ptp, int on) +static int ptp_config_hrtimer(struct ptp *ptp, int on) { u64 ptp_clock_hi; - if (cn10k_ptp_errata(ptp)) { - if (on) { - ptp_clock_hi = readq(ptp->reg_base + PTP_CLOCK_HI); - ptp_hrtimer_start(ptp, (ktime_t)ptp_clock_hi); - } else { - if (hrtimer_active(&ptp->hrtimer)) - hrtimer_cancel(&ptp->hrtimer); + if (on) { + ptp_clock_hi = readq(ptp->reg_base + PTP_CLOCK_HI); + ptp_hrtimer_start(ptp, (ktime_t)ptp_clock_hi); + } else { + if (hrtimer_active(&ptp->hrtimer)) + hrtimer_cancel(&ptp->hrtimer); + } + + return 0; +} + +static int ptp_pps_on(struct ptp *ptp, int on, u64 period) +{ + u64 clock_cfg; + + clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG); + if (on) { + if (cn10k_ptp_errata(ptp) && period != NSEC_PER_SEC) { + dev_err(&ptp->pdev->dev, "Supports max period value as 1 second\n"); + return -EINVAL; } + + if (period > (8 * NSEC_PER_SEC)) { + dev_err(&ptp->pdev->dev, "Supports max period as 8 seconds\n"); + return -EINVAL; + } + + clock_cfg |= PTP_CLOCK_CFG_PPS_EN | PTP_CLOCK_CFG_PPS_INV; + writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG); + + writeq(0, ptp->reg_base + PTP_PPS_THRESH_HI); + writeq(0, ptp->reg_base + PTP_PPS_THRESH_LO); + + /* Configure high/low phase time */ + period = period / 2; + writeq(((u64)period << 32), ptp->reg_base + PTP_PPS_HI_INCR); + writeq(((u64)period << 32), ptp->reg_base + PTP_PPS_LO_INCR); + } else { + clock_cfg &= ~(PTP_CLOCK_CFG_PPS_EN | PTP_CLOCK_CFG_PPS_INV); + writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG); } + if (on && cn10k_ptp_errata(ptp)) { + /* The ptp_clock_hi rollsover to zero once clock cycle before it + * reaches one second boundary. so, program the pps_lo_incr in + * such a way that the pps threshold value comparison at one + * second boundary will succeed and pps edge changes. After each + * one second boundary, the hrtimer handler will be invoked and + * reprograms the pps threshold value. + */ + ptp->clock_period = NSEC_PER_SEC / ptp->clock_rate; + writeq((0x1dcd6500ULL - ptp->clock_period) << 32, + ptp->reg_base + PTP_PPS_LO_INCR); + } + + if (cn10k_ptp_errata(ptp)) + ptp_config_hrtimer(ptp, on); + return 0; } @@ -613,8 +645,8 @@ int rvu_mbox_handler_ptp_op(struct rvu *rvu, struct ptp_req *req, case PTP_OP_SET_THRESH: err = ptp_set_thresh(rvu->ptp, req->thresh); break; - case PTP_OP_EXTTS_ON: - err = ptp_extts_on(rvu->ptp, req->extts_on); + case PTP_OP_PPS_ON: + err = ptp_pps_on(rvu->ptp, req->pps_on, req->period); break; case PTP_OP_ADJTIME: ptp_atomic_adjtime(rvu->ptp, req->delta); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c index 3a72b0793d4a..63130ba37e9d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c @@ -175,7 +175,7 @@ static int ptp_set_thresh(struct otx2_ptp *ptp, u64 thresh) return otx2_sync_mbox_msg(&ptp->nic->mbox); } -static int ptp_extts_on(struct otx2_ptp *ptp, int on) +static int ptp_pps_on(struct otx2_ptp *ptp, int on, u64 period) { struct ptp_req *req; @@ -186,8 +186,9 @@ static int ptp_extts_on(struct otx2_ptp *ptp, int on) if (!req) return -ENOMEM; - req->op = PTP_OP_EXTTS_ON; - req->extts_on = on; + req->op = PTP_OP_PPS_ON; + req->pps_on = on; + req->period = period; return otx2_sync_mbox_msg(&ptp->nic->mbox); } @@ -276,8 +277,8 @@ static int otx2_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin, switch (func) { case PTP_PF_NONE: case PTP_PF_EXTTS: - break; case PTP_PF_PEROUT: + break; case PTP_PF_PHYSYNC: return -1; } @@ -340,6 +341,7 @@ static int otx2_ptp_enable(struct ptp_clock_info *ptp_info, { struct otx2_ptp *ptp = container_of(ptp_info, struct otx2_ptp, ptp_info); + u64 period = 0; int pin; if (!ptp->nic) @@ -351,12 +353,24 @@ static int otx2_ptp_enable(struct ptp_clock_info *ptp_info, rq->extts.index); if (pin < 0) return -EBUSY; - if (on) { - ptp_extts_on(ptp, on); + if (on) schedule_delayed_work(&ptp->extts_work, msecs_to_jiffies(200)); - } else { - ptp_extts_on(ptp, on); + else cancel_delayed_work_sync(&ptp->extts_work); + + return 0; + case PTP_CLK_REQ_PEROUT: + if (rq->perout.flags) + return -EOPNOTSUPP; + + if (rq->perout.index >= ptp_info->n_pins) + return -EINVAL; + if (on) { + period = rq->perout.period.sec * NSEC_PER_SEC + + rq->perout.period.nsec; + ptp_pps_on(ptp, on, period); + } else { + ptp_pps_on(ptp, on, period); } return 0; default: @@ -411,6 +425,7 @@ int otx2_ptp_init(struct otx2_nic *pfvf) .name = "OcteonTX2 PTP", .max_adj = 1000000000ull, .n_ext_ts = 1, + .n_per_out = 1, .n_pins = 1, .pps = 0, .pin_config = &ptp_ptr->extts_config, diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index d5691b6a2bc5..dd6ca2e4fd51 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1528,7 +1528,7 @@ err_clk: return err; } -static int pxa168_eth_remove(struct platform_device *pdev) +static void pxa168_eth_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct pxa168_eth_private *pep = netdev_priv(dev); @@ -1547,7 +1547,6 @@ static int pxa168_eth_remove(struct platform_device *pdev) mdiobus_free(pep->smi_bus); unregister_netdev(dev); free_netdev(dev); - return 0; } static void pxa168_eth_shutdown(struct platform_device *pdev) @@ -1580,7 +1579,7 @@ MODULE_DEVICE_TABLE(of, pxa168_eth_of_match); static struct platform_driver pxa168_eth_driver = { .probe = pxa168_eth_probe, - .remove = pxa168_eth_remove, + .remove_new = pxa168_eth_remove, .shutdown = pxa168_eth_shutdown, .resume = pxa168_eth_resume, .suspend = pxa168_eth_suspend, diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 20afe79f380a..7669b446915a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -197,6 +197,7 @@ static const struct mtk_reg_map mt7988_reg_map = { .wdma_base = { [0] = 0x4800, [1] = 0x4c00, + [2] = 0x5000, }, .pse_iq_sta = 0x0180, .pse_oq_sta = 0x01a0, @@ -5002,7 +5003,7 @@ err_destroy_sgmii: return err; } -static int mtk_remove(struct platform_device *pdev) +static void mtk_remove(struct platform_device *pdev) { struct mtk_eth *eth = platform_get_drvdata(pdev); struct mtk_mac *mac; @@ -5024,8 +5025,6 @@ static int mtk_remove(struct platform_device *pdev) netif_napi_del(ð->rx_napi); mtk_cleanup(eth); mtk_mdio_cleanup(eth); - - return 0; } static const struct mtk_soc_data mt2701_data = { @@ -5226,7 +5225,7 @@ MODULE_DEVICE_TABLE(of, of_mtk_match); static struct platform_driver mtk_driver = { .probe = mtk_probe, - .remove = mtk_remove, + .remove_new = mtk_remove, .driver = { .name = "mtk_soc_eth", .of_match_table = of_mtk_match, diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 403219d987ef..9ae3b8a71d0e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -1132,7 +1132,7 @@ struct mtk_reg_map { u32 gdm1_cnt; u32 gdma_to_ppe; u32 ppe_base; - u32 wdma_base[2]; + u32 wdma_base[3]; u32 pse_iq_sta; u32 pse_oq_sta; }; diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 86f32f486043..b2a5d9c3733d 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -425,7 +425,8 @@ int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry, } int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry, - int wdma_idx, int txq, int bss, int wcid) + int wdma_idx, int txq, int bss, int wcid, + bool amsdu_en) { struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(eth, entry); u32 *ib2 = mtk_foe_entry_ib2(eth, entry); @@ -437,6 +438,7 @@ int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry, MTK_FOE_IB2_WDMA_WINFO_V2; l2->w3info = FIELD_PREP(MTK_FOE_WINFO_WCID_V3, wcid) | FIELD_PREP(MTK_FOE_WINFO_BSS_V3, bss); + l2->amsdu = FIELD_PREP(MTK_FOE_WINFO_AMSDU_EN, amsdu_en); break; case 2: *ib2 &= ~MTK_FOE_IB2_PORT_MG_V2; diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h index e3d0ec72bc69..691806bca372 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.h +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -88,13 +88,13 @@ enum { #define MTK_FOE_WINFO_BSS_V3 GENMASK(23, 16) #define MTK_FOE_WINFO_WCID_V3 GENMASK(15, 0) -#define MTK_FOE_WINFO_PAO_USR_INFO GENMASK(15, 0) -#define MTK_FOE_WINFO_PAO_TID GENMASK(19, 16) -#define MTK_FOE_WINFO_PAO_IS_FIXEDRATE BIT(20) -#define MTK_FOE_WINFO_PAO_IS_PRIOR BIT(21) -#define MTK_FOE_WINFO_PAO_IS_SP BIT(22) -#define MTK_FOE_WINFO_PAO_HF BIT(23) -#define MTK_FOE_WINFO_PAO_AMSDU_EN BIT(24) +#define MTK_FOE_WINFO_AMSDU_USR_INFO GENMASK(15, 0) +#define MTK_FOE_WINFO_AMSDU_TID GENMASK(19, 16) +#define MTK_FOE_WINFO_AMSDU_IS_FIXEDRATE BIT(20) +#define MTK_FOE_WINFO_AMSDU_IS_PRIOR BIT(21) +#define MTK_FOE_WINFO_AMSDU_IS_SP BIT(22) +#define MTK_FOE_WINFO_AMSDU_HF BIT(23) +#define MTK_FOE_WINFO_AMSDU_EN BIT(24) enum { MTK_FOE_STATE_INVALID, @@ -123,7 +123,7 @@ struct mtk_foe_mac_info { /* netsys_v3 */ u32 w3info; - u32 wpao; + u32 amsdu; }; /* software-only entry type */ @@ -392,7 +392,8 @@ int mtk_foe_entry_set_vlan(struct mtk_eth *eth, struct mtk_foe_entry *entry, int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry, int sid); int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry, - int wdma_idx, int txq, int bss, int wcid); + int wdma_idx, int txq, int bss, int wcid, + bool amsdu_en); int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry, unsigned int queue); int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c index a4efbeb16208..e073d2b5542c 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -111,6 +111,7 @@ mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_i info->queue = path->mtk_wdma.queue; info->bss = path->mtk_wdma.bss; info->wcid = path->mtk_wdma.wcid; + info->amsdu = path->mtk_wdma.amsdu; return 0; } @@ -192,14 +193,17 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) { mtk_foe_entry_set_wdma(eth, foe, info.wdma_idx, info.queue, - info.bss, info.wcid); + info.bss, info.wcid, info.amsdu); if (mtk_is_netsys_v2_or_greater(eth)) { switch (info.wdma_idx) { case 0: - pse_port = 8; + pse_port = PSE_WDMA0_PORT; break; case 1: - pse_port = 9; + pse_port = PSE_WDMA1_PORT; + break; + case 2: + pse_port = PSE_WDMA2_PORT; break; default: return -EINVAL; diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 94376aa2b34c..9a6744c0d458 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -17,17 +17,21 @@ #include <net/flow_offload.h> #include <net/pkt_cls.h> #include "mtk_eth_soc.h" -#include "mtk_wed_regs.h" #include "mtk_wed.h" #include "mtk_ppe.h" #include "mtk_wed_wo.h" #define MTK_PCIE_BASE(n) (0x1a143000 + (n) * 0x2000) -#define MTK_WED_PKT_SIZE 1900 +#define MTK_WED_PKT_SIZE 1920 #define MTK_WED_BUF_SIZE 2048 +#define MTK_WED_PAGE_BUF_SIZE 128 #define MTK_WED_BUF_PER_PAGE (PAGE_SIZE / 2048) +#define MTK_WED_RX_BUF_PER_PAGE (PAGE_SIZE / MTK_WED_PAGE_BUF_SIZE) #define MTK_WED_RX_RING_SIZE 1536 +#define MTK_WED_RX_PG_BM_CNT 8192 +#define MTK_WED_AMSDU_BUF_SIZE (PAGE_SIZE << 4) +#define MTK_WED_AMSDU_NPAGES 32 #define MTK_WED_TX_RING_SIZE 2048 #define MTK_WED_WDMA_RING_SIZE 1024 @@ -41,7 +45,10 @@ #define MTK_WED_RRO_QUE_CNT 8192 #define MTK_WED_MIOD_ENTRY_CNT 128 -static struct mtk_wed_hw *hw_list[2]; +#define MTK_WED_TX_BM_DMA_SIZE 65536 +#define MTK_WED_TX_BM_PKT_CNT 32768 + +static struct mtk_wed_hw *hw_list[3]; static DEFINE_MUTEX(hw_lock); struct mtk_wed_flow_block_priv { @@ -49,6 +56,39 @@ struct mtk_wed_flow_block_priv { struct net_device *dev; }; +static const struct mtk_wed_soc_data mt7622_data = { + .regmap = { + .tx_bm_tkid = 0x088, + .wpdma_rx_ring0 = 0x770, + .reset_idx_tx_mask = GENMASK(3, 0), + .reset_idx_rx_mask = GENMASK(17, 16), + }, + .tx_ring_desc_size = sizeof(struct mtk_wdma_desc), + .wdma_desc_size = sizeof(struct mtk_wdma_desc), +}; + +static const struct mtk_wed_soc_data mt7986_data = { + .regmap = { + .tx_bm_tkid = 0x0c8, + .wpdma_rx_ring0 = 0x770, + .reset_idx_tx_mask = GENMASK(1, 0), + .reset_idx_rx_mask = GENMASK(7, 6), + }, + .tx_ring_desc_size = sizeof(struct mtk_wdma_desc), + .wdma_desc_size = 2 * sizeof(struct mtk_wdma_desc), +}; + +static const struct mtk_wed_soc_data mt7988_data = { + .regmap = { + .tx_bm_tkid = 0x0c8, + .wpdma_rx_ring0 = 0x7d0, + .reset_idx_tx_mask = GENMASK(1, 0), + .reset_idx_rx_mask = GENMASK(7, 6), + }, + .tx_ring_desc_size = sizeof(struct mtk_wed_bm_desc), + .wdma_desc_size = 2 * sizeof(struct mtk_wdma_desc), +}; + static void wed_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val) { @@ -109,6 +149,90 @@ mtk_wdma_read_reset(struct mtk_wed_device *dev) return wdma_r32(dev, MTK_WDMA_GLO_CFG); } +static void +mtk_wdma_v3_rx_reset(struct mtk_wed_device *dev) +{ + u32 status; + + if (!mtk_wed_is_v3_or_greater(dev->hw)) + return; + + wdma_clr(dev, MTK_WDMA_PREF_TX_CFG, MTK_WDMA_PREF_TX_CFG_PREF_EN); + wdma_clr(dev, MTK_WDMA_PREF_RX_CFG, MTK_WDMA_PREF_RX_CFG_PREF_EN); + + if (read_poll_timeout(wdma_r32, status, + !(status & MTK_WDMA_PREF_TX_CFG_PREF_BUSY), + 0, 10000, false, dev, MTK_WDMA_PREF_TX_CFG)) + dev_err(dev->hw->dev, "rx reset failed\n"); + + if (read_poll_timeout(wdma_r32, status, + !(status & MTK_WDMA_PREF_RX_CFG_PREF_BUSY), + 0, 10000, false, dev, MTK_WDMA_PREF_RX_CFG)) + dev_err(dev->hw->dev, "rx reset failed\n"); + + wdma_clr(dev, MTK_WDMA_WRBK_TX_CFG, MTK_WDMA_WRBK_TX_CFG_WRBK_EN); + wdma_clr(dev, MTK_WDMA_WRBK_RX_CFG, MTK_WDMA_WRBK_RX_CFG_WRBK_EN); + + if (read_poll_timeout(wdma_r32, status, + !(status & MTK_WDMA_WRBK_TX_CFG_WRBK_BUSY), + 0, 10000, false, dev, MTK_WDMA_WRBK_TX_CFG)) + dev_err(dev->hw->dev, "rx reset failed\n"); + + if (read_poll_timeout(wdma_r32, status, + !(status & MTK_WDMA_WRBK_RX_CFG_WRBK_BUSY), + 0, 10000, false, dev, MTK_WDMA_WRBK_RX_CFG)) + dev_err(dev->hw->dev, "rx reset failed\n"); + + /* prefetch FIFO */ + wdma_w32(dev, MTK_WDMA_PREF_RX_FIFO_CFG, + MTK_WDMA_PREF_RX_FIFO_CFG_RING0_CLEAR | + MTK_WDMA_PREF_RX_FIFO_CFG_RING1_CLEAR); + wdma_clr(dev, MTK_WDMA_PREF_RX_FIFO_CFG, + MTK_WDMA_PREF_RX_FIFO_CFG_RING0_CLEAR | + MTK_WDMA_PREF_RX_FIFO_CFG_RING1_CLEAR); + + /* core FIFO */ + wdma_w32(dev, MTK_WDMA_XDMA_RX_FIFO_CFG, + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_PAR_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_CMD_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_DMAD_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_ARR_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_LEN_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_WID_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_BID_FIFO_CLEAR); + wdma_clr(dev, MTK_WDMA_XDMA_RX_FIFO_CFG, + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_PAR_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_CMD_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_DMAD_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_ARR_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_LEN_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_WID_FIFO_CLEAR | + MTK_WDMA_XDMA_RX_FIFO_CFG_RX_BID_FIFO_CLEAR); + + /* writeback FIFO */ + wdma_w32(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(0), + MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR); + wdma_w32(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(1), + MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR); + + wdma_clr(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(0), + MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR); + wdma_clr(dev, MTK_WDMA_WRBK_RX_FIFO_CFG(1), + MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR); + + /* prefetch ring status */ + wdma_w32(dev, MTK_WDMA_PREF_SIDX_CFG, + MTK_WDMA_PREF_SIDX_CFG_RX_RING_CLEAR); + wdma_clr(dev, MTK_WDMA_PREF_SIDX_CFG, + MTK_WDMA_PREF_SIDX_CFG_RX_RING_CLEAR); + + /* writeback ring status */ + wdma_w32(dev, MTK_WDMA_WRBK_SIDX_CFG, + MTK_WDMA_WRBK_SIDX_CFG_RX_RING_CLEAR); + wdma_clr(dev, MTK_WDMA_WRBK_SIDX_CFG, + MTK_WDMA_WRBK_SIDX_CFG_RX_RING_CLEAR); +} + static int mtk_wdma_rx_reset(struct mtk_wed_device *dev) { @@ -121,6 +245,7 @@ mtk_wdma_rx_reset(struct mtk_wed_device *dev) if (ret) dev_err(dev->hw->dev, "rx reset failed\n"); + mtk_wdma_v3_rx_reset(dev); wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX); wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); @@ -135,6 +260,101 @@ mtk_wdma_rx_reset(struct mtk_wed_device *dev) return ret; } +static u32 +mtk_wed_check_busy(struct mtk_wed_device *dev, u32 reg, u32 mask) +{ + return !!(wed_r32(dev, reg) & mask); +} + +static int +mtk_wed_poll_busy(struct mtk_wed_device *dev, u32 reg, u32 mask) +{ + int sleep = 15000; + int timeout = 100 * sleep; + u32 val; + + return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep, + timeout, false, dev, reg, mask); +} + +static void +mtk_wdma_v3_tx_reset(struct mtk_wed_device *dev) +{ + u32 status; + + if (!mtk_wed_is_v3_or_greater(dev->hw)) + return; + + wdma_clr(dev, MTK_WDMA_PREF_TX_CFG, MTK_WDMA_PREF_TX_CFG_PREF_EN); + wdma_clr(dev, MTK_WDMA_PREF_RX_CFG, MTK_WDMA_PREF_RX_CFG_PREF_EN); + + if (read_poll_timeout(wdma_r32, status, + !(status & MTK_WDMA_PREF_TX_CFG_PREF_BUSY), + 0, 10000, false, dev, MTK_WDMA_PREF_TX_CFG)) + dev_err(dev->hw->dev, "tx reset failed\n"); + + if (read_poll_timeout(wdma_r32, status, + !(status & MTK_WDMA_PREF_RX_CFG_PREF_BUSY), + 0, 10000, false, dev, MTK_WDMA_PREF_RX_CFG)) + dev_err(dev->hw->dev, "tx reset failed\n"); + + wdma_clr(dev, MTK_WDMA_WRBK_TX_CFG, MTK_WDMA_WRBK_TX_CFG_WRBK_EN); + wdma_clr(dev, MTK_WDMA_WRBK_RX_CFG, MTK_WDMA_WRBK_RX_CFG_WRBK_EN); + + if (read_poll_timeout(wdma_r32, status, + !(status & MTK_WDMA_WRBK_TX_CFG_WRBK_BUSY), + 0, 10000, false, dev, MTK_WDMA_WRBK_TX_CFG)) + dev_err(dev->hw->dev, "tx reset failed\n"); + + if (read_poll_timeout(wdma_r32, status, + !(status & MTK_WDMA_WRBK_RX_CFG_WRBK_BUSY), + 0, 10000, false, dev, MTK_WDMA_WRBK_RX_CFG)) + dev_err(dev->hw->dev, "tx reset failed\n"); + + /* prefetch FIFO */ + wdma_w32(dev, MTK_WDMA_PREF_TX_FIFO_CFG, + MTK_WDMA_PREF_TX_FIFO_CFG_RING0_CLEAR | + MTK_WDMA_PREF_TX_FIFO_CFG_RING1_CLEAR); + wdma_clr(dev, MTK_WDMA_PREF_TX_FIFO_CFG, + MTK_WDMA_PREF_TX_FIFO_CFG_RING0_CLEAR | + MTK_WDMA_PREF_TX_FIFO_CFG_RING1_CLEAR); + + /* core FIFO */ + wdma_w32(dev, MTK_WDMA_XDMA_TX_FIFO_CFG, + MTK_WDMA_XDMA_TX_FIFO_CFG_TX_PAR_FIFO_CLEAR | + MTK_WDMA_XDMA_TX_FIFO_CFG_TX_CMD_FIFO_CLEAR | + MTK_WDMA_XDMA_TX_FIFO_CFG_TX_DMAD_FIFO_CLEAR | + MTK_WDMA_XDMA_TX_FIFO_CFG_TX_ARR_FIFO_CLEAR); + wdma_clr(dev, MTK_WDMA_XDMA_TX_FIFO_CFG, + MTK_WDMA_XDMA_TX_FIFO_CFG_TX_PAR_FIFO_CLEAR | + MTK_WDMA_XDMA_TX_FIFO_CFG_TX_CMD_FIFO_CLEAR | + MTK_WDMA_XDMA_TX_FIFO_CFG_TX_DMAD_FIFO_CLEAR | + MTK_WDMA_XDMA_TX_FIFO_CFG_TX_ARR_FIFO_CLEAR); + + /* writeback FIFO */ + wdma_w32(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(0), + MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR); + wdma_w32(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(1), + MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR); + + wdma_clr(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(0), + MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR); + wdma_clr(dev, MTK_WDMA_WRBK_TX_FIFO_CFG(1), + MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR); + + /* prefetch ring status */ + wdma_w32(dev, MTK_WDMA_PREF_SIDX_CFG, + MTK_WDMA_PREF_SIDX_CFG_TX_RING_CLEAR); + wdma_clr(dev, MTK_WDMA_PREF_SIDX_CFG, + MTK_WDMA_PREF_SIDX_CFG_TX_RING_CLEAR); + + /* writeback ring status */ + wdma_w32(dev, MTK_WDMA_WRBK_SIDX_CFG, + MTK_WDMA_WRBK_SIDX_CFG_TX_RING_CLEAR); + wdma_clr(dev, MTK_WDMA_WRBK_SIDX_CFG, + MTK_WDMA_WRBK_SIDX_CFG_TX_RING_CLEAR); +} + static void mtk_wdma_tx_reset(struct mtk_wed_device *dev) { @@ -146,6 +366,7 @@ mtk_wdma_tx_reset(struct mtk_wed_device *dev) !(status & mask), 0, 10000)) dev_err(dev->hw->dev, "tx reset failed\n"); + mtk_wdma_v3_tx_reset(dev); wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_TX); wdma_w32(dev, MTK_WDMA_RESET_IDX, 0); @@ -278,7 +499,7 @@ mtk_wed_assign(struct mtk_wed_device *dev) if (!hw->wed_dev) goto out; - if (hw->version == 1) + if (mtk_wed_is_v1(hw)) return NULL; /* MT7986 WED devices do not have any pcie slot restrictions */ @@ -298,35 +519,152 @@ out: } static int +mtk_wed_amsdu_buffer_alloc(struct mtk_wed_device *dev) +{ + struct mtk_wed_hw *hw = dev->hw; + struct mtk_wed_amsdu *wed_amsdu; + int i; + + if (!mtk_wed_is_v3_or_greater(hw)) + return 0; + + wed_amsdu = devm_kcalloc(hw->dev, MTK_WED_AMSDU_NPAGES, + sizeof(*wed_amsdu), GFP_KERNEL); + if (!wed_amsdu) + return -ENOMEM; + + for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++) { + void *ptr; + + /* each segment is 64K */ + ptr = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | + __GFP_ZERO | __GFP_COMP | + GFP_DMA32, + get_order(MTK_WED_AMSDU_BUF_SIZE)); + if (!ptr) + goto error; + + wed_amsdu[i].txd = ptr; + wed_amsdu[i].txd_phy = dma_map_single(hw->dev, ptr, + MTK_WED_AMSDU_BUF_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(hw->dev, wed_amsdu[i].txd_phy)) + goto error; + } + dev->hw->wed_amsdu = wed_amsdu; + + return 0; + +error: + for (i--; i >= 0; i--) + dma_unmap_single(hw->dev, wed_amsdu[i].txd_phy, + MTK_WED_AMSDU_BUF_SIZE, DMA_TO_DEVICE); + return -ENOMEM; +} + +static void +mtk_wed_amsdu_free_buffer(struct mtk_wed_device *dev) +{ + struct mtk_wed_amsdu *wed_amsdu = dev->hw->wed_amsdu; + int i; + + if (!wed_amsdu) + return; + + for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++) { + dma_unmap_single(dev->hw->dev, wed_amsdu[i].txd_phy, + MTK_WED_AMSDU_BUF_SIZE, DMA_TO_DEVICE); + free_pages((unsigned long)wed_amsdu[i].txd, + get_order(MTK_WED_AMSDU_BUF_SIZE)); + } +} + +static int +mtk_wed_amsdu_init(struct mtk_wed_device *dev) +{ + struct mtk_wed_amsdu *wed_amsdu = dev->hw->wed_amsdu; + int i, ret; + + if (!wed_amsdu) + return 0; + + for (i = 0; i < MTK_WED_AMSDU_NPAGES; i++) + wed_w32(dev, MTK_WED_AMSDU_HIFTXD_BASE_L(i), + wed_amsdu[i].txd_phy); + + /* init all sta parameter */ + wed_w32(dev, MTK_WED_AMSDU_STA_INFO_INIT, MTK_WED_AMSDU_STA_RMVL | + MTK_WED_AMSDU_STA_WTBL_HDRT_MODE | + FIELD_PREP(MTK_WED_AMSDU_STA_MAX_AMSDU_LEN, + dev->wlan.amsdu_max_len >> 8) | + FIELD_PREP(MTK_WED_AMSDU_STA_MAX_AMSDU_NUM, + dev->wlan.amsdu_max_subframes)); + + wed_w32(dev, MTK_WED_AMSDU_STA_INFO, MTK_WED_AMSDU_STA_INFO_DO_INIT); + + ret = mtk_wed_poll_busy(dev, MTK_WED_AMSDU_STA_INFO, + MTK_WED_AMSDU_STA_INFO_DO_INIT); + if (ret) { + dev_err(dev->hw->dev, "amsdu initialization failed\n"); + return ret; + } + + /* init partial amsdu offload txd src */ + wed_set(dev, MTK_WED_AMSDU_HIFTXD_CFG, + FIELD_PREP(MTK_WED_AMSDU_HIFTXD_SRC, dev->hw->index)); + + /* init qmem */ + wed_set(dev, MTK_WED_AMSDU_PSE, MTK_WED_AMSDU_PSE_RESET); + ret = mtk_wed_poll_busy(dev, MTK_WED_MON_AMSDU_QMEM_STS1, BIT(29)); + if (ret) { + pr_info("%s: amsdu qmem initialization failed\n", __func__); + return ret; + } + + /* eagle E1 PCIE1 tx ring 22 flow control issue */ + if (dev->wlan.id == 0x7991) + wed_clr(dev, MTK_WED_AMSDU_FIFO, MTK_WED_AMSDU_IS_PRIOR0_RING); + + wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN); + + return 0; +} + +static int mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) { - struct mtk_wdma_desc *desc; - dma_addr_t desc_phys; - void **page_list; + u32 desc_size = dev->hw->soc->tx_ring_desc_size; + int i, page_idx = 0, n_pages, ring_size; int token = dev->wlan.token_start; - int ring_size; - int n_pages; - int i, page_idx; + struct mtk_wed_buf *page_list; + dma_addr_t desc_phys; + void *desc_ptr; - ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1); - n_pages = ring_size / MTK_WED_BUF_PER_PAGE; + if (!mtk_wed_is_v3_or_greater(dev->hw)) { + ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1); + dev->tx_buf_ring.size = ring_size; + } else { + dev->tx_buf_ring.size = MTK_WED_TX_BM_DMA_SIZE; + ring_size = MTK_WED_TX_BM_PKT_CNT; + } + n_pages = dev->tx_buf_ring.size / MTK_WED_BUF_PER_PAGE; page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL); if (!page_list) return -ENOMEM; - dev->tx_buf_ring.size = ring_size; dev->tx_buf_ring.pages = page_list; - desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc), - &desc_phys, GFP_KERNEL); - if (!desc) + desc_ptr = dma_alloc_coherent(dev->hw->dev, + dev->tx_buf_ring.size * desc_size, + &desc_phys, GFP_KERNEL); + if (!desc_ptr) return -ENOMEM; - dev->tx_buf_ring.desc = desc; + dev->tx_buf_ring.desc = desc_ptr; dev->tx_buf_ring.desc_phys = desc_phys; - for (i = 0, page_idx = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) { + for (i = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) { dma_addr_t page_phys, buf_phys; struct page *page; void *buf; @@ -343,7 +681,8 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) return -ENOMEM; } - page_list[page_idx++] = page; + page_list[page_idx].p = page; + page_list[page_idx++].phy_addr = page_phys; dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -351,28 +690,31 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) buf_phys = page_phys; for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) { - u32 txd_size; - u32 ctrl; - - txd_size = dev->wlan.init_buf(buf, buf_phys, token++); + struct mtk_wdma_desc *desc = desc_ptr; desc->buf0 = cpu_to_le32(buf_phys); - desc->buf1 = cpu_to_le32(buf_phys + txd_size); - - if (dev->hw->version == 1) - ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) | - FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1, - MTK_WED_BUF_SIZE - txd_size) | - MTK_WDMA_DESC_CTRL_LAST_SEG1; - else - ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) | - FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2, - MTK_WED_BUF_SIZE - txd_size) | - MTK_WDMA_DESC_CTRL_LAST_SEG0; - desc->ctrl = cpu_to_le32(ctrl); - desc->info = 0; - desc++; - + if (!mtk_wed_is_v3_or_greater(dev->hw)) { + u32 txd_size, ctrl; + + txd_size = dev->wlan.init_buf(buf, buf_phys, + token++); + desc->buf1 = cpu_to_le32(buf_phys + txd_size); + ctrl = FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size); + if (mtk_wed_is_v1(dev->hw)) + ctrl |= MTK_WDMA_DESC_CTRL_LAST_SEG1 | + FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1, + MTK_WED_BUF_SIZE - txd_size); + else + ctrl |= MTK_WDMA_DESC_CTRL_LAST_SEG0 | + FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2, + MTK_WED_BUF_SIZE - txd_size); + desc->ctrl = cpu_to_le32(ctrl); + desc->info = 0; + } else { + desc->ctrl = cpu_to_le32(token << 16); + } + + desc_ptr += desc_size; buf += MTK_WED_BUF_SIZE; buf_phys += MTK_WED_BUF_SIZE; } @@ -387,42 +729,103 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) static void mtk_wed_free_tx_buffer(struct mtk_wed_device *dev) { - struct mtk_wdma_desc *desc = dev->tx_buf_ring.desc; - void **page_list = dev->tx_buf_ring.pages; - int page_idx; - int i; + struct mtk_wed_buf *page_list = dev->tx_buf_ring.pages; + struct mtk_wed_hw *hw = dev->hw; + int i, page_idx = 0; if (!page_list) return; - if (!desc) + if (!dev->tx_buf_ring.desc) goto free_pagelist; - for (i = 0, page_idx = 0; i < dev->tx_buf_ring.size; - i += MTK_WED_BUF_PER_PAGE) { - void *page = page_list[page_idx++]; - dma_addr_t buf_addr; + for (i = 0; i < dev->tx_buf_ring.size; i += MTK_WED_BUF_PER_PAGE) { + dma_addr_t page_phy = page_list[page_idx].phy_addr; + void *page = page_list[page_idx++].p; if (!page) break; - buf_addr = le32_to_cpu(desc[i].buf0); - dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE, + dma_unmap_page(dev->hw->dev, page_phy, PAGE_SIZE, DMA_BIDIRECTIONAL); __free_page(page); } - dma_free_coherent(dev->hw->dev, dev->tx_buf_ring.size * sizeof(*desc), - desc, dev->tx_buf_ring.desc_phys); + dma_free_coherent(dev->hw->dev, + dev->tx_buf_ring.size * hw->soc->tx_ring_desc_size, + dev->tx_buf_ring.desc, + dev->tx_buf_ring.desc_phys); free_pagelist: kfree(page_list); } static int +mtk_wed_hwrro_buffer_alloc(struct mtk_wed_device *dev) +{ + int n_pages = MTK_WED_RX_PG_BM_CNT / MTK_WED_RX_BUF_PER_PAGE; + struct mtk_wed_buf *page_list; + struct mtk_wed_bm_desc *desc; + dma_addr_t desc_phys; + int i, page_idx = 0; + + if (!dev->wlan.hw_rro) + return 0; + + page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + dev->hw_rro.size = dev->wlan.rx_nbuf & ~(MTK_WED_BUF_PER_PAGE - 1); + dev->hw_rro.pages = page_list; + desc = dma_alloc_coherent(dev->hw->dev, + dev->wlan.rx_nbuf * sizeof(*desc), + &desc_phys, GFP_KERNEL); + if (!desc) + return -ENOMEM; + + dev->hw_rro.desc = desc; + dev->hw_rro.desc_phys = desc_phys; + + for (i = 0; i < MTK_WED_RX_PG_BM_CNT; i += MTK_WED_RX_BUF_PER_PAGE) { + dma_addr_t page_phys, buf_phys; + struct page *page; + int s; + + page = __dev_alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + page_phys = dma_map_page(dev->hw->dev, page, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev->hw->dev, page_phys)) { + __free_page(page); + return -ENOMEM; + } + + page_list[page_idx].p = page; + page_list[page_idx++].phy_addr = page_phys; + dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + buf_phys = page_phys; + for (s = 0; s < MTK_WED_RX_BUF_PER_PAGE; s++) { + desc->buf0 = cpu_to_le32(buf_phys); + buf_phys += MTK_WED_PAGE_BUF_SIZE; + desc++; + } + + dma_sync_single_for_device(dev->hw->dev, page_phys, PAGE_SIZE, + DMA_BIDIRECTIONAL); + } + + return 0; +} + +static int mtk_wed_rx_buffer_alloc(struct mtk_wed_device *dev) { - struct mtk_rxbm_desc *desc; + struct mtk_wed_bm_desc *desc; dma_addr_t desc_phys; dev->rx_buf_ring.size = dev->wlan.rx_nbuf; @@ -436,13 +839,48 @@ mtk_wed_rx_buffer_alloc(struct mtk_wed_device *dev) dev->rx_buf_ring.desc_phys = desc_phys; dev->wlan.init_rx_buf(dev, dev->wlan.rx_npkt); - return 0; + return mtk_wed_hwrro_buffer_alloc(dev); +} + +static void +mtk_wed_hwrro_free_buffer(struct mtk_wed_device *dev) +{ + struct mtk_wed_buf *page_list = dev->hw_rro.pages; + struct mtk_wed_bm_desc *desc = dev->hw_rro.desc; + int i, page_idx = 0; + + if (!dev->wlan.hw_rro) + return; + + if (!page_list) + return; + + if (!desc) + goto free_pagelist; + + for (i = 0; i < MTK_WED_RX_PG_BM_CNT; i += MTK_WED_RX_BUF_PER_PAGE) { + dma_addr_t buf_addr = page_list[page_idx].phy_addr; + void *page = page_list[page_idx++].p; + + if (!page) + break; + + dma_unmap_page(dev->hw->dev, buf_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + __free_page(page); + } + + dma_free_coherent(dev->hw->dev, dev->hw_rro.size * sizeof(*desc), + desc, dev->hw_rro.desc_phys); + +free_pagelist: + kfree(page_list); } static void mtk_wed_free_rx_buffer(struct mtk_wed_device *dev) { - struct mtk_rxbm_desc *desc = dev->rx_buf_ring.desc; + struct mtk_wed_bm_desc *desc = dev->rx_buf_ring.desc; if (!desc) return; @@ -450,6 +888,28 @@ mtk_wed_free_rx_buffer(struct mtk_wed_device *dev) dev->wlan.release_rx_buf(dev); dma_free_coherent(dev->hw->dev, dev->rx_buf_ring.size * sizeof(*desc), desc, dev->rx_buf_ring.desc_phys); + + mtk_wed_hwrro_free_buffer(dev); +} + +static void +mtk_wed_hwrro_init(struct mtk_wed_device *dev) +{ + if (!mtk_wed_get_rx_capa(dev) || !dev->wlan.hw_rro) + return; + + wed_set(dev, MTK_WED_RRO_PG_BM_RX_DMAM, + FIELD_PREP(MTK_WED_RRO_PG_BM_RX_SDL0, 128)); + + wed_w32(dev, MTK_WED_RRO_PG_BM_BASE, dev->hw_rro.desc_phys); + + wed_w32(dev, MTK_WED_RRO_PG_BM_INIT_PTR, + MTK_WED_RRO_PG_BM_INIT_SW_TAIL_IDX | + FIELD_PREP(MTK_WED_RRO_PG_BM_SW_TAIL_IDX, + MTK_WED_RX_PG_BM_CNT)); + + /* enable rx_page_bm to fetch dmad */ + wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_PG_BM_EN); } static void @@ -463,6 +923,8 @@ mtk_wed_rx_buffer_hw_init(struct mtk_wed_device *dev) wed_w32(dev, MTK_WED_RX_BM_DYN_ALLOC_TH, FIELD_PREP(MTK_WED_RX_BM_DYN_ALLOC_TH_H, 0xffff)); wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_BM_EN); + + mtk_wed_hwrro_init(dev); } static void @@ -498,13 +960,23 @@ mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en) { u32 mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK; - if (dev->hw->version == 1) + switch (dev->hw->version) { + case 1: mask |= MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR; - else + break; + case 2: mask |= MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH | MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH | MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT | MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR; + break; + case 3: + mask = MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT | + MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD; + break; + default: + break; + } if (!dev->hw->num_flows) mask &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD; @@ -516,6 +988,9 @@ mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en) static void mtk_wed_set_512_support(struct mtk_wed_device *dev, bool enable) { + if (!mtk_wed_is_v2(dev->hw)) + return; + if (enable) { wed_w32(dev, MTK_WED_TXDP_CTRL, MTK_WED_TXDP_DW9_OVERWR); wed_w32(dev, MTK_WED_TXP_DW1, @@ -527,22 +1002,15 @@ mtk_wed_set_512_support(struct mtk_wed_device *dev, bool enable) } } -#define MTK_WFMDA_RX_DMA_EN BIT(2) -static void -mtk_wed_check_wfdma_rx_fill(struct mtk_wed_device *dev, int idx) +static int +mtk_wed_check_wfdma_rx_fill(struct mtk_wed_device *dev, + struct mtk_wed_ring *ring) { - u32 val; int i; - if (!(dev->rx_ring[idx].flags & MTK_WED_RING_CONFIGURED)) - return; /* queue is not configured by mt76 */ - for (i = 0; i < 3; i++) { - u32 cur_idx; + u32 cur_idx = readl(ring->wpdma + MTK_WED_RING_OFS_CPU_IDX); - cur_idx = wed_r32(dev, - MTK_WED_WPDMA_RING_RX_DATA(idx) + - MTK_WED_RING_OFS_CPU_IDX); if (cur_idx == MTK_WED_RX_RING_SIZE - 1) break; @@ -551,12 +1019,10 @@ mtk_wed_check_wfdma_rx_fill(struct mtk_wed_device *dev, int idx) if (i == 3) { dev_err(dev->hw->dev, "rx dma enable failed\n"); - return; + return -ETIMEDOUT; } - val = wifi_r32(dev, dev->wlan.wpdma_rx_glo - dev->wlan.phy_base) | - MTK_WFMDA_RX_DMA_EN; - wifi_w32(dev, dev->wlan.wpdma_rx_glo - dev->wlan.phy_base, val); + return 0; } static void @@ -577,7 +1043,7 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev) MTK_WDMA_GLO_CFG_RX_INFO1_PRERES | MTK_WDMA_GLO_CFG_RX_INFO2_PRERES); - if (dev->hw->version == 1) { + if (mtk_wed_is_v1(dev->hw)) { regmap_write(dev->hw->mirror, dev->hw->index * 4, 0); wdma_clr(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_RX_INFO3_PRERES); @@ -590,6 +1056,14 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev) MTK_WED_WPDMA_RX_D_RX_DRV_EN); wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK); + + if (mtk_wed_is_v3_or_greater(dev->hw) && + mtk_wed_get_rx_capa(dev)) { + wdma_clr(dev, MTK_WDMA_PREF_TX_CFG, + MTK_WDMA_PREF_TX_CFG_PREF_EN); + wdma_clr(dev, MTK_WDMA_PREF_RX_CFG, + MTK_WDMA_PREF_RX_CFG_PREF_EN); + } } mtk_wed_set_512_support(dev, false); @@ -606,7 +1080,7 @@ mtk_wed_stop(struct mtk_wed_device *dev) wdma_w32(dev, MTK_WDMA_INT_GRP2, 0); wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0); - if (dev->hw->version == 1) + if (!mtk_wed_get_rx_capa(dev)) return; wed_w32(dev, MTK_WED_EXT_INT_MASK1, 0); @@ -625,13 +1099,21 @@ mtk_wed_deinit(struct mtk_wed_device *dev) MTK_WED_CTRL_WED_TX_BM_EN | MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); - if (dev->hw->version == 1) + if (mtk_wed_is_v1(dev->hw)) return; wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_EN | MTK_WED_CTRL_WED_RX_BM_EN | MTK_WED_CTRL_RX_RRO_QM_EN); + + if (mtk_wed_is_v3_or_greater(dev->hw)) { + wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN); + wed_clr(dev, MTK_WED_RESET, MTK_WED_RESET_TX_AMSDU); + wed_clr(dev, MTK_WED_PCIE_INT_CTRL, + MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA | + MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER); + } } static void @@ -643,6 +1125,7 @@ __mtk_wed_detach(struct mtk_wed_device *dev) mtk_wdma_rx_reset(dev); mtk_wed_reset(dev, MTK_WED_RESET_WED); + mtk_wed_amsdu_free_buffer(dev); mtk_wed_free_tx_buffer(dev); mtk_wed_free_tx_rings(dev); @@ -681,21 +1164,37 @@ mtk_wed_detach(struct mtk_wed_device *dev) mutex_unlock(&hw_lock); } -#define PCIE_BASE_ADDR0 0x11280000 static void mtk_wed_bus_init(struct mtk_wed_device *dev) { switch (dev->wlan.bus_type) { case MTK_WED_BUS_PCIE: { struct device_node *np = dev->hw->eth->dev->of_node; - struct regmap *regs; - regs = syscon_regmap_lookup_by_phandle(np, - "mediatek,wed-pcie"); - if (IS_ERR(regs)) - break; + if (mtk_wed_is_v2(dev->hw)) { + struct regmap *regs; + + regs = syscon_regmap_lookup_by_phandle(np, + "mediatek,wed-pcie"); + if (IS_ERR(regs)) + break; - regmap_update_bits(regs, 0, BIT(0), BIT(0)); + regmap_update_bits(regs, 0, BIT(0), BIT(0)); + } + + if (dev->wlan.msi) { + wed_w32(dev, MTK_WED_PCIE_CFG_INTM, + dev->hw->pcie_base | 0xc08); + wed_w32(dev, MTK_WED_PCIE_CFG_BASE, + dev->hw->pcie_base | 0xc04); + wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(8)); + } else { + wed_w32(dev, MTK_WED_PCIE_CFG_INTM, + dev->hw->pcie_base | 0x180); + wed_w32(dev, MTK_WED_PCIE_CFG_BASE, + dev->hw->pcie_base | 0x184); + wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(24)); + } wed_w32(dev, MTK_WED_PCIE_INT_CTRL, FIELD_PREP(MTK_WED_PCIE_INT_CTRL_POLL_EN, 2)); @@ -703,19 +1202,9 @@ mtk_wed_bus_init(struct mtk_wed_device *dev) /* pcie interrupt control: pola/source selection */ wed_set(dev, MTK_WED_PCIE_INT_CTRL, MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA | - FIELD_PREP(MTK_WED_PCIE_INT_CTRL_SRC_SEL, 1)); - wed_r32(dev, MTK_WED_PCIE_INT_CTRL); - - wed_w32(dev, MTK_WED_PCIE_CFG_INTM, PCIE_BASE_ADDR0 | 0x180); - wed_w32(dev, MTK_WED_PCIE_CFG_BASE, PCIE_BASE_ADDR0 | 0x184); - - /* pcie interrupt status trigger register */ - wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(24)); - wed_r32(dev, MTK_WED_PCIE_INT_TRIGGER); - - /* pola setting */ - wed_set(dev, MTK_WED_PCIE_INT_CTRL, - MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA); + MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER | + FIELD_PREP(MTK_WED_PCIE_INT_CTRL_SRC_SEL, + dev->hw->index)); break; } case MTK_WED_BUS_AXI: @@ -731,38 +1220,55 @@ mtk_wed_bus_init(struct mtk_wed_device *dev) static void mtk_wed_set_wpdma(struct mtk_wed_device *dev) { - if (dev->hw->version == 1) { - wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys); - } else { - mtk_wed_bus_init(dev); + int i; - wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_int); - wed_w32(dev, MTK_WED_WPDMA_CFG_INT_MASK, dev->wlan.wpdma_mask); - wed_w32(dev, MTK_WED_WPDMA_CFG_TX, dev->wlan.wpdma_tx); - wed_w32(dev, MTK_WED_WPDMA_CFG_TX_FREE, dev->wlan.wpdma_txfree); - wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo); - wed_w32(dev, MTK_WED_WPDMA_RX_RING, dev->wlan.wpdma_rx); + if (mtk_wed_is_v1(dev->hw)) { + wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys); + return; } + + mtk_wed_bus_init(dev); + + wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_int); + wed_w32(dev, MTK_WED_WPDMA_CFG_INT_MASK, dev->wlan.wpdma_mask); + wed_w32(dev, MTK_WED_WPDMA_CFG_TX, dev->wlan.wpdma_tx); + wed_w32(dev, MTK_WED_WPDMA_CFG_TX_FREE, dev->wlan.wpdma_txfree); + + if (!mtk_wed_get_rx_capa(dev)) + return; + + wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo); + wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring0, dev->wlan.wpdma_rx); + + if (!dev->wlan.hw_rro) + return; + + wed_w32(dev, MTK_WED_RRO_RX_D_CFG(0), dev->wlan.wpdma_rx_rro[0]); + wed_w32(dev, MTK_WED_RRO_RX_D_CFG(1), dev->wlan.wpdma_rx_rro[1]); + for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++) + wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING_CFG(i), + dev->wlan.wpdma_rx_pg + i * 0x10); } static void mtk_wed_hw_init_early(struct mtk_wed_device *dev) { - u32 mask, set; + u32 set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2); + u32 mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE; mtk_wed_deinit(dev); mtk_wed_reset(dev, MTK_WED_RESET_WED); mtk_wed_set_wpdma(dev); - mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE | - MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE | - MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE; - set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2) | - MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP | - MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY; + if (!mtk_wed_is_v3_or_greater(dev->hw)) { + mask |= MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE | + MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE; + set |= MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP | + MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY; + } wed_m32(dev, MTK_WED_WDMA_GLO_CFG, mask, set); - if (dev->hw->version == 1) { + if (mtk_wed_is_v1(dev->hw)) { u32 offset = dev->hw->index ? 0x04000400 : 0; wdma_set(dev, MTK_WDMA_GLO_CFG, @@ -907,11 +1413,18 @@ mtk_wed_route_qm_hw_init(struct mtk_wed_device *dev) } /* configure RX_ROUTE_QM */ - wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST); - wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_TXDMAD_FPORT); - wed_set(dev, MTK_WED_RTQM_GLO_CFG, - FIELD_PREP(MTK_WED_RTQM_TXDMAD_FPORT, 0x3 + dev->hw->index)); - wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST); + if (mtk_wed_is_v2(dev->hw)) { + wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST); + wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_TXDMAD_FPORT); + wed_set(dev, MTK_WED_RTQM_GLO_CFG, + FIELD_PREP(MTK_WED_RTQM_TXDMAD_FPORT, + 0x3 + dev->hw->index)); + wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST); + } else { + wed_set(dev, MTK_WED_RTQM_ENQ_CFG0, + FIELD_PREP(MTK_WED_RTQM_ENQ_CFG_TXDMAD_FPORT, + 0x3 + dev->hw->index)); + } /* enable RX_ROUTE_QM */ wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_EN); } @@ -924,34 +1437,30 @@ mtk_wed_hw_init(struct mtk_wed_device *dev) dev->init_done = true; mtk_wed_set_ext_int(dev, false); - wed_w32(dev, MTK_WED_TX_BM_CTRL, - MTK_WED_TX_BM_CTRL_PAUSE | - FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM, - dev->tx_buf_ring.size / 128) | - FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM, - MTK_WED_TX_RING_SIZE / 256)); wed_w32(dev, MTK_WED_TX_BM_BASE, dev->tx_buf_ring.desc_phys); - wed_w32(dev, MTK_WED_TX_BM_BUF_LEN, MTK_WED_PKT_SIZE); - if (dev->hw->version == 1) { - wed_w32(dev, MTK_WED_TX_BM_TKID, - FIELD_PREP(MTK_WED_TX_BM_TKID_START, - dev->wlan.token_start) | - FIELD_PREP(MTK_WED_TX_BM_TKID_END, - dev->wlan.token_start + - dev->wlan.nbuf - 1)); + if (mtk_wed_is_v1(dev->hw)) { + wed_w32(dev, MTK_WED_TX_BM_CTRL, + MTK_WED_TX_BM_CTRL_PAUSE | + FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM, + dev->tx_buf_ring.size / 128) | + FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM, + MTK_WED_TX_RING_SIZE / 256)); wed_w32(dev, MTK_WED_TX_BM_DYN_THR, FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, 1) | MTK_WED_TX_BM_DYN_THR_HI); - } else { - wed_w32(dev, MTK_WED_TX_BM_TKID_V2, - FIELD_PREP(MTK_WED_TX_BM_TKID_START, - dev->wlan.token_start) | - FIELD_PREP(MTK_WED_TX_BM_TKID_END, - dev->wlan.token_start + - dev->wlan.nbuf - 1)); + } else if (mtk_wed_is_v2(dev->hw)) { + wed_w32(dev, MTK_WED_TX_BM_CTRL, + MTK_WED_TX_BM_CTRL_PAUSE | + FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM, + dev->tx_buf_ring.size / 128) | + FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM, + MTK_WED_TX_RING_SIZE / 256)); + wed_w32(dev, MTK_WED_TX_TKID_DYN_THR, + FIELD_PREP(MTK_WED_TX_TKID_DYN_THR_LO, 0) | + MTK_WED_TX_TKID_DYN_THR_HI); wed_w32(dev, MTK_WED_TX_BM_DYN_THR, FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO_V2, 0) | MTK_WED_TX_BM_DYN_THR_HI_V2); @@ -961,31 +1470,71 @@ mtk_wed_hw_init(struct mtk_wed_device *dev) dev->tx_buf_ring.size / 128) | FIELD_PREP(MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM, dev->tx_buf_ring.size / 128)); - wed_w32(dev, MTK_WED_TX_TKID_DYN_THR, - FIELD_PREP(MTK_WED_TX_TKID_DYN_THR_LO, 0) | - MTK_WED_TX_TKID_DYN_THR_HI); } + wed_w32(dev, dev->hw->soc->regmap.tx_bm_tkid, + FIELD_PREP(MTK_WED_TX_BM_TKID_START, dev->wlan.token_start) | + FIELD_PREP(MTK_WED_TX_BM_TKID_END, + dev->wlan.token_start + dev->wlan.nbuf - 1)); + mtk_wed_reset(dev, MTK_WED_RESET_TX_BM); - if (dev->hw->version == 1) { + if (mtk_wed_is_v3_or_greater(dev->hw)) { + /* switch to new bm architecture */ + wed_clr(dev, MTK_WED_TX_BM_CTRL, + MTK_WED_TX_BM_CTRL_LEGACY_EN); + + wed_w32(dev, MTK_WED_TX_TKID_CTRL, + MTK_WED_TX_TKID_CTRL_PAUSE | + FIELD_PREP(MTK_WED_TX_TKID_CTRL_VLD_GRP_NUM_V3, + dev->wlan.nbuf / 128) | + FIELD_PREP(MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM_V3, + dev->wlan.nbuf / 128)); + /* return SKBID + SDP back to bm */ + wed_set(dev, MTK_WED_TX_TKID_CTRL, + MTK_WED_TX_TKID_CTRL_FREE_FORMAT); + + wed_w32(dev, MTK_WED_TX_BM_INIT_PTR, + MTK_WED_TX_BM_PKT_CNT | + MTK_WED_TX_BM_INIT_SW_TAIL_IDX); + } + + if (mtk_wed_is_v1(dev->hw)) { wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_TX_BM_EN | MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); - } else { - wed_clr(dev, MTK_WED_TX_TKID_CTRL, MTK_WED_TX_TKID_CTRL_PAUSE); + } else if (mtk_wed_get_rx_capa(dev)) { /* rx hw init */ wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, MTK_WED_WPDMA_RX_D_RST_CRX_IDX | MTK_WED_WPDMA_RX_D_RST_DRV_IDX); wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, 0); + /* reset prefetch index of ring */ + wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX, + MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR); + wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX, + MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR); + + wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX, + MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR); + wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX, + MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR); + + /* reset prefetch FIFO of ring */ + wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG, + MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R0_CLR | + MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R1_CLR); + wed_w32(dev, MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG, 0); + mtk_wed_rx_buffer_hw_init(dev); mtk_wed_rro_hw_init(dev); mtk_wed_route_qm_hw_init(dev); } wed_clr(dev, MTK_WED_TX_BM_CTRL, MTK_WED_TX_BM_CTRL_PAUSE); + if (!mtk_wed_is_v1(dev->hw)) + wed_clr(dev, MTK_WED_TX_TKID_CTRL, MTK_WED_TX_TKID_CTRL_PAUSE); } static void @@ -1008,23 +1557,6 @@ mtk_wed_ring_reset(struct mtk_wed_ring *ring, int size, bool tx) } } -static u32 -mtk_wed_check_busy(struct mtk_wed_device *dev, u32 reg, u32 mask) -{ - return !!(wed_r32(dev, reg) & mask); -} - -static int -mtk_wed_poll_busy(struct mtk_wed_device *dev, u32 reg, u32 mask) -{ - int sleep = 15000; - int timeout = 100 * sleep; - u32 val; - - return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep, - timeout, false, dev, reg, mask); -} - static int mtk_wed_rx_reset(struct mtk_wed_device *dev) { @@ -1038,13 +1570,33 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev) if (ret) return ret; + if (dev->wlan.hw_rro) { + wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_IND_CMD_EN); + mtk_wed_poll_busy(dev, MTK_WED_RRO_RX_HW_STS, + MTK_WED_RX_IND_CMD_BUSY); + mtk_wed_reset(dev, MTK_WED_RESET_RRO_RX_TO_PG); + } + wed_clr(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, MTK_WED_WPDMA_RX_D_RX_DRV_EN); ret = mtk_wed_poll_busy(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, MTK_WED_WPDMA_RX_D_RX_DRV_BUSY); + if (!ret && mtk_wed_is_v3_or_greater(dev->hw)) + ret = mtk_wed_poll_busy(dev, MTK_WED_WPDMA_RX_D_PREF_CFG, + MTK_WED_WPDMA_RX_D_PREF_BUSY); if (ret) { mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT); mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_D_DRV); } else { + if (mtk_wed_is_v3_or_greater(dev->hw)) { + /* 1.a. disable prefetch HW */ + wed_clr(dev, MTK_WED_WPDMA_RX_D_PREF_CFG, + MTK_WED_WPDMA_RX_D_PREF_EN); + mtk_wed_poll_busy(dev, MTK_WED_WPDMA_RX_D_PREF_CFG, + MTK_WED_WPDMA_RX_D_PREF_BUSY); + wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, + MTK_WED_WPDMA_RX_D_RST_DRV_IDX_ALL); + } + wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, MTK_WED_WPDMA_RX_D_RST_CRX_IDX | MTK_WED_WPDMA_RX_D_RST_DRV_IDX); @@ -1072,23 +1624,52 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev) wed_w32(dev, MTK_WED_RROQM_RST_IDX, 0); } + if (dev->wlan.hw_rro) { + /* disable rro msdu page drv */ + wed_clr(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG, + MTK_WED_RRO_MSDU_PG_DRV_EN); + + /* disable rro data drv */ + wed_clr(dev, MTK_WED_RRO_RX_D_CFG(2), MTK_WED_RRO_RX_D_DRV_EN); + + /* rro msdu page drv reset */ + wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG, + MTK_WED_RRO_MSDU_PG_DRV_CLR); + mtk_wed_poll_busy(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG, + MTK_WED_RRO_MSDU_PG_DRV_CLR); + + /* rro data drv reset */ + wed_w32(dev, MTK_WED_RRO_RX_D_CFG(2), + MTK_WED_RRO_RX_D_DRV_CLR); + mtk_wed_poll_busy(dev, MTK_WED_RRO_RX_D_CFG(2), + MTK_WED_RRO_RX_D_DRV_CLR); + } + /* reset route qm */ wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_EN); ret = mtk_wed_poll_busy(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_BUSY); - if (ret) + if (ret) { mtk_wed_reset(dev, MTK_WED_RESET_RX_ROUTE_QM); - else - wed_set(dev, MTK_WED_RTQM_GLO_CFG, - MTK_WED_RTQM_Q_RST); + } else if (mtk_wed_is_v3_or_greater(dev->hw)) { + wed_set(dev, MTK_WED_RTQM_RST, BIT(0)); + wed_clr(dev, MTK_WED_RTQM_RST, BIT(0)); + mtk_wed_reset(dev, MTK_WED_RESET_RX_ROUTE_QM); + } else { + wed_set(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST); + } /* reset tx wdma */ mtk_wdma_tx_reset(dev); /* reset tx wdma drv */ wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_TX_DRV_EN); - mtk_wed_poll_busy(dev, MTK_WED_CTRL, - MTK_WED_CTRL_WDMA_INT_AGENT_BUSY); + if (mtk_wed_is_v3_or_greater(dev->hw)) + mtk_wed_poll_busy(dev, MTK_WED_WPDMA_STATUS, + MTK_WED_WPDMA_STATUS_TX_DRV); + else + mtk_wed_poll_busy(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WDMA_INT_AGENT_BUSY); mtk_wed_reset(dev, MTK_WED_RESET_WDMA_TX_DRV); /* reset wed rx dma */ @@ -1098,13 +1679,8 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev) if (ret) { mtk_wed_reset(dev, MTK_WED_RESET_WED_RX_DMA); } else { - struct mtk_eth *eth = dev->hw->eth; - - if (mtk_is_netsys_v2_or_greater(eth)) - wed_set(dev, MTK_WED_RESET_IDX, - MTK_WED_RESET_IDX_RX_V2); - else - wed_set(dev, MTK_WED_RESET_IDX, MTK_WED_RESET_IDX_RX); + wed_set(dev, MTK_WED_RESET_IDX, + dev->hw->soc->regmap.reset_idx_rx_mask); wed_w32(dev, MTK_WED_RESET_IDX, 0); } @@ -1114,6 +1690,14 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev) MTK_WED_CTRL_WED_RX_BM_BUSY); mtk_wed_reset(dev, MTK_WED_RESET_RX_BM); + if (dev->wlan.hw_rro) { + wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_PG_BM_EN); + mtk_wed_poll_busy(dev, MTK_WED_CTRL, + MTK_WED_CTRL_WED_RX_PG_BM_BUSY); + wed_set(dev, MTK_WED_RESET, MTK_WED_RESET_RX_PG_BM); + wed_clr(dev, MTK_WED_RESET, MTK_WED_RESET_RX_PG_BM); + } + /* wo change to enable state */ val = MTK_WED_WO_STATE_ENABLE; ret = mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO, @@ -1131,6 +1715,7 @@ mtk_wed_rx_reset(struct mtk_wed_device *dev) false); } mtk_wed_free_rx_buffer(dev); + mtk_wed_hwrro_free_buffer(dev); return 0; } @@ -1157,21 +1742,48 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev) if (busy) { mtk_wed_reset(dev, MTK_WED_RESET_WED_TX_DMA); } else { - wed_w32(dev, MTK_WED_RESET_IDX, MTK_WED_RESET_IDX_TX); + wed_w32(dev, MTK_WED_RESET_IDX, + dev->hw->soc->regmap.reset_idx_tx_mask); wed_w32(dev, MTK_WED_RESET_IDX, 0); } /* 2. reset WDMA rx DMA */ busy = !!mtk_wdma_rx_reset(dev); - wed_clr(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_RX_DRV_EN); + if (mtk_wed_is_v3_or_greater(dev->hw)) { + val = MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE | + wed_r32(dev, MTK_WED_WDMA_GLO_CFG); + val &= ~MTK_WED_WDMA_GLO_CFG_RX_DRV_EN; + wed_w32(dev, MTK_WED_WDMA_GLO_CFG, val); + } else { + wed_clr(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_RX_DRV_EN); + } + if (!busy) busy = mtk_wed_poll_busy(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY); + if (!busy && mtk_wed_is_v3_or_greater(dev->hw)) + busy = mtk_wed_poll_busy(dev, MTK_WED_WDMA_RX_PREF_CFG, + MTK_WED_WDMA_RX_PREF_BUSY); if (busy) { mtk_wed_reset(dev, MTK_WED_RESET_WDMA_INT_AGENT); mtk_wed_reset(dev, MTK_WED_RESET_WDMA_RX_DRV); } else { + if (mtk_wed_is_v3_or_greater(dev->hw)) { + /* 1.a. disable prefetch HW */ + wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG, + MTK_WED_WDMA_RX_PREF_EN); + mtk_wed_poll_busy(dev, MTK_WED_WDMA_RX_PREF_CFG, + MTK_WED_WDMA_RX_PREF_BUSY); + wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG, + MTK_WED_WDMA_RX_PREF_DDONE2_EN); + + /* 2. Reset dma index */ + wed_w32(dev, MTK_WED_WDMA_RESET_IDX, + MTK_WED_WDMA_RESET_IDX_RX_ALL); + } + wed_w32(dev, MTK_WED_WDMA_RESET_IDX, MTK_WED_WDMA_RESET_IDX_RX | MTK_WED_WDMA_RESET_IDX_DRV); wed_w32(dev, MTK_WED_WDMA_RESET_IDX, 0); @@ -1187,8 +1799,13 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev) wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); for (i = 0; i < 100; i++) { - val = wed_r32(dev, MTK_WED_TX_BM_INTF); - if (FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP, val) == 0x40) + if (mtk_wed_is_v1(dev->hw)) + val = FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP, + wed_r32(dev, MTK_WED_TX_BM_INTF)); + else + val = FIELD_GET(MTK_WED_TX_TKID_INTF_TKFIFO_FDEP, + wed_r32(dev, MTK_WED_TX_TKID_INTF)); + if (val == 0x40) break; } @@ -1210,6 +1827,8 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev) mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT); mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_TX_DRV); mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_DRV); + if (mtk_wed_is_v3_or_greater(dev->hw)) + wed_w32(dev, MTK_WED_RX1_CTRL2, 0); } else { wed_w32(dev, MTK_WED_WPDMA_RESET_IDX, MTK_WED_WPDMA_RESET_IDX_TX | @@ -1218,7 +1837,7 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev) } dev->init_done = false; - if (dev->hw->version == 1) + if (mtk_wed_is_v1(dev->hw)) return; if (!busy) { @@ -1226,7 +1845,14 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev) wed_w32(dev, MTK_WED_RESET_IDX, 0); } - mtk_wed_rx_reset(dev); + if (mtk_wed_is_v3_or_greater(dev->hw)) { + /* reset amsdu engine */ + wed_clr(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN); + mtk_wed_reset(dev, MTK_WED_RESET_TX_AMSDU); + } + + if (mtk_wed_get_rx_capa(dev)) + mtk_wed_rx_reset(dev); } static int @@ -1249,7 +1875,6 @@ static int mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size, bool reset) { - u32 desc_size = sizeof(struct mtk_wdma_desc) * dev->hw->version; struct mtk_wed_ring *wdma; if (idx >= ARRAY_SIZE(dev->rx_wdma)) @@ -1257,7 +1882,7 @@ mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size, wdma = &dev->rx_wdma[idx]; if (!reset && mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE, - desc_size, true)) + dev->hw->soc->wdma_desc_size, true)) return -ENOMEM; wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE, @@ -1278,7 +1903,6 @@ static int mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size, bool reset) { - u32 desc_size = sizeof(struct mtk_wdma_desc) * dev->hw->version; struct mtk_wed_ring *wdma; if (idx >= ARRAY_SIZE(dev->tx_wdma)) @@ -1286,9 +1910,27 @@ mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size, wdma = &dev->tx_wdma[idx]; if (!reset && mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE, - desc_size, true)) + dev->hw->soc->wdma_desc_size, true)) return -ENOMEM; + if (mtk_wed_is_v3_or_greater(dev->hw)) { + struct mtk_wdma_desc *desc = wdma->desc; + int i; + + for (i = 0; i < MTK_WED_WDMA_RING_SIZE; i++) { + desc->buf0 = 0; + desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE); + desc->buf1 = 0; + desc->info = cpu_to_le32(MTK_WDMA_TXD0_DESC_INFO_DMA_DONE); + desc++; + desc->buf0 = 0; + desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE); + desc->buf1 = 0; + desc->info = cpu_to_le32(MTK_WDMA_TXD1_DESC_INFO_DMA_DONE); + desc++; + } + } + wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE, wdma->desc_phys); wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_COUNT, @@ -1344,7 +1986,7 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask) MTK_WED_CTRL_WED_TX_BM_EN | MTK_WED_CTRL_WED_TX_FREE_AGENT_EN); - if (dev->hw->version == 1) { + if (mtk_wed_is_v1(dev->hw)) { wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, MTK_WED_PCIE_INT_TRIGGER_STATUS); @@ -1354,8 +1996,9 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask) wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask); } else { - wdma_mask |= FIELD_PREP(MTK_WDMA_INT_MASK_TX_DONE, - GENMASK(1, 0)); + if (mtk_wed_is_v3_or_greater(dev->hw)) + wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_TKID_ALI_EN); + /* initail tx interrupt trigger */ wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_TX, MTK_WED_WPDMA_INT_CTRL_TX0_DONE_EN | @@ -1374,15 +2017,20 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask) FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_TRIG, dev->wlan.txfree_tbit)); - wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RX, - MTK_WED_WPDMA_INT_CTRL_RX0_EN | - MTK_WED_WPDMA_INT_CTRL_RX0_CLR | - MTK_WED_WPDMA_INT_CTRL_RX1_EN | - MTK_WED_WPDMA_INT_CTRL_RX1_CLR | - FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX0_DONE_TRIG, - dev->wlan.rx_tbit[0]) | - FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX1_DONE_TRIG, - dev->wlan.rx_tbit[1])); + if (mtk_wed_get_rx_capa(dev)) { + wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RX, + MTK_WED_WPDMA_INT_CTRL_RX0_EN | + MTK_WED_WPDMA_INT_CTRL_RX0_CLR | + MTK_WED_WPDMA_INT_CTRL_RX1_EN | + MTK_WED_WPDMA_INT_CTRL_RX1_CLR | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX0_DONE_TRIG, + dev->wlan.rx_tbit[0]) | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX1_DONE_TRIG, + dev->wlan.rx_tbit[1])); + + wdma_mask |= FIELD_PREP(MTK_WDMA_INT_MASK_TX_DONE, + GENMASK(1, 0)); + } wed_w32(dev, MTK_WED_WDMA_INT_CLR, wdma_mask); wed_set(dev, MTK_WED_WDMA_INT_CTRL, @@ -1398,55 +2046,280 @@ mtk_wed_configure_irq(struct mtk_wed_device *dev, u32 irq_mask) wed_w32(dev, MTK_WED_INT_MASK, irq_mask); } +#define MTK_WFMDA_RX_DMA_EN BIT(2) static void mtk_wed_dma_enable(struct mtk_wed_device *dev) { - wed_set(dev, MTK_WED_WPDMA_INT_CTRL, MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV); + int i; + + if (!mtk_wed_is_v3_or_greater(dev->hw)) { + wed_set(dev, MTK_WED_WPDMA_INT_CTRL, + MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV); + wed_set(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN); + wdma_set(dev, MTK_WDMA_GLO_CFG, + MTK_WDMA_GLO_CFG_TX_DMA_EN | + MTK_WDMA_GLO_CFG_RX_INFO1_PRERES | + MTK_WDMA_GLO_CFG_RX_INFO2_PRERES); + wed_set(dev, MTK_WED_WPDMA_CTRL, MTK_WED_WPDMA_CTRL_SDL1_FIXED); + } else { + wed_set(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN | + MTK_WED_WPDMA_GLO_CFG_RX_DDONE2_WR); + wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_TX_DMA_EN); + } wed_set(dev, MTK_WED_GLO_CFG, MTK_WED_GLO_CFG_TX_DMA_EN | MTK_WED_GLO_CFG_RX_DMA_EN); - wed_set(dev, MTK_WED_WPDMA_GLO_CFG, - MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN | - MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN); + wed_set(dev, MTK_WED_WDMA_GLO_CFG, MTK_WED_WDMA_GLO_CFG_RX_DRV_EN); - wdma_set(dev, MTK_WDMA_GLO_CFG, - MTK_WDMA_GLO_CFG_TX_DMA_EN | - MTK_WDMA_GLO_CFG_RX_INFO1_PRERES | - MTK_WDMA_GLO_CFG_RX_INFO2_PRERES); - - if (dev->hw->version == 1) { + if (mtk_wed_is_v1(dev->hw)) { wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_RX_INFO3_PRERES); - } else { - int i; + return; + } - wed_set(dev, MTK_WED_WPDMA_CTRL, - MTK_WED_WPDMA_CTRL_SDL1_FIXED); + wed_set(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC); - wed_set(dev, MTK_WED_WDMA_GLO_CFG, - MTK_WED_WDMA_GLO_CFG_TX_DRV_EN | - MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK); + if (mtk_wed_is_v3_or_greater(dev->hw)) { + wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG, + FIELD_PREP(MTK_WED_WDMA_RX_PREF_BURST_SIZE, 0x10) | + FIELD_PREP(MTK_WED_WDMA_RX_PREF_LOW_THRES, 0x8)); + wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG, + MTK_WED_WDMA_RX_PREF_DDONE2_EN); + wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG, MTK_WED_WDMA_RX_PREF_EN); + wed_clr(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK_LAST); wed_set(dev, MTK_WED_WPDMA_GLO_CFG, - MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC | - MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC); + MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_CHK | + MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNS_VER_FORCE_4); - wed_clr(dev, MTK_WED_WPDMA_GLO_CFG, - MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP | - MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV); + wdma_set(dev, MTK_WDMA_PREF_RX_CFG, MTK_WDMA_PREF_RX_CFG_PREF_EN); + wdma_set(dev, MTK_WDMA_WRBK_RX_CFG, MTK_WDMA_WRBK_RX_CFG_WRBK_EN); + } - wed_set(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, - MTK_WED_WPDMA_RX_D_RX_DRV_EN | - FIELD_PREP(MTK_WED_WPDMA_RX_D_RXD_READ_LEN, 0x18) | - FIELD_PREP(MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL, - 0x2)); + wed_clr(dev, MTK_WED_WPDMA_GLO_CFG, + MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP | + MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV); + + if (!mtk_wed_get_rx_capa(dev)) + return; + + wed_set(dev, MTK_WED_WDMA_GLO_CFG, + MTK_WED_WDMA_GLO_CFG_TX_DRV_EN | + MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK); + + wed_clr(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, MTK_WED_WPDMA_RX_D_RXD_READ_LEN); + wed_set(dev, MTK_WED_WPDMA_RX_D_GLO_CFG, + MTK_WED_WPDMA_RX_D_RX_DRV_EN | + FIELD_PREP(MTK_WED_WPDMA_RX_D_RXD_READ_LEN, 0x18) | + FIELD_PREP(MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL, 0x2)); + + if (mtk_wed_is_v3_or_greater(dev->hw)) { + wed_set(dev, MTK_WED_WPDMA_RX_D_PREF_CFG, + MTK_WED_WPDMA_RX_D_PREF_EN | + FIELD_PREP(MTK_WED_WPDMA_RX_D_PREF_BURST_SIZE, 0x10) | + FIELD_PREP(MTK_WED_WPDMA_RX_D_PREF_LOW_THRES, 0x8)); + + wed_set(dev, MTK_WED_RRO_RX_D_CFG(2), MTK_WED_RRO_RX_D_DRV_EN); + wdma_set(dev, MTK_WDMA_PREF_TX_CFG, MTK_WDMA_PREF_TX_CFG_PREF_EN); + wdma_set(dev, MTK_WDMA_WRBK_TX_CFG, MTK_WDMA_WRBK_TX_CFG_WRBK_EN); + } + + for (i = 0; i < MTK_WED_RX_QUEUES; i++) { + struct mtk_wed_ring *ring = &dev->rx_ring[i]; + u32 val; + + if (!(ring->flags & MTK_WED_RING_CONFIGURED)) + continue; /* queue is not configured by mt76 */ + + if (mtk_wed_check_wfdma_rx_fill(dev, ring)) { + dev_err(dev->hw->dev, + "rx_ring(%d) dma enable failed\n", i); + continue; + } + + val = wifi_r32(dev, + dev->wlan.wpdma_rx_glo - + dev->wlan.phy_base) | MTK_WFMDA_RX_DMA_EN; + wifi_w32(dev, + dev->wlan.wpdma_rx_glo - dev->wlan.phy_base, + val); + } +} + +static void +mtk_wed_start_hw_rro(struct mtk_wed_device *dev, u32 irq_mask, bool reset) +{ + int i; + + wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask); + wed_w32(dev, MTK_WED_INT_MASK, irq_mask); + + if (!mtk_wed_get_rx_capa(dev) || !dev->wlan.hw_rro) + return; + + if (reset) { + wed_set(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG, + MTK_WED_RRO_MSDU_PG_DRV_EN); + return; + } + + wed_set(dev, MTK_WED_RRO_RX_D_CFG(2), MTK_WED_RRO_MSDU_PG_DRV_CLR); + wed_w32(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG, + MTK_WED_RRO_MSDU_PG_DRV_CLR); + + wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RRO_RX, + MTK_WED_WPDMA_INT_CTRL_RRO_RX0_EN | + MTK_WED_WPDMA_INT_CTRL_RRO_RX0_CLR | + MTK_WED_WPDMA_INT_CTRL_RRO_RX1_EN | + MTK_WED_WPDMA_INT_CTRL_RRO_RX1_CLR | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_RX0_DONE_TRIG, + dev->wlan.rro_rx_tbit[0]) | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_RX1_DONE_TRIG, + dev->wlan.rro_rx_tbit[1])); + + wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RRO_MSDU_PG, + MTK_WED_WPDMA_INT_CTRL_RRO_PG0_EN | + MTK_WED_WPDMA_INT_CTRL_RRO_PG0_CLR | + MTK_WED_WPDMA_INT_CTRL_RRO_PG1_EN | + MTK_WED_WPDMA_INT_CTRL_RRO_PG1_CLR | + MTK_WED_WPDMA_INT_CTRL_RRO_PG2_EN | + MTK_WED_WPDMA_INT_CTRL_RRO_PG2_CLR | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG0_DONE_TRIG, + dev->wlan.rx_pg_tbit[0]) | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG1_DONE_TRIG, + dev->wlan.rx_pg_tbit[1]) | + FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RRO_PG2_DONE_TRIG, + dev->wlan.rx_pg_tbit[2])); + + /* RRO_MSDU_PG_RING2_CFG1_FLD_DRV_EN should be enabled after + * WM FWDL completed, otherwise RRO_MSDU_PG ring may broken + */ + wed_set(dev, MTK_WED_RRO_MSDU_PG_RING2_CFG, + MTK_WED_RRO_MSDU_PG_DRV_EN); + + for (i = 0; i < MTK_WED_RX_QUEUES; i++) { + struct mtk_wed_ring *ring = &dev->rx_rro_ring[i]; + + if (!(ring->flags & MTK_WED_RING_CONFIGURED)) + continue; + + if (mtk_wed_check_wfdma_rx_fill(dev, ring)) + dev_err(dev->hw->dev, + "rx_rro_ring(%d) initialization failed\n", i); + } + + for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++) { + struct mtk_wed_ring *ring = &dev->rx_page_ring[i]; + + if (!(ring->flags & MTK_WED_RING_CONFIGURED)) + continue; + + if (mtk_wed_check_wfdma_rx_fill(dev, ring)) + dev_err(dev->hw->dev, + "rx_page_ring(%d) initialization failed\n", i); + } +} + +static void +mtk_wed_rro_rx_ring_setup(struct mtk_wed_device *dev, int idx, + void __iomem *regs) +{ + struct mtk_wed_ring *ring = &dev->rx_rro_ring[idx]; + + ring->wpdma = regs; + wed_w32(dev, MTK_WED_RRO_RX_D_RX(idx) + MTK_WED_RING_OFS_BASE, + readl(regs)); + wed_w32(dev, MTK_WED_RRO_RX_D_RX(idx) + MTK_WED_RING_OFS_COUNT, + readl(regs + MTK_WED_RING_OFS_COUNT)); + ring->flags |= MTK_WED_RING_CONFIGURED; +} + +static void +mtk_wed_msdu_pg_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs) +{ + struct mtk_wed_ring *ring = &dev->rx_page_ring[idx]; + + ring->wpdma = regs; + wed_w32(dev, MTK_WED_RRO_MSDU_PG_CTRL0(idx) + MTK_WED_RING_OFS_BASE, + readl(regs)); + wed_w32(dev, MTK_WED_RRO_MSDU_PG_CTRL0(idx) + MTK_WED_RING_OFS_COUNT, + readl(regs + MTK_WED_RING_OFS_COUNT)); + ring->flags |= MTK_WED_RING_CONFIGURED; +} + +static int +mtk_wed_ind_rx_ring_setup(struct mtk_wed_device *dev, void __iomem *regs) +{ + struct mtk_wed_ring *ring = &dev->ind_cmd_ring; + u32 val = readl(regs + MTK_WED_RING_OFS_COUNT); + int i, count = 0; - for (i = 0; i < MTK_WED_RX_QUEUES; i++) - mtk_wed_check_wfdma_rx_fill(dev, i); + ring->wpdma = regs; + wed_w32(dev, MTK_WED_IND_CMD_RX_CTRL1 + MTK_WED_RING_OFS_BASE, + readl(regs) & 0xfffffff0); + + wed_w32(dev, MTK_WED_IND_CMD_RX_CTRL1 + MTK_WED_RING_OFS_COUNT, + readl(regs + MTK_WED_RING_OFS_COUNT)); + + /* ack sn cr */ + wed_w32(dev, MTK_WED_RRO_CFG0, dev->wlan.phy_base + + dev->wlan.ind_cmd.ack_sn_addr); + wed_w32(dev, MTK_WED_RRO_CFG1, + FIELD_PREP(MTK_WED_RRO_CFG1_MAX_WIN_SZ, + dev->wlan.ind_cmd.win_size) | + FIELD_PREP(MTK_WED_RRO_CFG1_PARTICL_SE_ID, + dev->wlan.ind_cmd.particular_sid)); + + /* particular session addr element */ + wed_w32(dev, MTK_WED_ADDR_ELEM_CFG0, + dev->wlan.ind_cmd.particular_se_phys); + + for (i = 0; i < dev->wlan.ind_cmd.se_group_nums; i++) { + wed_w32(dev, MTK_WED_RADDR_ELEM_TBL_WDATA, + dev->wlan.ind_cmd.addr_elem_phys[i] >> 4); + wed_w32(dev, MTK_WED_ADDR_ELEM_TBL_CFG, + MTK_WED_ADDR_ELEM_TBL_WR | (i & 0x7f)); + + val = wed_r32(dev, MTK_WED_ADDR_ELEM_TBL_CFG); + while (!(val & MTK_WED_ADDR_ELEM_TBL_WR_RDY) && count++ < 100) + val = wed_r32(dev, MTK_WED_ADDR_ELEM_TBL_CFG); + if (count >= 100) + dev_err(dev->hw->dev, + "write ba session base failed\n"); + } + + /* pn check init */ + for (i = 0; i < dev->wlan.ind_cmd.particular_sid; i++) { + wed_w32(dev, MTK_WED_PN_CHECK_WDATA_M, + MTK_WED_PN_CHECK_IS_FIRST); + + wed_w32(dev, MTK_WED_PN_CHECK_CFG, MTK_WED_PN_CHECK_WR | + FIELD_PREP(MTK_WED_PN_CHECK_SE_ID, i)); + + count = 0; + val = wed_r32(dev, MTK_WED_PN_CHECK_CFG); + while (!(val & MTK_WED_PN_CHECK_WR_RDY) && count++ < 100) + val = wed_r32(dev, MTK_WED_PN_CHECK_CFG); + if (count >= 100) + dev_err(dev->hw->dev, + "session(%d) initialization failed\n", i); } + + wed_w32(dev, MTK_WED_RX_IND_CMD_CNT0, MTK_WED_RX_IND_CMD_DBG_CNT_EN); + wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_IND_CMD_EN); + + return 0; } static void @@ -1466,14 +2339,14 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) mtk_wed_set_ext_int(dev, true); - if (dev->hw->version == 1) { + if (mtk_wed_is_v1(dev->hw)) { u32 val = dev->wlan.wpdma_phys | MTK_PCIE_MIRROR_MAP_EN | FIELD_PREP(MTK_PCIE_MIRROR_MAP_WED_ID, dev->hw->index); val |= BIT(0) | (BIT(1) * !!dev->hw->index); regmap_write(dev->hw->mirror, dev->hw->index * 4, val); - } else { + } else if (mtk_wed_get_rx_capa(dev)) { /* driver set mid ready and only once */ wed_w32(dev, MTK_WED_EXT_INT_MASK1, MTK_WED_EXT_INT_STATUS_WPDMA_MID_RDY); @@ -1483,12 +2356,18 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) wed_r32(dev, MTK_WED_EXT_INT_MASK1); wed_r32(dev, MTK_WED_EXT_INT_MASK2); + if (mtk_wed_is_v3_or_greater(dev->hw)) { + wed_w32(dev, MTK_WED_EXT_INT_MASK3, + MTK_WED_EXT_INT_STATUS_WPDMA_MID_RDY); + wed_r32(dev, MTK_WED_EXT_INT_MASK3); + } + if (mtk_wed_rro_cfg(dev)) return; - } mtk_wed_set_512_support(dev, dev->wlan.wcid_512); + mtk_wed_amsdu_init(dev); mtk_wed_dma_enable(dev); dev->running = true; @@ -1535,6 +2414,7 @@ mtk_wed_attach(struct mtk_wed_device *dev) dev->irq = hw->irq; dev->wdma_idx = hw->index; dev->version = hw->version; + dev->hw->pcie_base = mtk_wed_get_pcie_base(dev); if (hw->eth->dma_dev == hw->eth->dev && of_dma_is_coherent(hw->eth->dev->of_node)) @@ -1544,6 +2424,10 @@ mtk_wed_attach(struct mtk_wed_device *dev) if (ret) goto out; + ret = mtk_wed_amsdu_buffer_alloc(dev); + if (ret) + goto out; + if (mtk_wed_get_rx_capa(dev)) { ret = mtk_wed_rro_alloc(dev); if (ret) @@ -1551,13 +2435,14 @@ mtk_wed_attach(struct mtk_wed_device *dev) } mtk_wed_hw_init_early(dev); - if (hw->version == 1) { + if (mtk_wed_is_v1(hw)) regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP, BIT(hw->index), 0); - } else { + else dev->rev_id = wed_r32(dev, MTK_WED_REV_ID); + + if (mtk_wed_get_rx_capa(dev)) ret = mtk_wed_wo_init(hw); - } out: if (ret) { dev_err(dev->hw->dev, "failed to attach wed device\n"); @@ -1601,6 +2486,23 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs, ring->reg_base = MTK_WED_RING_TX(idx); ring->wpdma = regs; + if (mtk_wed_is_v3_or_greater(dev->hw) && idx == 1) { + /* reset prefetch index */ + wed_set(dev, MTK_WED_WDMA_RX_PREF_CFG, + MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR | + MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR); + + wed_clr(dev, MTK_WED_WDMA_RX_PREF_CFG, + MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR | + MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR); + + /* reset prefetch FIFO */ + wed_w32(dev, MTK_WED_WDMA_RX_PREF_FIFO_CFG, + MTK_WED_WDMA_RX_PREF_FIFO_RX0_CLR | + MTK_WED_WDMA_RX_PREF_FIFO_RX1_CLR); + wed_w32(dev, MTK_WED_WDMA_RX_PREF_FIFO_CFG, 0); + } + /* WED -> WPDMA */ wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_BASE, ring->desc_phys); wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_COUNT, MTK_WED_TX_RING_SIZE); @@ -1619,7 +2521,7 @@ static int mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs) { struct mtk_wed_ring *ring = &dev->txfree_ring; - int i, index = dev->hw->version == 1; + int i, index = mtk_wed_is_v1(dev->hw); /* * For txfree event handling, the same DMA ring is shared between WED @@ -1675,15 +2577,13 @@ mtk_wed_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs, static u32 mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask) { - u32 val, ext_mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK; + u32 val, ext_mask; - if (dev->hw->version == 1) - ext_mask |= MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR; + if (mtk_wed_is_v3_or_greater(dev->hw)) + ext_mask = MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT | + MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD; else - ext_mask |= MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH | - MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH | - MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT | - MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR; + ext_mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK; val = wed_r32(dev, MTK_WED_EXT_INT_STATUS); wed_w32(dev, MTK_WED_EXT_INT_STATUS, val); @@ -1713,19 +2613,20 @@ mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask) int mtk_wed_flow_add(int index) { struct mtk_wed_hw *hw = hw_list[index]; - int ret; + int ret = 0; - if (!hw || !hw->wed_dev) - return -ENODEV; + mutex_lock(&hw_lock); - if (hw->num_flows) { - hw->num_flows++; - return 0; + if (!hw || !hw->wed_dev) { + ret = -ENODEV; + goto out; } - mutex_lock(&hw_lock); - if (!hw->wed_dev) { - ret = -ENODEV; + if (!hw->wed_dev->wlan.offload_enable) + goto out; + + if (hw->num_flows) { + hw->num_flows++; goto out; } @@ -1744,14 +2645,15 @@ void mtk_wed_flow_remove(int index) { struct mtk_wed_hw *hw = hw_list[index]; - if (!hw) - return; + mutex_lock(&hw_lock); - if (--hw->num_flows) - return; + if (!hw || !hw->wed_dev) + goto out; - mutex_lock(&hw_lock); - if (!hw->wed_dev) + if (!hw->wed_dev->wlan.offload_disable) + goto out; + + if (--hw->num_flows) goto out; hw->wed_dev->wlan.offload_disable(hw->wed_dev); @@ -1842,7 +2744,7 @@ mtk_wed_setup_tc(struct mtk_wed_device *wed, struct net_device *dev, { struct mtk_wed_hw *hw = wed->hw; - if (hw->version < 2) + if (mtk_wed_is_v1(hw)) return -EOPNOTSUPP; switch (type) { @@ -1874,6 +2776,10 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, .detach = mtk_wed_detach, .ppe_check = mtk_wed_ppe_check, .setup_tc = mtk_wed_setup_tc, + .start_hw_rro = mtk_wed_start_hw_rro, + .rro_rx_ring_setup = mtk_wed_rro_rx_ring_setup, + .msdu_pg_rx_ring_setup = mtk_wed_msdu_pg_rx_ring_setup, + .ind_rx_ring_setup = mtk_wed_ind_rx_ring_setup, }; struct device_node *eth_np = eth->dev->of_node; struct platform_device *pdev; @@ -1916,9 +2822,17 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, hw->wdma = wdma; hw->index = index; hw->irq = irq; - hw->version = mtk_is_netsys_v1(eth) ? 1 : 2; + hw->version = eth->soc->version; - if (hw->version == 1) { + switch (hw->version) { + case 2: + hw->soc = &mt7986_data; + break; + case 3: + hw->soc = &mt7988_data; + break; + default: + case 1: hw->mirror = syscon_regmap_lookup_by_phandle(eth_np, "mediatek,pcie-mirror"); hw->hifsys = syscon_regmap_lookup_by_phandle(eth_np, @@ -1932,6 +2846,8 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, regmap_write(hw->mirror, 0, 0); regmap_write(hw->mirror, 4, 0); } + hw->soc = &mt7622_data; + break; } mtk_wed_hw_add_debugfs(hw); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h index 43ab77eaf683..c1f0479d7a71 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.h +++ b/drivers/net/ethernet/mediatek/mtk_wed.h @@ -9,10 +9,29 @@ #include <linux/regmap.h> #include <linux/netdevice.h> +#include "mtk_wed_regs.h" + struct mtk_eth; struct mtk_wed_wo; +struct mtk_wed_soc_data { + struct { + u32 tx_bm_tkid; + u32 wpdma_rx_ring0; + u32 reset_idx_tx_mask; + u32 reset_idx_rx_mask; + } regmap; + u32 tx_ring_desc_size; + u32 wdma_desc_size; +}; + +struct mtk_wed_amsdu { + void *txd; + dma_addr_t txd_phy; +}; + struct mtk_wed_hw { + const struct mtk_wed_soc_data *soc; struct device_node *node; struct mtk_eth *eth; struct regmap *regs; @@ -24,6 +43,8 @@ struct mtk_wed_hw { struct dentry *debugfs_dir; struct mtk_wed_device *wed_dev; struct mtk_wed_wo *wed_wo; + struct mtk_wed_amsdu *wed_amsdu; + u32 pcie_base; u32 debugfs_reg; u32 num_flows; u8 version; @@ -37,9 +58,30 @@ struct mtk_wdma_info { u8 queue; u16 wcid; u8 bss; + u8 amsdu; }; #ifdef CONFIG_NET_MEDIATEK_SOC_WED +static inline bool mtk_wed_is_v1(struct mtk_wed_hw *hw) +{ + return hw->version == 1; +} + +static inline bool mtk_wed_is_v2(struct mtk_wed_hw *hw) +{ + return hw->version == 2; +} + +static inline bool mtk_wed_is_v3(struct mtk_wed_hw *hw) +{ + return hw->version == 3; +} + +static inline bool mtk_wed_is_v3_or_greater(struct mtk_wed_hw *hw) +{ + return hw->version > 2; +} + static inline void wed_w32(struct mtk_wed_device *dev, u32 reg, u32 val) { @@ -122,6 +164,21 @@ wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val) writel(val, dev->txfree_ring.wpdma + reg); } +static inline u32 mtk_wed_get_pcie_base(struct mtk_wed_device *dev) +{ + if (!mtk_wed_is_v3_or_greater(dev->hw)) + return MTK_WED_PCIE_BASE; + + switch (dev->hw->index) { + case 1: + return MTK_WED_PCIE_BASE1; + case 2: + return MTK_WED_PCIE_BASE2; + default: + return MTK_WED_PCIE_BASE0; + } +} + void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth, void __iomem *wdma, phys_addr_t wdma_phy, int index); diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c index e24afeaea0da..781c691473e1 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c @@ -11,6 +11,7 @@ struct reg_dump { u16 offset; u8 type; u8 base; + u32 mask; }; enum { @@ -25,6 +26,8 @@ enum { #define DUMP_STR(_str) { _str, 0, DUMP_TYPE_STRING } #define DUMP_REG(_reg, ...) { #_reg, MTK_##_reg, __VA_ARGS__ } +#define DUMP_REG_MASK(_reg, _mask) \ + { #_mask, MTK_##_reg, DUMP_TYPE_WED, 0, MTK_##_mask } #define DUMP_RING(_prefix, _base, ...) \ { _prefix " BASE", _base, __VA_ARGS__ }, \ { _prefix " CNT", _base + 0x4, __VA_ARGS__ }, \ @@ -32,6 +35,7 @@ enum { { _prefix " DIDX", _base + 0xc, __VA_ARGS__ } #define DUMP_WED(_reg) DUMP_REG(_reg, DUMP_TYPE_WED) +#define DUMP_WED_MASK(_reg, _mask) DUMP_REG_MASK(_reg, _mask) #define DUMP_WED_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WED) #define DUMP_WDMA(_reg) DUMP_REG(_reg, DUMP_TYPE_WDMA) @@ -151,7 +155,7 @@ DEFINE_SHOW_ATTRIBUTE(wed_txinfo); static int wed_rxinfo_show(struct seq_file *s, void *data) { - static const struct reg_dump regs[] = { + static const struct reg_dump regs_common[] = { DUMP_STR("WPDMA RX"), DUMP_WPDMA_RX_RING(0), DUMP_WPDMA_RX_RING(1), @@ -169,7 +173,7 @@ wed_rxinfo_show(struct seq_file *s, void *data) DUMP_WED_RING(WED_RING_RX_DATA(0)), DUMP_WED_RING(WED_RING_RX_DATA(1)), - DUMP_STR("WED RRO"), + DUMP_STR("WED WO RRO"), DUMP_WED_RRO_RING(WED_RROQM_MIOD_CTRL0), DUMP_WED(WED_RROQM_MID_MIB), DUMP_WED(WED_RROQM_MOD_MIB), @@ -180,17 +184,6 @@ wed_rxinfo_show(struct seq_file *s, void *data) DUMP_WED(WED_RROQM_FDBK_ANC_MIB), DUMP_WED(WED_RROQM_FDBK_ANC2H_MIB), - DUMP_STR("WED Route QM"), - DUMP_WED(WED_RTQM_R2H_MIB(0)), - DUMP_WED(WED_RTQM_R2Q_MIB(0)), - DUMP_WED(WED_RTQM_Q2H_MIB(0)), - DUMP_WED(WED_RTQM_R2H_MIB(1)), - DUMP_WED(WED_RTQM_R2Q_MIB(1)), - DUMP_WED(WED_RTQM_Q2H_MIB(1)), - DUMP_WED(WED_RTQM_Q2N_MIB), - DUMP_WED(WED_RTQM_Q2B_MIB), - DUMP_WED(WED_RTQM_PFDBK_MIB), - DUMP_STR("WED WDMA TX"), DUMP_WED(WED_WDMA_TX_MIB), DUMP_WED_RING(WED_WDMA_RING_TX), @@ -211,6 +204,287 @@ wed_rxinfo_show(struct seq_file *s, void *data) DUMP_WED(WED_RX_BM_INTF), DUMP_WED(WED_RX_BM_ERR_STS), }; + static const struct reg_dump regs_wed_v2[] = { + DUMP_STR("WED Route QM"), + DUMP_WED(WED_RTQM_R2H_MIB(0)), + DUMP_WED(WED_RTQM_R2Q_MIB(0)), + DUMP_WED(WED_RTQM_Q2H_MIB(0)), + DUMP_WED(WED_RTQM_R2H_MIB(1)), + DUMP_WED(WED_RTQM_R2Q_MIB(1)), + DUMP_WED(WED_RTQM_Q2H_MIB(1)), + DUMP_WED(WED_RTQM_Q2N_MIB), + DUMP_WED(WED_RTQM_Q2B_MIB), + DUMP_WED(WED_RTQM_PFDBK_MIB), + }; + static const struct reg_dump regs_wed_v3[] = { + DUMP_STR("WED RX RRO DATA"), + DUMP_WED_RING(WED_RRO_RX_D_RX(0)), + DUMP_WED_RING(WED_RRO_RX_D_RX(1)), + + DUMP_STR("WED RX MSDU PAGE"), + DUMP_WED_RING(WED_RRO_MSDU_PG_CTRL0(0)), + DUMP_WED_RING(WED_RRO_MSDU_PG_CTRL0(1)), + DUMP_WED_RING(WED_RRO_MSDU_PG_CTRL0(2)), + + DUMP_STR("WED RX IND CMD"), + DUMP_WED(WED_IND_CMD_RX_CTRL1), + DUMP_WED_MASK(WED_IND_CMD_RX_CTRL2, WED_IND_CMD_MAX_CNT), + DUMP_WED_MASK(WED_IND_CMD_RX_CTRL0, WED_IND_CMD_PROC_IDX), + DUMP_WED_MASK(RRO_IND_CMD_SIGNATURE, RRO_IND_CMD_DMA_IDX), + DUMP_WED_MASK(WED_IND_CMD_RX_CTRL0, WED_IND_CMD_MAGIC_CNT), + DUMP_WED_MASK(RRO_IND_CMD_SIGNATURE, RRO_IND_CMD_MAGIC_CNT), + DUMP_WED_MASK(WED_IND_CMD_RX_CTRL0, + WED_IND_CMD_PREFETCH_FREE_CNT), + DUMP_WED_MASK(WED_RRO_CFG1, WED_RRO_CFG1_PARTICL_SE_ID), + + DUMP_STR("WED ADDR ELEM"), + DUMP_WED(WED_ADDR_ELEM_CFG0), + DUMP_WED_MASK(WED_ADDR_ELEM_CFG1, + WED_ADDR_ELEM_PREFETCH_FREE_CNT), + + DUMP_STR("WED Route QM"), + DUMP_WED(WED_RTQM_ENQ_I2Q_DMAD_CNT), + DUMP_WED(WED_RTQM_ENQ_I2N_DMAD_CNT), + DUMP_WED(WED_RTQM_ENQ_I2Q_PKT_CNT), + DUMP_WED(WED_RTQM_ENQ_I2N_PKT_CNT), + DUMP_WED(WED_RTQM_ENQ_USED_ENTRY_CNT), + DUMP_WED(WED_RTQM_ENQ_ERR_CNT), + + DUMP_WED(WED_RTQM_DEQ_DMAD_CNT), + DUMP_WED(WED_RTQM_DEQ_Q2I_DMAD_CNT), + DUMP_WED(WED_RTQM_DEQ_PKT_CNT), + DUMP_WED(WED_RTQM_DEQ_Q2I_PKT_CNT), + DUMP_WED(WED_RTQM_DEQ_USED_PFDBK_CNT), + DUMP_WED(WED_RTQM_DEQ_ERR_CNT), + }; + struct mtk_wed_hw *hw = s->private; + struct mtk_wed_device *dev = hw->wed_dev; + + if (dev) { + dump_wed_regs(s, dev, regs_common, ARRAY_SIZE(regs_common)); + if (mtk_wed_is_v2(hw)) + dump_wed_regs(s, dev, + regs_wed_v2, ARRAY_SIZE(regs_wed_v2)); + else + dump_wed_regs(s, dev, + regs_wed_v3, ARRAY_SIZE(regs_wed_v3)); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(wed_rxinfo); + +static int +wed_amsdu_show(struct seq_file *s, void *data) +{ + static const struct reg_dump regs[] = { + DUMP_STR("WED AMDSU INFO"), + DUMP_WED(WED_MON_AMSDU_FIFO_DMAD), + + DUMP_STR("WED AMDSU ENG0 INFO"), + DUMP_WED(WED_MON_AMSDU_ENG_DMAD(0)), + DUMP_WED(WED_MON_AMSDU_ENG_QFPL(0)), + DUMP_WED(WED_MON_AMSDU_ENG_QENI(0)), + DUMP_WED(WED_MON_AMSDU_ENG_QENO(0)), + DUMP_WED(WED_MON_AMSDU_ENG_MERG(0)), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(0), + WED_AMSDU_ENG_MAX_PL_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(0), + WED_AMSDU_ENG_MAX_QGPP_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(0), + WED_AMSDU_ENG_CUR_ENTRY), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(0), + WED_AMSDU_ENG_MAX_BUF_MERGED), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(0), + WED_AMSDU_ENG_MAX_MSDU_MERGED), + + DUMP_STR("WED AMDSU ENG1 INFO"), + DUMP_WED(WED_MON_AMSDU_ENG_DMAD(1)), + DUMP_WED(WED_MON_AMSDU_ENG_QFPL(1)), + DUMP_WED(WED_MON_AMSDU_ENG_QENI(1)), + DUMP_WED(WED_MON_AMSDU_ENG_QENO(1)), + DUMP_WED(WED_MON_AMSDU_ENG_MERG(1)), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(1), + WED_AMSDU_ENG_MAX_PL_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(1), + WED_AMSDU_ENG_MAX_QGPP_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(1), + WED_AMSDU_ENG_CUR_ENTRY), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2), + WED_AMSDU_ENG_MAX_BUF_MERGED), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2), + WED_AMSDU_ENG_MAX_MSDU_MERGED), + + DUMP_STR("WED AMDSU ENG2 INFO"), + DUMP_WED(WED_MON_AMSDU_ENG_DMAD(2)), + DUMP_WED(WED_MON_AMSDU_ENG_QFPL(2)), + DUMP_WED(WED_MON_AMSDU_ENG_QENI(2)), + DUMP_WED(WED_MON_AMSDU_ENG_QENO(2)), + DUMP_WED(WED_MON_AMSDU_ENG_MERG(2)), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(2), + WED_AMSDU_ENG_MAX_PL_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(2), + WED_AMSDU_ENG_MAX_QGPP_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2), + WED_AMSDU_ENG_CUR_ENTRY), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2), + WED_AMSDU_ENG_MAX_BUF_MERGED), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(2), + WED_AMSDU_ENG_MAX_MSDU_MERGED), + + DUMP_STR("WED AMDSU ENG3 INFO"), + DUMP_WED(WED_MON_AMSDU_ENG_DMAD(3)), + DUMP_WED(WED_MON_AMSDU_ENG_QFPL(3)), + DUMP_WED(WED_MON_AMSDU_ENG_QENI(3)), + DUMP_WED(WED_MON_AMSDU_ENG_QENO(3)), + DUMP_WED(WED_MON_AMSDU_ENG_MERG(3)), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(3), + WED_AMSDU_ENG_MAX_PL_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(3), + WED_AMSDU_ENG_MAX_QGPP_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(3), + WED_AMSDU_ENG_CUR_ENTRY), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(3), + WED_AMSDU_ENG_MAX_BUF_MERGED), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(3), + WED_AMSDU_ENG_MAX_MSDU_MERGED), + + DUMP_STR("WED AMDSU ENG4 INFO"), + DUMP_WED(WED_MON_AMSDU_ENG_DMAD(4)), + DUMP_WED(WED_MON_AMSDU_ENG_QFPL(4)), + DUMP_WED(WED_MON_AMSDU_ENG_QENI(4)), + DUMP_WED(WED_MON_AMSDU_ENG_QENO(4)), + DUMP_WED(WED_MON_AMSDU_ENG_MERG(4)), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(4), + WED_AMSDU_ENG_MAX_PL_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(4), + WED_AMSDU_ENG_MAX_QGPP_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4), + WED_AMSDU_ENG_CUR_ENTRY), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4), + WED_AMSDU_ENG_MAX_BUF_MERGED), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4), + WED_AMSDU_ENG_MAX_MSDU_MERGED), + + DUMP_STR("WED AMDSU ENG5 INFO"), + DUMP_WED(WED_MON_AMSDU_ENG_DMAD(5)), + DUMP_WED(WED_MON_AMSDU_ENG_QFPL(5)), + DUMP_WED(WED_MON_AMSDU_ENG_QENI(5)), + DUMP_WED(WED_MON_AMSDU_ENG_QENO(5)), + DUMP_WED(WED_MON_AMSDU_ENG_MERG(5)), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(5), + WED_AMSDU_ENG_MAX_PL_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(5), + WED_AMSDU_ENG_MAX_QGPP_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(5), + WED_AMSDU_ENG_CUR_ENTRY), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(5), + WED_AMSDU_ENG_MAX_BUF_MERGED), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(5), + WED_AMSDU_ENG_MAX_MSDU_MERGED), + + DUMP_STR("WED AMDSU ENG6 INFO"), + DUMP_WED(WED_MON_AMSDU_ENG_DMAD(6)), + DUMP_WED(WED_MON_AMSDU_ENG_QFPL(6)), + DUMP_WED(WED_MON_AMSDU_ENG_QENI(6)), + DUMP_WED(WED_MON_AMSDU_ENG_QENO(6)), + DUMP_WED(WED_MON_AMSDU_ENG_MERG(6)), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(6), + WED_AMSDU_ENG_MAX_PL_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(6), + WED_AMSDU_ENG_MAX_QGPP_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(6), + WED_AMSDU_ENG_CUR_ENTRY), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(6), + WED_AMSDU_ENG_MAX_BUF_MERGED), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(6), + WED_AMSDU_ENG_MAX_MSDU_MERGED), + + DUMP_STR("WED AMDSU ENG7 INFO"), + DUMP_WED(WED_MON_AMSDU_ENG_DMAD(7)), + DUMP_WED(WED_MON_AMSDU_ENG_QFPL(7)), + DUMP_WED(WED_MON_AMSDU_ENG_QENI(7)), + DUMP_WED(WED_MON_AMSDU_ENG_QENO(7)), + DUMP_WED(WED_MON_AMSDU_ENG_MERG(7)), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(7), + WED_AMSDU_ENG_MAX_PL_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(7), + WED_AMSDU_ENG_MAX_QGPP_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(7), + WED_AMSDU_ENG_CUR_ENTRY), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(7), + WED_AMSDU_ENG_MAX_BUF_MERGED), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(4), + WED_AMSDU_ENG_MAX_MSDU_MERGED), + + DUMP_STR("WED AMDSU ENG8 INFO"), + DUMP_WED(WED_MON_AMSDU_ENG_DMAD(8)), + DUMP_WED(WED_MON_AMSDU_ENG_QFPL(8)), + DUMP_WED(WED_MON_AMSDU_ENG_QENI(8)), + DUMP_WED(WED_MON_AMSDU_ENG_QENO(8)), + DUMP_WED(WED_MON_AMSDU_ENG_MERG(8)), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(8), + WED_AMSDU_ENG_MAX_PL_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT8(8), + WED_AMSDU_ENG_MAX_QGPP_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(8), + WED_AMSDU_ENG_CUR_ENTRY), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(8), + WED_AMSDU_ENG_MAX_BUF_MERGED), + DUMP_WED_MASK(WED_MON_AMSDU_ENG_CNT9(8), + WED_AMSDU_ENG_MAX_MSDU_MERGED), + + DUMP_STR("WED QMEM INFO"), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(0), WED_AMSDU_QMEM_FQ_CNT), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(0), WED_AMSDU_QMEM_SP_QCNT), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(1), WED_AMSDU_QMEM_TID0_QCNT), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(1), WED_AMSDU_QMEM_TID1_QCNT), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(2), WED_AMSDU_QMEM_TID2_QCNT), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(2), WED_AMSDU_QMEM_TID3_QCNT), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(3), WED_AMSDU_QMEM_TID4_QCNT), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(3), WED_AMSDU_QMEM_TID5_QCNT), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(4), WED_AMSDU_QMEM_TID6_QCNT), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_CNT(4), WED_AMSDU_QMEM_TID7_QCNT), + + DUMP_STR("WED QMEM HEAD INFO"), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(0), WED_AMSDU_QMEM_FQ_HEAD), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(0), WED_AMSDU_QMEM_SP_QHEAD), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(1), WED_AMSDU_QMEM_TID0_QHEAD), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(1), WED_AMSDU_QMEM_TID1_QHEAD), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(2), WED_AMSDU_QMEM_TID2_QHEAD), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(2), WED_AMSDU_QMEM_TID3_QHEAD), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(3), WED_AMSDU_QMEM_TID4_QHEAD), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(3), WED_AMSDU_QMEM_TID5_QHEAD), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(4), WED_AMSDU_QMEM_TID6_QHEAD), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(4), WED_AMSDU_QMEM_TID7_QHEAD), + + DUMP_STR("WED QMEM TAIL INFO"), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(5), WED_AMSDU_QMEM_FQ_TAIL), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(5), WED_AMSDU_QMEM_SP_QTAIL), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(6), WED_AMSDU_QMEM_TID0_QTAIL), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(6), WED_AMSDU_QMEM_TID1_QTAIL), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(7), WED_AMSDU_QMEM_TID2_QTAIL), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(7), WED_AMSDU_QMEM_TID3_QTAIL), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(8), WED_AMSDU_QMEM_TID4_QTAIL), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(8), WED_AMSDU_QMEM_TID5_QTAIL), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(9), WED_AMSDU_QMEM_TID6_QTAIL), + DUMP_WED_MASK(WED_MON_AMSDU_QMEM_PTR(9), WED_AMSDU_QMEM_TID7_QTAIL), + + DUMP_STR("WED HIFTXD MSDU INFO"), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(1)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(2)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(3)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(4)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(5)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(6)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(7)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(8)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(9)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(10)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(11)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(12)), + DUMP_WED(WED_MON_AMSDU_HIFTXD_FETCH_MSDU(13)), + }; struct mtk_wed_hw *hw = s->private; struct mtk_wed_device *dev = hw->wed_dev; @@ -219,7 +493,94 @@ wed_rxinfo_show(struct seq_file *s, void *data) return 0; } -DEFINE_SHOW_ATTRIBUTE(wed_rxinfo); +DEFINE_SHOW_ATTRIBUTE(wed_amsdu); + +static int +wed_rtqm_show(struct seq_file *s, void *data) +{ + static const struct reg_dump regs[] = { + DUMP_STR("WED Route QM IGRS0(N2H + Recycle)"), + DUMP_WED(WED_RTQM_IGRS0_I2HW_DMAD_CNT), + DUMP_WED(WED_RTQM_IGRS0_I2H_DMAD_CNT(0)), + DUMP_WED(WED_RTQM_IGRS0_I2H_DMAD_CNT(1)), + DUMP_WED(WED_RTQM_IGRS0_I2HW_PKT_CNT), + DUMP_WED(WED_RTQM_IGRS0_I2H_PKT_CNT(0)), + DUMP_WED(WED_RTQM_IGRS0_I2H_PKT_CNT(0)), + DUMP_WED(WED_RTQM_IGRS0_FDROP_CNT), + + DUMP_STR("WED Route QM IGRS1(Legacy)"), + DUMP_WED(WED_RTQM_IGRS1_I2HW_DMAD_CNT), + DUMP_WED(WED_RTQM_IGRS1_I2H_DMAD_CNT(0)), + DUMP_WED(WED_RTQM_IGRS1_I2H_DMAD_CNT(1)), + DUMP_WED(WED_RTQM_IGRS1_I2HW_PKT_CNT), + DUMP_WED(WED_RTQM_IGRS1_I2H_PKT_CNT(0)), + DUMP_WED(WED_RTQM_IGRS1_I2H_PKT_CNT(1)), + DUMP_WED(WED_RTQM_IGRS1_FDROP_CNT), + + DUMP_STR("WED Route QM IGRS2(RRO3.0)"), + DUMP_WED(WED_RTQM_IGRS2_I2HW_DMAD_CNT), + DUMP_WED(WED_RTQM_IGRS2_I2H_DMAD_CNT(0)), + DUMP_WED(WED_RTQM_IGRS2_I2H_DMAD_CNT(1)), + DUMP_WED(WED_RTQM_IGRS2_I2HW_PKT_CNT), + DUMP_WED(WED_RTQM_IGRS2_I2H_PKT_CNT(0)), + DUMP_WED(WED_RTQM_IGRS2_I2H_PKT_CNT(1)), + DUMP_WED(WED_RTQM_IGRS2_FDROP_CNT), + + DUMP_STR("WED Route QM IGRS3(DEBUG)"), + DUMP_WED(WED_RTQM_IGRS2_I2HW_DMAD_CNT), + DUMP_WED(WED_RTQM_IGRS3_I2H_DMAD_CNT(0)), + DUMP_WED(WED_RTQM_IGRS3_I2H_DMAD_CNT(1)), + DUMP_WED(WED_RTQM_IGRS3_I2HW_PKT_CNT), + DUMP_WED(WED_RTQM_IGRS3_I2H_PKT_CNT(0)), + DUMP_WED(WED_RTQM_IGRS3_I2H_PKT_CNT(1)), + DUMP_WED(WED_RTQM_IGRS3_FDROP_CNT), + }; + struct mtk_wed_hw *hw = s->private; + struct mtk_wed_device *dev = hw->wed_dev; + + if (dev) + dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(wed_rtqm); + +static int +wed_rro_show(struct seq_file *s, void *data) +{ + static const struct reg_dump regs[] = { + DUMP_STR("RRO/IND CMD CNT"), + DUMP_WED(WED_RX_IND_CMD_CNT(1)), + DUMP_WED(WED_RX_IND_CMD_CNT(2)), + DUMP_WED(WED_RX_IND_CMD_CNT(3)), + DUMP_WED(WED_RX_IND_CMD_CNT(4)), + DUMP_WED(WED_RX_IND_CMD_CNT(5)), + DUMP_WED(WED_RX_IND_CMD_CNT(6)), + DUMP_WED(WED_RX_IND_CMD_CNT(7)), + DUMP_WED(WED_RX_IND_CMD_CNT(8)), + DUMP_WED_MASK(WED_RX_IND_CMD_CNT(9), + WED_IND_CMD_MAGIC_CNT_FAIL_CNT), + + DUMP_WED(WED_RX_ADDR_ELEM_CNT(0)), + DUMP_WED_MASK(WED_RX_ADDR_ELEM_CNT(1), + WED_ADDR_ELEM_SIG_FAIL_CNT), + DUMP_WED(WED_RX_MSDU_PG_CNT(1)), + DUMP_WED(WED_RX_MSDU_PG_CNT(2)), + DUMP_WED(WED_RX_MSDU_PG_CNT(3)), + DUMP_WED(WED_RX_MSDU_PG_CNT(4)), + DUMP_WED(WED_RX_MSDU_PG_CNT(5)), + DUMP_WED_MASK(WED_RX_PN_CHK_CNT, + WED_PN_CHK_FAIL_CNT), + }; + struct mtk_wed_hw *hw = s->private; + struct mtk_wed_device *dev = hw->wed_dev; + + if (dev) + dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(wed_rro); static int mtk_wed_reg_set(void *data, u64 val) @@ -261,7 +622,16 @@ void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw) debugfs_create_u32("regidx", 0600, dir, &hw->debugfs_reg); debugfs_create_file_unsafe("regval", 0600, dir, hw, &fops_regval); debugfs_create_file_unsafe("txinfo", 0400, dir, hw, &wed_txinfo_fops); - if (hw->version != 1) + if (!mtk_wed_is_v1(hw)) { debugfs_create_file_unsafe("rxinfo", 0400, dir, hw, &wed_rxinfo_fops); + if (mtk_wed_is_v3_or_greater(hw)) { + debugfs_create_file_unsafe("amsdu", 0400, dir, hw, + &wed_amsdu_fops); + debugfs_create_file_unsafe("rtqm", 0400, dir, hw, + &wed_rtqm_fops); + debugfs_create_file_unsafe("rro", 0400, dir, hw, + &wed_rro_fops); + } + } } diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c index 071ed3dea860..65a78e274009 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c @@ -16,14 +16,30 @@ #include "mtk_wed_wo.h" #include "mtk_wed.h" +static struct mtk_wed_wo_memory_region mem_region[] = { + [MTK_WED_WO_REGION_EMI] = { + .name = "wo-emi", + }, + [MTK_WED_WO_REGION_ILM] = { + .name = "wo-ilm", + }, + [MTK_WED_WO_REGION_DATA] = { + .name = "wo-data", + .shared = true, + }, + [MTK_WED_WO_REGION_BOOT] = { + .name = "wo-boot", + }, +}; + static u32 wo_r32(struct mtk_wed_wo *wo, u32 reg) { - return readl(wo->boot.addr + reg); + return readl(mem_region[MTK_WED_WO_REGION_BOOT].addr + reg); } static void wo_w32(struct mtk_wed_wo *wo, u32 reg, u32 val) { - writel(val, wo->boot.addr + reg); + writel(val, mem_region[MTK_WED_WO_REGION_BOOT].addr + reg); } static struct sk_buff * @@ -68,6 +84,9 @@ mtk_wed_update_rx_stats(struct mtk_wed_device *wed, struct sk_buff *skb) struct mtk_wed_wo_rx_stats *stats; int i; + if (!wed->wlan.update_wo_rx_stats) + return; + if (count * sizeof(*stats) > skb->len - sizeof(u32)) return; @@ -204,7 +223,7 @@ int mtk_wed_mcu_msg_update(struct mtk_wed_device *dev, int id, void *data, { struct mtk_wed_wo *wo = dev->hw->wed_wo; - if (dev->hw->version == 1) + if (!mtk_wed_get_rx_capa(dev)) return 0; if (WARN_ON(!wo)) @@ -215,19 +234,13 @@ int mtk_wed_mcu_msg_update(struct mtk_wed_device *dev, int id, void *data, } static int -mtk_wed_get_memory_region(struct mtk_wed_wo *wo, +mtk_wed_get_memory_region(struct mtk_wed_hw *hw, int index, struct mtk_wed_wo_memory_region *region) { struct reserved_mem *rmem; struct device_node *np; - int index; - - index = of_property_match_string(wo->hw->node, "memory-region-names", - region->name); - if (index < 0) - return index; - np = of_parse_phandle(wo->hw->node, "memory-region", index); + np = of_parse_phandle(hw->node, "memory-region", index); if (!np) return -ENODEV; @@ -239,7 +252,7 @@ mtk_wed_get_memory_region(struct mtk_wed_wo *wo, region->phy_addr = rmem->base; region->size = rmem->size; - region->addr = devm_ioremap(wo->hw->dev, region->phy_addr, region->size); + region->addr = devm_ioremap(hw->dev, region->phy_addr, region->size); return !region->addr ? -EINVAL : 0; } @@ -252,6 +265,9 @@ mtk_wed_mcu_run_firmware(struct mtk_wed_wo *wo, const struct firmware *fw, const struct mtk_wed_fw_trailer *trailer; const struct mtk_wed_fw_region *fw_region; + if (!region->phy_addr || !region->size) + return 0; + trailer_ptr = fw->data + fw->size - sizeof(*trailer); trailer = (const struct mtk_wed_fw_trailer *)trailer_ptr; region_ptr = trailer_ptr - trailer->num_region * sizeof(*fw_region); @@ -291,18 +307,6 @@ next: static int mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo) { - static struct mtk_wed_wo_memory_region mem_region[] = { - [MTK_WED_WO_REGION_EMI] = { - .name = "wo-emi", - }, - [MTK_WED_WO_REGION_ILM] = { - .name = "wo-ilm", - }, - [MTK_WED_WO_REGION_DATA] = { - .name = "wo-data", - .shared = true, - }, - }; const struct mtk_wed_fw_trailer *trailer; const struct firmware *fw; const char *fw_name; @@ -311,25 +315,38 @@ mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo) /* load firmware region metadata */ for (i = 0; i < ARRAY_SIZE(mem_region); i++) { - ret = mtk_wed_get_memory_region(wo, &mem_region[i]); + int index = of_property_match_string(wo->hw->node, + "memory-region-names", + mem_region[i].name); + if (index < 0) + continue; + + ret = mtk_wed_get_memory_region(wo->hw, index, &mem_region[i]); if (ret) return ret; } - wo->boot.name = "wo-boot"; - ret = mtk_wed_get_memory_region(wo, &wo->boot); - if (ret) - return ret; - /* set dummy cr */ wed_w32(wo->hw->wed_dev, MTK_WED_SCR0 + 4 * MTK_WED_DUMMY_CR_FWDL, wo->hw->index + 1); /* load firmware */ - if (of_device_is_compatible(wo->hw->node, "mediatek,mt7981-wed")) - fw_name = MT7981_FIRMWARE_WO; - else - fw_name = wo->hw->index ? MT7986_FIRMWARE_WO1 : MT7986_FIRMWARE_WO0; + switch (wo->hw->version) { + case 2: + if (of_device_is_compatible(wo->hw->node, + "mediatek,mt7981-wed")) + fw_name = MT7981_FIRMWARE_WO; + else + fw_name = wo->hw->index ? MT7986_FIRMWARE_WO1 + : MT7986_FIRMWARE_WO0; + break; + case 3: + fw_name = wo->hw->index ? MT7988_FIRMWARE_WO1 + : MT7988_FIRMWARE_WO0; + break; + default: + return -EINVAL; + } ret = request_firmware(&fw, fw_name, wo->hw->dev); if (ret) @@ -350,15 +367,16 @@ mtk_wed_mcu_load_firmware(struct mtk_wed_wo *wo) } /* set the start address */ - boot_cr = wo->hw->index ? MTK_WO_MCU_CFG_LS_WA_BOOT_ADDR_ADDR - : MTK_WO_MCU_CFG_LS_WM_BOOT_ADDR_ADDR; + if (!mtk_wed_is_v3_or_greater(wo->hw) && wo->hw->index) + boot_cr = MTK_WO_MCU_CFG_LS_WA_BOOT_ADDR_ADDR; + else + boot_cr = MTK_WO_MCU_CFG_LS_WM_BOOT_ADDR_ADDR; wo_w32(wo, boot_cr, mem_region[MTK_WED_WO_REGION_EMI].phy_addr >> 16); /* wo firmware reset */ wo_w32(wo, MTK_WO_MCU_CFG_LS_WF_MCCR_CLR_ADDR, 0xc00); - val = wo_r32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR); - val |= wo->hw->index ? MTK_WO_MCU_CFG_LS_WF_WM_WA_WA_CPU_RSTB_MASK - : MTK_WO_MCU_CFG_LS_WF_WM_WA_WM_CPU_RSTB_MASK; + val = wo_r32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR) | + MTK_WO_MCU_CFG_LS_WF_WM_WA_WM_CPU_RSTB_MASK; wo_w32(wo, MTK_WO_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR, val); out: release_firmware(fw); @@ -393,3 +411,5 @@ int mtk_wed_mcu_init(struct mtk_wed_wo *wo) MODULE_FIRMWARE(MT7981_FIRMWARE_WO); MODULE_FIRMWARE(MT7986_FIRMWARE_WO0); MODULE_FIRMWARE(MT7986_FIRMWARE_WO1); +MODULE_FIRMWARE(MT7988_FIRMWARE_WO0); +MODULE_FIRMWARE(MT7988_FIRMWARE_WO1); diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h index 47ea69feb3b2..c71190924816 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_regs.h +++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h @@ -13,6 +13,9 @@ #define MTK_WDMA_DESC_CTRL_LAST_SEG0 BIT(30) #define MTK_WDMA_DESC_CTRL_DMA_DONE BIT(31) +#define MTK_WDMA_TXD0_DESC_INFO_DMA_DONE BIT(29) +#define MTK_WDMA_TXD1_DESC_INFO_DMA_DONE BIT(31) + struct mtk_wdma_desc { __le32 buf0; __le32 ctrl; @@ -25,6 +28,8 @@ struct mtk_wdma_desc { #define MTK_WED_RESET 0x008 #define MTK_WED_RESET_TX_BM BIT(0) #define MTK_WED_RESET_RX_BM BIT(1) +#define MTK_WED_RESET_RX_PG_BM BIT(2) +#define MTK_WED_RESET_RRO_RX_TO_PG BIT(3) #define MTK_WED_RESET_TX_FREE_AGENT BIT(4) #define MTK_WED_RESET_WPDMA_TX_DRV BIT(8) #define MTK_WED_RESET_WPDMA_RX_DRV BIT(9) @@ -37,6 +42,7 @@ struct mtk_wdma_desc { #define MTK_WED_RESET_WDMA_INT_AGENT BIT(19) #define MTK_WED_RESET_RX_RRO_QM BIT(20) #define MTK_WED_RESET_RX_ROUTE_QM BIT(21) +#define MTK_WED_RESET_TX_AMSDU BIT(22) #define MTK_WED_RESET_WED BIT(31) #define MTK_WED_CTRL 0x00c @@ -44,6 +50,9 @@ struct mtk_wdma_desc { #define MTK_WED_CTRL_WPDMA_INT_AGENT_BUSY BIT(1) #define MTK_WED_CTRL_WDMA_INT_AGENT_EN BIT(2) #define MTK_WED_CTRL_WDMA_INT_AGENT_BUSY BIT(3) +#define MTK_WED_CTRL_WED_RX_IND_CMD_EN BIT(5) +#define MTK_WED_CTRL_WED_RX_PG_BM_EN BIT(6) +#define MTK_WED_CTRL_WED_RX_PG_BM_BUSY BIT(7) #define MTK_WED_CTRL_WED_TX_BM_EN BIT(8) #define MTK_WED_CTRL_WED_TX_BM_BUSY BIT(9) #define MTK_WED_CTRL_WED_TX_FREE_AGENT_EN BIT(10) @@ -54,9 +63,14 @@ struct mtk_wdma_desc { #define MTK_WED_CTRL_RX_RRO_QM_BUSY BIT(15) #define MTK_WED_CTRL_RX_ROUTE_QM_EN BIT(16) #define MTK_WED_CTRL_RX_ROUTE_QM_BUSY BIT(17) +#define MTK_WED_CTRL_TX_TKID_ALI_EN BIT(20) +#define MTK_WED_CTRL_TX_TKID_ALI_BUSY BIT(21) +#define MTK_WED_CTRL_TX_AMSDU_EN BIT(22) +#define MTK_WED_CTRL_TX_AMSDU_BUSY BIT(23) #define MTK_WED_CTRL_FINAL_DIDX_READ BIT(24) #define MTK_WED_CTRL_ETH_DMAD_FMT BIT(25) #define MTK_WED_CTRL_MIB_READ_CLEAR BIT(28) +#define MTK_WED_CTRL_FLD_MIB_RD_CLR BIT(28) #define MTK_WED_EXT_INT_STATUS 0x020 #define MTK_WED_EXT_INT_STATUS_TF_LEN_ERR BIT(0) @@ -64,8 +78,8 @@ struct mtk_wdma_desc { #define MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID BIT(4) #define MTK_WED_EXT_INT_STATUS_TX_FBUF_LO_TH BIT(8) #define MTK_WED_EXT_INT_STATUS_TX_FBUF_HI_TH BIT(9) -#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH BIT(12) -#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH BIT(13) +#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH BIT(10) /* wed v2 */ +#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH BIT(11) /* wed v2 */ #define MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR BIT(16) #define MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR BIT(17) #define MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT BIT(18) @@ -89,19 +103,26 @@ struct mtk_wdma_desc { #define MTK_WED_EXT_INT_MASK 0x028 #define MTK_WED_EXT_INT_MASK1 0x02c #define MTK_WED_EXT_INT_MASK2 0x030 +#define MTK_WED_EXT_INT_MASK3 0x034 #define MTK_WED_STATUS 0x060 #define MTK_WED_STATUS_TX GENMASK(15, 8) +#define MTK_WED_WPDMA_STATUS 0x068 +#define MTK_WED_WPDMA_STATUS_TX_DRV GENMASK(15, 8) + #define MTK_WED_TX_BM_CTRL 0x080 #define MTK_WED_TX_BM_CTRL_VLD_GRP_NUM GENMASK(6, 0) #define MTK_WED_TX_BM_CTRL_RSV_GRP_NUM GENMASK(22, 16) +#define MTK_WED_TX_BM_CTRL_LEGACY_EN BIT(26) +#define MTK_WED_TX_TKID_CTRL_FREE_FORMAT BIT(27) #define MTK_WED_TX_BM_CTRL_PAUSE BIT(28) #define MTK_WED_TX_BM_BASE 0x084 +#define MTK_WED_TX_BM_INIT_PTR 0x088 +#define MTK_WED_TX_BM_SW_TAIL_IDX GENMASK(16, 0) +#define MTK_WED_TX_BM_INIT_SW_TAIL_IDX BIT(16) -#define MTK_WED_TX_BM_TKID 0x088 -#define MTK_WED_TX_BM_TKID_V2 0x0c8 #define MTK_WED_TX_BM_TKID_START GENMASK(15, 0) #define MTK_WED_TX_BM_TKID_END GENMASK(31, 16) @@ -124,6 +145,12 @@ struct mtk_wdma_desc { #define MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM GENMASK(22, 16) #define MTK_WED_TX_TKID_CTRL_PAUSE BIT(28) +#define MTK_WED_TX_TKID_INTF 0x0dc +#define MTK_WED_TX_TKID_INTF_TKFIFO_FDEP GENMASK(25, 16) + +#define MTK_WED_TX_TKID_CTRL_VLD_GRP_NUM_V3 GENMASK(7, 0) +#define MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM_V3 GENMASK(23, 16) + #define MTK_WED_TX_TKID_DYN_THR 0x0e0 #define MTK_WED_TX_TKID_DYN_THR_LO GENMASK(6, 0) #define MTK_WED_TX_TKID_DYN_THR_HI GENMASK(22, 16) @@ -160,9 +187,6 @@ struct mtk_wdma_desc { #define MTK_WED_GLO_CFG_RX_2B_OFFSET BIT(31) #define MTK_WED_RESET_IDX 0x20c -#define MTK_WED_RESET_IDX_TX GENMASK(3, 0) -#define MTK_WED_RESET_IDX_RX GENMASK(17, 16) -#define MTK_WED_RESET_IDX_RX_V2 GENMASK(7, 6) #define MTK_WED_RESET_WPDMA_IDX_RX GENMASK(31, 30) #define MTK_WED_TX_MIB(_n) (0x2a0 + (_n) * 4) @@ -174,6 +198,7 @@ struct mtk_wdma_desc { #define MTK_WED_RING_RX_DATA(_n) (0x420 + (_n) * 0x10) #define MTK_WED_SCR0 0x3c0 +#define MTK_WED_RX1_CTRL2 0x418 #define MTK_WED_WPDMA_INT_TRIGGER 0x504 #define MTK_WED_WPDMA_INT_TRIGGER_RX_DONE BIT(1) #define MTK_WED_WPDMA_INT_TRIGGER_TX_DONE GENMASK(5, 4) @@ -204,12 +229,15 @@ struct mtk_wdma_desc { #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R1_PKT_PROC BIT(5) #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC BIT(6) #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R1_CRX_SYNC BIT(7) -#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_VER GENMASK(18, 16) +#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_VER GENMASK(15, 12) +#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNS_VER_FORCE_4 BIT(18) #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNSUPPORT_FMT BIT(19) -#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UEVENT_PKT_FMT_CHK BIT(20) +#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_CHK BIT(20) #define MTK_WED_WPDMA_GLO_CFG_RX_DDONE2_WR BIT(21) #define MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP BIT(24) +#define MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK_LAST BIT(25) #define MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV BIT(28) +#define MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK BIT(30) #define MTK_WED_WPDMA_RESET_IDX 0x50c #define MTK_WED_WPDMA_RESET_IDX_TX GENMASK(3, 0) @@ -255,9 +283,10 @@ struct mtk_wdma_desc { #define MTK_WED_PCIE_INT_TRIGGER_STATUS BIT(16) #define MTK_WED_PCIE_INT_CTRL 0x57c -#define MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA BIT(20) -#define MTK_WED_PCIE_INT_CTRL_SRC_SEL GENMASK(17, 16) #define MTK_WED_PCIE_INT_CTRL_POLL_EN GENMASK(13, 12) +#define MTK_WED_PCIE_INT_CTRL_SRC_SEL GENMASK(17, 16) +#define MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA BIT(20) +#define MTK_WED_PCIE_INT_CTRL_MSK_IRQ_FILTER BIT(21) #define MTK_WED_WPDMA_CFG_BASE 0x580 #define MTK_WED_WPDMA_CFG_INT_MASK 0x584 @@ -283,15 +312,30 @@ struct mtk_wdma_desc { #define MTK_WED_WPDMA_RX_D_RST_IDX 0x760 #define MTK_WED_WPDMA_RX_D_RST_CRX_IDX GENMASK(17, 16) +#define MTK_WED_WPDMA_RX_D_RST_DRV_IDX_ALL BIT(20) #define MTK_WED_WPDMA_RX_D_RST_DRV_IDX GENMASK(25, 24) #define MTK_WED_WPDMA_RX_GLO_CFG 0x76c -#define MTK_WED_WPDMA_RX_RING 0x770 #define MTK_WED_WPDMA_RX_D_MIB(_n) (0x774 + (_n) * 4) #define MTK_WED_WPDMA_RX_D_PROCESSED_MIB(_n) (0x784 + (_n) * 4) #define MTK_WED_WPDMA_RX_D_COHERENT_MIB 0x78c +#define MTK_WED_WPDMA_RX_D_PREF_CFG 0x7b4 +#define MTK_WED_WPDMA_RX_D_PREF_EN BIT(0) +#define MTK_WED_WPDMA_RX_D_PREF_BUSY BIT(1) +#define MTK_WED_WPDMA_RX_D_PREF_BURST_SIZE GENMASK(12, 8) +#define MTK_WED_WPDMA_RX_D_PREF_LOW_THRES GENMASK(21, 16) + +#define MTK_WED_WPDMA_RX_D_PREF_RX0_SIDX 0x7b8 +#define MTK_WED_WPDMA_RX_D_PREF_SIDX_IDX_CLR BIT(15) + +#define MTK_WED_WPDMA_RX_D_PREF_RX1_SIDX 0x7bc + +#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG 0x7c0 +#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R0_CLR BIT(0) +#define MTK_WED_WPDMA_RX_D_PREF_FIFO_CFG_R1_CLR BIT(16) + #define MTK_WED_WDMA_RING_TX 0x800 #define MTK_WED_WDMA_TX_MIB 0x810 @@ -299,6 +343,20 @@ struct mtk_wdma_desc { #define MTK_WED_WDMA_RING_RX(_n) (0x900 + (_n) * 0x10) #define MTK_WED_WDMA_RX_THRES(_n) (0x940 + (_n) * 0x4) +#define MTK_WED_WDMA_RX_PREF_CFG 0x950 +#define MTK_WED_WDMA_RX_PREF_EN BIT(0) +#define MTK_WED_WDMA_RX_PREF_BUSY BIT(1) +#define MTK_WED_WDMA_RX_PREF_BURST_SIZE GENMASK(12, 8) +#define MTK_WED_WDMA_RX_PREF_LOW_THRES GENMASK(21, 16) +#define MTK_WED_WDMA_RX_PREF_RX0_SIDX_CLR BIT(24) +#define MTK_WED_WDMA_RX_PREF_RX1_SIDX_CLR BIT(25) +#define MTK_WED_WDMA_RX_PREF_DDONE2_EN BIT(26) +#define MTK_WED_WDMA_RX_PREF_DDONE2_BUSY BIT(27) + +#define MTK_WED_WDMA_RX_PREF_FIFO_CFG 0x95C +#define MTK_WED_WDMA_RX_PREF_FIFO_RX0_CLR BIT(0) +#define MTK_WED_WDMA_RX_PREF_FIFO_RX1_CLR BIT(16) + #define MTK_WED_WDMA_GLO_CFG 0xa04 #define MTK_WED_WDMA_GLO_CFG_TX_DRV_EN BIT(0) #define MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK BIT(1) @@ -322,6 +380,7 @@ struct mtk_wdma_desc { #define MTK_WED_WDMA_RESET_IDX 0xa08 #define MTK_WED_WDMA_RESET_IDX_RX GENMASK(17, 16) +#define MTK_WED_WDMA_RESET_IDX_RX_ALL BIT(20) #define MTK_WED_WDMA_RESET_IDX_DRV GENMASK(25, 24) #define MTK_WED_WDMA_INT_CLR 0xa24 @@ -331,6 +390,7 @@ struct mtk_wdma_desc { #define MTK_WED_WDMA_INT_TRIGGER_RX_DONE GENMASK(17, 16) #define MTK_WED_WDMA_INT_CTRL 0xa2c +#define MTK_WED_WDMA_INT_POLL_PRD GENMASK(7, 0) #define MTK_WED_WDMA_INT_CTRL_POLL_SRC_SEL GENMASK(17, 16) #define MTK_WED_WDMA_CFG_BASE 0xaa0 @@ -391,9 +451,62 @@ struct mtk_wdma_desc { #define MTK_WDMA_INT_MASK_RX_DELAY BIT(30) #define MTK_WDMA_INT_MASK_RX_COHERENT BIT(31) +#define MTK_WDMA_XDMA_TX_FIFO_CFG 0x238 +#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_PAR_FIFO_CLEAR BIT(0) +#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_CMD_FIFO_CLEAR BIT(4) +#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_DMAD_FIFO_CLEAR BIT(8) +#define MTK_WDMA_XDMA_TX_FIFO_CFG_TX_ARR_FIFO_CLEAR BIT(12) + +#define MTK_WDMA_XDMA_RX_FIFO_CFG 0x23c +#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_PAR_FIFO_CLEAR BIT(0) +#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_CMD_FIFO_CLEAR BIT(4) +#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_DMAD_FIFO_CLEAR BIT(8) +#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_ARR_FIFO_CLEAR BIT(12) +#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_LEN_FIFO_CLEAR BIT(15) +#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_WID_FIFO_CLEAR BIT(18) +#define MTK_WDMA_XDMA_RX_FIFO_CFG_RX_BID_FIFO_CLEAR BIT(21) + #define MTK_WDMA_INT_GRP1 0x250 #define MTK_WDMA_INT_GRP2 0x254 +#define MTK_WDMA_PREF_TX_CFG 0x2d0 +#define MTK_WDMA_PREF_TX_CFG_PREF_EN BIT(0) +#define MTK_WDMA_PREF_TX_CFG_PREF_BUSY BIT(1) + +#define MTK_WDMA_PREF_RX_CFG 0x2dc +#define MTK_WDMA_PREF_RX_CFG_PREF_EN BIT(0) +#define MTK_WDMA_PREF_RX_CFG_PREF_BUSY BIT(1) + +#define MTK_WDMA_PREF_RX_FIFO_CFG 0x2e0 +#define MTK_WDMA_PREF_RX_FIFO_CFG_RING0_CLEAR BIT(0) +#define MTK_WDMA_PREF_RX_FIFO_CFG_RING1_CLEAR BIT(16) + +#define MTK_WDMA_PREF_TX_FIFO_CFG 0x2d4 +#define MTK_WDMA_PREF_TX_FIFO_CFG_RING0_CLEAR BIT(0) +#define MTK_WDMA_PREF_TX_FIFO_CFG_RING1_CLEAR BIT(16) + +#define MTK_WDMA_PREF_SIDX_CFG 0x2e4 +#define MTK_WDMA_PREF_SIDX_CFG_TX_RING_CLEAR GENMASK(3, 0) +#define MTK_WDMA_PREF_SIDX_CFG_RX_RING_CLEAR GENMASK(5, 4) + +#define MTK_WDMA_WRBK_TX_CFG 0x300 +#define MTK_WDMA_WRBK_TX_CFG_WRBK_BUSY BIT(0) +#define MTK_WDMA_WRBK_TX_CFG_WRBK_EN BIT(30) + +#define MTK_WDMA_WRBK_TX_FIFO_CFG(_n) (0x304 + (_n) * 0x4) +#define MTK_WDMA_WRBK_TX_FIFO_CFG_RING_CLEAR BIT(0) + +#define MTK_WDMA_WRBK_RX_CFG 0x344 +#define MTK_WDMA_WRBK_RX_CFG_WRBK_BUSY BIT(0) +#define MTK_WDMA_WRBK_RX_CFG_WRBK_EN BIT(30) + +#define MTK_WDMA_WRBK_RX_FIFO_CFG(_n) (0x348 + (_n) * 0x4) +#define MTK_WDMA_WRBK_RX_FIFO_CFG_RING_CLEAR BIT(0) + +#define MTK_WDMA_WRBK_SIDX_CFG 0x388 +#define MTK_WDMA_WRBK_SIDX_CFG_TX_RING_CLEAR GENMASK(3, 0) +#define MTK_WDMA_WRBK_SIDX_CFG_RX_RING_CLEAR GENMASK(5, 4) + #define MTK_PCIE_MIRROR_MAP(n) ((n) ? 0x4 : 0x0) #define MTK_PCIE_MIRROR_MAP_EN BIT(0) #define MTK_PCIE_MIRROR_MAP_WED_ID BIT(1) @@ -407,6 +520,32 @@ struct mtk_wdma_desc { #define MTK_WED_RTQM_Q_DBG_BYPASS BIT(5) #define MTK_WED_RTQM_TXDMAD_FPORT GENMASK(23, 20) +#define MTK_WED_RTQM_RST 0xb04 + +#define MTK_WED_RTQM_IGRS0_I2HW_DMAD_CNT 0xb1c +#define MTK_WED_RTQM_IGRS0_I2H_DMAD_CNT(_n) (0xb20 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS0_I2HW_PKT_CNT 0xb28 +#define MTK_WED_RTQM_IGRS0_I2H_PKT_CNT(_n) (0xb2c + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS0_FDROP_CNT 0xb34 + +#define MTK_WED_RTQM_IGRS1_I2HW_DMAD_CNT 0xb44 +#define MTK_WED_RTQM_IGRS1_I2H_DMAD_CNT(_n) (0xb48 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS1_I2HW_PKT_CNT 0xb50 +#define MTK_WED_RTQM_IGRS1_I2H_PKT_CNT(_n) (0xb54 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS1_FDROP_CNT 0xb5c + +#define MTK_WED_RTQM_IGRS2_I2HW_DMAD_CNT 0xb6c +#define MTK_WED_RTQM_IGRS2_I2H_DMAD_CNT(_n) (0xb70 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS2_I2HW_PKT_CNT 0xb78 +#define MTK_WED_RTQM_IGRS2_I2H_PKT_CNT(_n) (0xb7c + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS2_FDROP_CNT 0xb84 + +#define MTK_WED_RTQM_IGRS3_I2HW_DMAD_CNT 0xb94 +#define MTK_WED_RTQM_IGRS3_I2H_DMAD_CNT(_n) (0xb98 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS3_I2HW_PKT_CNT 0xba0 +#define MTK_WED_RTQM_IGRS3_I2H_PKT_CNT(_n) (0xba4 + (_n) * 0x4) +#define MTK_WED_RTQM_IGRS3_FDROP_CNT 0xbac + #define MTK_WED_RTQM_R2H_MIB(_n) (0xb70 + (_n) * 0x4) #define MTK_WED_RTQM_R2Q_MIB(_n) (0xb78 + (_n) * 0x4) #define MTK_WED_RTQM_Q2N_MIB 0xb80 @@ -415,6 +554,24 @@ struct mtk_wdma_desc { #define MTK_WED_RTQM_Q2B_MIB 0xb8c #define MTK_WED_RTQM_PFDBK_MIB 0xb90 +#define MTK_WED_RTQM_ENQ_CFG0 0xbb8 +#define MTK_WED_RTQM_ENQ_CFG_TXDMAD_FPORT GENMASK(15, 12) + +#define MTK_WED_RTQM_FDROP_MIB 0xb84 +#define MTK_WED_RTQM_ENQ_I2Q_DMAD_CNT 0xbbc +#define MTK_WED_RTQM_ENQ_I2N_DMAD_CNT 0xbc0 +#define MTK_WED_RTQM_ENQ_I2Q_PKT_CNT 0xbc4 +#define MTK_WED_RTQM_ENQ_I2N_PKT_CNT 0xbc8 +#define MTK_WED_RTQM_ENQ_USED_ENTRY_CNT 0xbcc +#define MTK_WED_RTQM_ENQ_ERR_CNT 0xbd0 + +#define MTK_WED_RTQM_DEQ_DMAD_CNT 0xbd8 +#define MTK_WED_RTQM_DEQ_Q2I_DMAD_CNT 0xbdc +#define MTK_WED_RTQM_DEQ_PKT_CNT 0xbe0 +#define MTK_WED_RTQM_DEQ_Q2I_PKT_CNT 0xbe4 +#define MTK_WED_RTQM_DEQ_USED_PFDBK_CNT 0xbe8 +#define MTK_WED_RTQM_DEQ_ERR_CNT 0xbec + #define MTK_WED_RROQM_GLO_CFG 0xc04 #define MTK_WED_RROQM_RST_IDX 0xc08 #define MTK_WED_RROQM_RST_IDX_MIOD BIT(0) @@ -464,7 +621,195 @@ struct mtk_wdma_desc { #define MTK_WED_RX_BM_INTF 0xd9c #define MTK_WED_RX_BM_ERR_STS 0xda8 +#define MTK_RRO_IND_CMD_SIGNATURE 0xe00 +#define MTK_RRO_IND_CMD_DMA_IDX GENMASK(11, 0) +#define MTK_RRO_IND_CMD_MAGIC_CNT GENMASK(30, 28) + +#define MTK_WED_IND_CMD_RX_CTRL0 0xe04 +#define MTK_WED_IND_CMD_PROC_IDX GENMASK(11, 0) +#define MTK_WED_IND_CMD_PREFETCH_FREE_CNT GENMASK(19, 16) +#define MTK_WED_IND_CMD_MAGIC_CNT GENMASK(30, 28) + +#define MTK_WED_IND_CMD_RX_CTRL1 0xe08 +#define MTK_WED_IND_CMD_RX_CTRL2 0xe0c +#define MTK_WED_IND_CMD_MAX_CNT GENMASK(11, 0) +#define MTK_WED_IND_CMD_BASE_M GENMASK(19, 16) + +#define MTK_WED_RRO_CFG0 0xe10 +#define MTK_WED_RRO_CFG1 0xe14 +#define MTK_WED_RRO_CFG1_MAX_WIN_SZ GENMASK(31, 29) +#define MTK_WED_RRO_CFG1_ACK_SN_BASE_M GENMASK(19, 16) +#define MTK_WED_RRO_CFG1_PARTICL_SE_ID GENMASK(11, 0) + +#define MTK_WED_ADDR_ELEM_CFG0 0xe18 +#define MTK_WED_ADDR_ELEM_CFG1 0xe1c +#define MTK_WED_ADDR_ELEM_PREFETCH_FREE_CNT GENMASK(19, 16) + +#define MTK_WED_ADDR_ELEM_TBL_CFG 0xe20 +#define MTK_WED_ADDR_ELEM_TBL_OFFSET GENMASK(6, 0) +#define MTK_WED_ADDR_ELEM_TBL_RD_RDY BIT(28) +#define MTK_WED_ADDR_ELEM_TBL_WR_RDY BIT(29) +#define MTK_WED_ADDR_ELEM_TBL_RD BIT(30) +#define MTK_WED_ADDR_ELEM_TBL_WR BIT(31) + +#define MTK_WED_RADDR_ELEM_TBL_WDATA 0xe24 +#define MTK_WED_RADDR_ELEM_TBL_RDATA 0xe28 + +#define MTK_WED_PN_CHECK_CFG 0xe30 +#define MTK_WED_PN_CHECK_SE_ID GENMASK(11, 0) +#define MTK_WED_PN_CHECK_RD_RDY BIT(28) +#define MTK_WED_PN_CHECK_WR_RDY BIT(29) +#define MTK_WED_PN_CHECK_RD BIT(30) +#define MTK_WED_PN_CHECK_WR BIT(31) + +#define MTK_WED_PN_CHECK_WDATA_M 0xe38 +#define MTK_WED_PN_CHECK_IS_FIRST BIT(17) + +#define MTK_WED_RRO_MSDU_PG_RING_CFG(_n) (0xe44 + (_n) * 0x8) + +#define MTK_WED_RRO_MSDU_PG_RING2_CFG 0xe58 +#define MTK_WED_RRO_MSDU_PG_DRV_CLR BIT(26) +#define MTK_WED_RRO_MSDU_PG_DRV_EN BIT(31) + +#define MTK_WED_RRO_MSDU_PG_CTRL0(_n) (0xe5c + (_n) * 0xc) +#define MTK_WED_RRO_MSDU_PG_CTRL1(_n) (0xe60 + (_n) * 0xc) +#define MTK_WED_RRO_MSDU_PG_CTRL2(_n) (0xe64 + (_n) * 0xc) + +#define MTK_WED_RRO_RX_D_RX(_n) (0xe80 + (_n) * 0x10) + +#define MTK_WED_RRO_RX_MAGIC_CNT BIT(13) + +#define MTK_WED_RRO_RX_D_CFG(_n) (0xea0 + (_n) * 0x4) +#define MTK_WED_RRO_RX_D_DRV_CLR BIT(26) +#define MTK_WED_RRO_RX_D_DRV_EN BIT(31) + +#define MTK_WED_RRO_PG_BM_RX_DMAM 0xeb0 +#define MTK_WED_RRO_PG_BM_RX_SDL0 GENMASK(13, 0) + +#define MTK_WED_RRO_PG_BM_BASE 0xeb4 +#define MTK_WED_RRO_PG_BM_INIT_PTR 0xeb8 +#define MTK_WED_RRO_PG_BM_SW_TAIL_IDX GENMASK(15, 0) +#define MTK_WED_RRO_PG_BM_INIT_SW_TAIL_IDX BIT(16) + +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX 0xeec +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_EN BIT(0) +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_CLR BIT(1) +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX0_DONE_TRIG GENMASK(6, 2) +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_EN BIT(8) +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_CLR BIT(9) +#define MTK_WED_WPDMA_INT_CTRL_RRO_RX1_DONE_TRIG GENMASK(14, 10) + +#define MTK_WED_WPDMA_INT_CTRL_RRO_MSDU_PG 0xef4 +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_EN BIT(0) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_CLR BIT(1) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG0_DONE_TRIG GENMASK(6, 2) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_EN BIT(8) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_CLR BIT(9) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG1_DONE_TRIG GENMASK(14, 10) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_EN BIT(16) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_CLR BIT(17) +#define MTK_WED_WPDMA_INT_CTRL_RRO_PG2_DONE_TRIG GENMASK(22, 18) + +#define MTK_WED_RRO_RX_HW_STS 0xf00 +#define MTK_WED_RX_IND_CMD_BUSY GENMASK(31, 0) + +#define MTK_WED_RX_IND_CMD_CNT0 0xf20 +#define MTK_WED_RX_IND_CMD_DBG_CNT_EN BIT(31) + +#define MTK_WED_RX_IND_CMD_CNT(_n) (0xf20 + (_n) * 0x4) +#define MTK_WED_IND_CMD_MAGIC_CNT_FAIL_CNT GENMASK(15, 0) + +#define MTK_WED_RX_ADDR_ELEM_CNT(_n) (0xf48 + (_n) * 0x4) +#define MTK_WED_ADDR_ELEM_SIG_FAIL_CNT GENMASK(15, 0) +#define MTK_WED_ADDR_ELEM_FIRST_SIG_FAIL_CNT GENMASK(31, 16) +#define MTK_WED_ADDR_ELEM_ACKSN_CNT GENMASK(27, 0) + +#define MTK_WED_RX_MSDU_PG_CNT(_n) (0xf5c + (_n) * 0x4) + +#define MTK_WED_RX_PN_CHK_CNT 0xf70 +#define MTK_WED_PN_CHK_FAIL_CNT GENMASK(15, 0) + #define MTK_WED_WOCPU_VIEW_MIOD_BASE 0x8000 #define MTK_WED_PCIE_INT_MASK 0x0 +#define MTK_WED_AMSDU_FIFO 0x1800 +#define MTK_WED_AMSDU_IS_PRIOR0_RING BIT(10) + +#define MTK_WED_AMSDU_STA_INFO 0x01810 +#define MTK_WED_AMSDU_STA_INFO_DO_INIT BIT(0) +#define MTK_WED_AMSDU_STA_INFO_SET_INIT BIT(1) + +#define MTK_WED_AMSDU_STA_INFO_INIT 0x01814 +#define MTK_WED_AMSDU_STA_WTBL_HDRT_MODE BIT(0) +#define MTK_WED_AMSDU_STA_RMVL BIT(1) +#define MTK_WED_AMSDU_STA_MAX_AMSDU_LEN GENMASK(7, 2) +#define MTK_WED_AMSDU_STA_MAX_AMSDU_NUM GENMASK(11, 8) + +#define MTK_WED_AMSDU_HIFTXD_BASE_L(_n) (0x1980 + (_n) * 0x4) + +#define MTK_WED_AMSDU_PSE 0x1910 +#define MTK_WED_AMSDU_PSE_RESET BIT(16) + +#define MTK_WED_AMSDU_HIFTXD_CFG 0x1968 +#define MTK_WED_AMSDU_HIFTXD_SRC GENMASK(16, 15) + +#define MTK_WED_MON_AMSDU_FIFO_DMAD 0x1a34 + +#define MTK_WED_MON_AMSDU_ENG_DMAD(_n) (0x1a80 + (_n) * 0x50) +#define MTK_WED_MON_AMSDU_ENG_QFPL(_n) (0x1a84 + (_n) * 0x50) +#define MTK_WED_MON_AMSDU_ENG_QENI(_n) (0x1a88 + (_n) * 0x50) +#define MTK_WED_MON_AMSDU_ENG_QENO(_n) (0x1a8c + (_n) * 0x50) +#define MTK_WED_MON_AMSDU_ENG_MERG(_n) (0x1a90 + (_n) * 0x50) + +#define MTK_WED_MON_AMSDU_ENG_CNT8(_n) (0x1a94 + (_n) * 0x50) +#define MTK_WED_AMSDU_ENG_MAX_QGPP_CNT GENMASK(10, 0) +#define MTK_WED_AMSDU_ENG_MAX_PL_CNT GENMASK(27, 16) + +#define MTK_WED_MON_AMSDU_ENG_CNT9(_n) (0x1a98 + (_n) * 0x50) +#define MTK_WED_AMSDU_ENG_CUR_ENTRY GENMASK(10, 0) +#define MTK_WED_AMSDU_ENG_MAX_BUF_MERGED GENMASK(20, 16) +#define MTK_WED_AMSDU_ENG_MAX_MSDU_MERGED GENMASK(28, 24) + +#define MTK_WED_MON_AMSDU_QMEM_STS1 0x1e04 + +#define MTK_WED_MON_AMSDU_QMEM_CNT(_n) (0x1e0c + (_n) * 0x4) +#define MTK_WED_AMSDU_QMEM_FQ_CNT GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_SP_QCNT GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID0_QCNT GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID1_QCNT GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID2_QCNT GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID3_QCNT GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID4_QCNT GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID5_QCNT GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID6_QCNT GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID7_QCNT GENMASK(11, 0) + +#define MTK_WED_MON_AMSDU_QMEM_PTR(_n) (0x1e20 + (_n) * 0x4) +#define MTK_WED_AMSDU_QMEM_FQ_HEAD GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_SP_QHEAD GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID0_QHEAD GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID1_QHEAD GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID2_QHEAD GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID3_QHEAD GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID4_QHEAD GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID5_QHEAD GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID6_QHEAD GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID7_QHEAD GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_FQ_TAIL GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_SP_QTAIL GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID0_QTAIL GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID1_QTAIL GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID2_QTAIL GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID3_QTAIL GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID4_QTAIL GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID5_QTAIL GENMASK(11, 0) +#define MTK_WED_AMSDU_QMEM_TID6_QTAIL GENMASK(27, 16) +#define MTK_WED_AMSDU_QMEM_TID7_QTAIL GENMASK(11, 0) + +#define MTK_WED_MON_AMSDU_HIFTXD_FETCH_MSDU(_n) (0x1ec4 + (_n) * 0x4) + +#define MTK_WED_PCIE_BASE 0x11280000 +#define MTK_WED_PCIE_BASE0 0x11300000 +#define MTK_WED_PCIE_BASE1 0x11310000 +#define MTK_WED_PCIE_BASE2 0x11290000 #endif diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.h b/drivers/net/ethernet/mediatek/mtk_wed_wo.h index 7a1a2a28f1ac..87a67fa3868d 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.h +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.h @@ -91,6 +91,8 @@ enum mtk_wed_dummy_cr_idx { #define MT7981_FIRMWARE_WO "mediatek/mt7981_wo.bin" #define MT7986_FIRMWARE_WO0 "mediatek/mt7986_wo_0.bin" #define MT7986_FIRMWARE_WO1 "mediatek/mt7986_wo_1.bin" +#define MT7988_FIRMWARE_WO0 "mediatek/mt7988_wo_0.bin" +#define MT7988_FIRMWARE_WO1 "mediatek/mt7988_wo_1.bin" #define MTK_WO_MCU_CFG_LS_BASE 0 #define MTK_WO_MCU_CFG_LS_HW_VER_ADDR (MTK_WO_MCU_CFG_LS_BASE + 0x000) @@ -228,7 +230,6 @@ struct mtk_wed_wo_queue { struct mtk_wed_wo { struct mtk_wed_hw *hw; - struct mtk_wed_wo_memory_region boot; struct mtk_wed_wo_queue q_tx; struct mtk_wed_wo_queue q_rx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index c4f4de82e29e..685335832a93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -189,3 +189,11 @@ config MLX5_SF_MANAGER port is managed through devlink. A subfunction supports RDMA, netdevice and vdpa device. It is similar to a SRIOV VF but it doesn't require SRIOV support. + +config MLX5_DPLL + tristate "Mellanox 5th generation network adapters (ConnectX series) DPLL support" + depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE + select DPLL + help + DPLL support in Mellanox Technologies ConnectX NICs. + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 7e94caca4888..c44870b175f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -128,3 +128,6 @@ mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_ # SF manager # mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o + +obj-$(CONFIG_MLX5_DPLL) += mlx5_dpll.o +mlx5_dpll-y := dpll.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 7909f378dc93..1fc03480c2ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -206,6 +206,19 @@ static bool is_ib_enabled(struct mlx5_core_dev *dev) return err ? false : val.vbool; } +static bool is_dpll_supported(struct mlx5_core_dev *dev) +{ + if (!IS_ENABLED(CONFIG_MLX5_DPLL)) + return false; + + if (!MLX5_CAP_MCAM_REG2(dev, synce_registers)) { + mlx5_core_warn(dev, "Missing SyncE capability\n"); + return false; + } + + return true; +} + enum { MLX5_INTERFACE_PROTOCOL_ETH, MLX5_INTERFACE_PROTOCOL_ETH_REP, @@ -215,6 +228,8 @@ enum { MLX5_INTERFACE_PROTOCOL_MPIB, MLX5_INTERFACE_PROTOCOL_VNET, + + MLX5_INTERFACE_PROTOCOL_DPLL, }; static const struct mlx5_adev_device { @@ -237,6 +252,8 @@ static const struct mlx5_adev_device { .is_supported = &is_ib_rep_supported }, [MLX5_INTERFACE_PROTOCOL_MPIB] = { .suffix = "multiport", .is_supported = &is_mp_supported }, + [MLX5_INTERFACE_PROTOCOL_DPLL] = { .suffix = "dpll", + .is_supported = &is_dpll_supported }, }; int mlx5_adev_idx_alloc(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index af8460bb257b..3e064234f6fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -138,7 +138,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, { struct mlx5_core_dev *dev = devlink_priv(devlink); struct pci_dev *pdev = dev->pdev; - bool sf_dev_allocated; int ret = 0; if (mlx5_dev_is_lightweight(dev)) { @@ -148,16 +147,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, return 0; } - sf_dev_allocated = mlx5_sf_dev_allocated(dev); - if (sf_dev_allocated) { - /* Reload results in deleting SF device which further results in - * unregistering devlink instance while holding devlink_mutext. - * Hence, do not support reload. - */ - NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated"); - return -EOPNOTSUPP; - } - if (mlx5_lag_is_active(dev)) { NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c new file mode 100644 index 000000000000..74f0c7867120 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <linux/dpll.h> +#include <linux/mlx5/driver.h> + +/* This structure represents a reference to DPLL, one is created + * per mdev instance. + */ +struct mlx5_dpll { + struct dpll_device *dpll; + struct dpll_pin *dpll_pin; + struct mlx5_core_dev *mdev; + struct workqueue_struct *wq; + struct delayed_work work; + struct { + bool valid; + enum dpll_lock_status lock_status; + enum dpll_pin_state pin_state; + } last; + struct notifier_block mdev_nb; + struct net_device *tracking_netdev; +}; + +static int mlx5_dpll_clock_id_get(struct mlx5_core_dev *mdev, u64 *clock_id) +{ + u32 out[MLX5_ST_SZ_DW(msecq_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(msecq_reg)] = {}; + int err; + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MSECQ, 0, 0); + if (err) + return err; + *clock_id = MLX5_GET64(msecq_reg, out, local_clock_identity); + return 0; +} + +static int +mlx5_dpll_synce_status_get(struct mlx5_core_dev *mdev, + enum mlx5_msees_admin_status *admin_status, + enum mlx5_msees_oper_status *oper_status, + bool *ho_acq) +{ + u32 out[MLX5_ST_SZ_DW(msees_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(msees_reg)] = {}; + int err; + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MSEES, 0, 0); + if (err) + return err; + if (admin_status) + *admin_status = MLX5_GET(msees_reg, out, admin_status); + *oper_status = MLX5_GET(msees_reg, out, oper_status); + if (ho_acq) + *ho_acq = MLX5_GET(msees_reg, out, ho_acq); + return 0; +} + +static int +mlx5_dpll_synce_status_set(struct mlx5_core_dev *mdev, + enum mlx5_msees_admin_status admin_status) +{ + u32 out[MLX5_ST_SZ_DW(msees_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(msees_reg)] = {}; + + MLX5_SET(msees_reg, in, field_select, + MLX5_MSEES_FIELD_SELECT_ENABLE | + MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS); + MLX5_SET(msees_reg, in, admin_status, admin_status); + return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MSEES, 0, 1); +} + +static enum dpll_lock_status +mlx5_dpll_lock_status_get(enum mlx5_msees_oper_status oper_status, bool ho_acq) +{ + switch (oper_status) { + case MLX5_MSEES_OPER_STATUS_SELF_TRACK: + fallthrough; + case MLX5_MSEES_OPER_STATUS_OTHER_TRACK: + return ho_acq ? DPLL_LOCK_STATUS_LOCKED_HO_ACQ : + DPLL_LOCK_STATUS_LOCKED; + case MLX5_MSEES_OPER_STATUS_HOLDOVER: + fallthrough; + case MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER: + return DPLL_LOCK_STATUS_HOLDOVER; + default: + return DPLL_LOCK_STATUS_UNLOCKED; + } +} + +static enum dpll_pin_state +mlx5_dpll_pin_state_get(enum mlx5_msees_admin_status admin_status, + enum mlx5_msees_oper_status oper_status) +{ + return (admin_status == MLX5_MSEES_ADMIN_STATUS_TRACK && + (oper_status == MLX5_MSEES_OPER_STATUS_SELF_TRACK || + oper_status == MLX5_MSEES_OPER_STATUS_OTHER_TRACK)) ? + DPLL_PIN_STATE_CONNECTED : DPLL_PIN_STATE_DISCONNECTED; +} + +static int mlx5_dpll_device_lock_status_get(const struct dpll_device *dpll, + void *priv, + enum dpll_lock_status *status, + struct netlink_ext_ack *extack) +{ + enum mlx5_msees_oper_status oper_status; + struct mlx5_dpll *mdpll = priv; + bool ho_acq; + int err; + + err = mlx5_dpll_synce_status_get(mdpll->mdev, NULL, + &oper_status, &ho_acq); + if (err) + return err; + + *status = mlx5_dpll_lock_status_get(oper_status, ho_acq); + return 0; +} + +static int mlx5_dpll_device_mode_get(const struct dpll_device *dpll, + void *priv, + u32 *mode, struct netlink_ext_ack *extack) +{ + *mode = DPLL_MODE_MANUAL; + return 0; +} + +static bool mlx5_dpll_device_mode_supported(const struct dpll_device *dpll, + void *priv, + enum dpll_mode mode, + struct netlink_ext_ack *extack) +{ + return mode == DPLL_MODE_MANUAL; +} + +static const struct dpll_device_ops mlx5_dpll_device_ops = { + .lock_status_get = mlx5_dpll_device_lock_status_get, + .mode_get = mlx5_dpll_device_mode_get, + .mode_supported = mlx5_dpll_device_mode_supported, +}; + +static int mlx5_dpll_pin_direction_get(const struct dpll_pin *pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack) +{ + *direction = DPLL_PIN_DIRECTION_INPUT; + return 0; +} + +static int mlx5_dpll_state_on_dpll_get(const struct dpll_pin *pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack) +{ + enum mlx5_msees_admin_status admin_status; + enum mlx5_msees_oper_status oper_status; + struct mlx5_dpll *mdpll = pin_priv; + int err; + + err = mlx5_dpll_synce_status_get(mdpll->mdev, &admin_status, + &oper_status, NULL); + if (err) + return err; + *state = mlx5_dpll_pin_state_get(admin_status, oper_status); + return 0; +} + +static int mlx5_dpll_state_on_dpll_set(const struct dpll_pin *pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_pin_state state, + struct netlink_ext_ack *extack) +{ + struct mlx5_dpll *mdpll = pin_priv; + + return mlx5_dpll_synce_status_set(mdpll->mdev, + state == DPLL_PIN_STATE_CONNECTED ? + MLX5_MSEES_ADMIN_STATUS_TRACK : + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING); +} + +static const struct dpll_pin_ops mlx5_dpll_pins_ops = { + .direction_get = mlx5_dpll_pin_direction_get, + .state_on_dpll_get = mlx5_dpll_state_on_dpll_get, + .state_on_dpll_set = mlx5_dpll_state_on_dpll_set, +}; + +static const struct dpll_pin_properties mlx5_dpll_pin_properties = { + .type = DPLL_PIN_TYPE_SYNCE_ETH_PORT, + .capabilities = DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE, +}; + +#define MLX5_DPLL_PERIODIC_WORK_INTERVAL 500 /* ms */ + +static void mlx5_dpll_periodic_work_queue(struct mlx5_dpll *mdpll) +{ + queue_delayed_work(mdpll->wq, &mdpll->work, + msecs_to_jiffies(MLX5_DPLL_PERIODIC_WORK_INTERVAL)); +} + +static void mlx5_dpll_periodic_work(struct work_struct *work) +{ + struct mlx5_dpll *mdpll = container_of(work, struct mlx5_dpll, + work.work); + enum mlx5_msees_admin_status admin_status; + enum mlx5_msees_oper_status oper_status; + enum dpll_lock_status lock_status; + enum dpll_pin_state pin_state; + bool ho_acq; + int err; + + err = mlx5_dpll_synce_status_get(mdpll->mdev, &admin_status, + &oper_status, &ho_acq); + if (err) + goto err_out; + lock_status = mlx5_dpll_lock_status_get(oper_status, ho_acq); + pin_state = mlx5_dpll_pin_state_get(admin_status, oper_status); + + if (!mdpll->last.valid) + goto invalid_out; + + if (mdpll->last.lock_status != lock_status) + dpll_device_change_ntf(mdpll->dpll); + if (mdpll->last.pin_state != pin_state) + dpll_pin_change_ntf(mdpll->dpll_pin); + +invalid_out: + mdpll->last.lock_status = lock_status; + mdpll->last.pin_state = pin_state; + mdpll->last.valid = true; +err_out: + mlx5_dpll_periodic_work_queue(mdpll); +} + +static void mlx5_dpll_netdev_dpll_pin_set(struct mlx5_dpll *mdpll, + struct net_device *netdev) +{ + if (mdpll->tracking_netdev) + return; + netdev_dpll_pin_set(netdev, mdpll->dpll_pin); + mdpll->tracking_netdev = netdev; +} + +static void mlx5_dpll_netdev_dpll_pin_clear(struct mlx5_dpll *mdpll) +{ + if (!mdpll->tracking_netdev) + return; + netdev_dpll_pin_clear(mdpll->tracking_netdev); + mdpll->tracking_netdev = NULL; +} + +static int mlx5_dpll_mdev_notifier_event(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct mlx5_dpll *mdpll = container_of(nb, struct mlx5_dpll, mdev_nb); + struct net_device *netdev = data; + + switch (event) { + case MLX5_DRIVER_EVENT_UPLINK_NETDEV: + if (netdev) + mlx5_dpll_netdev_dpll_pin_set(mdpll, netdev); + else + mlx5_dpll_netdev_dpll_pin_clear(mdpll); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static void mlx5_dpll_mdev_netdev_track(struct mlx5_dpll *mdpll, + struct mlx5_core_dev *mdev) +{ + mdpll->mdev_nb.notifier_call = mlx5_dpll_mdev_notifier_event; + mlx5_blocking_notifier_register(mdev, &mdpll->mdev_nb); + mlx5_core_uplink_netdev_event_replay(mdev); +} + +static void mlx5_dpll_mdev_netdev_untrack(struct mlx5_dpll *mdpll, + struct mlx5_core_dev *mdev) +{ + mlx5_blocking_notifier_unregister(mdev, &mdpll->mdev_nb); + mlx5_dpll_netdev_dpll_pin_clear(mdpll); +} + +static int mlx5_dpll_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + struct mlx5_core_dev *mdev = edev->mdev; + struct mlx5_dpll *mdpll; + u64 clock_id; + int err; + + err = mlx5_dpll_synce_status_set(mdev, + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING); + if (err) + return err; + + err = mlx5_dpll_clock_id_get(mdev, &clock_id); + if (err) + return err; + + mdpll = kzalloc(sizeof(*mdpll), GFP_KERNEL); + if (!mdpll) + return -ENOMEM; + mdpll->mdev = mdev; + auxiliary_set_drvdata(adev, mdpll); + + /* Multiple mdev instances might share one DPLL device. */ + mdpll->dpll = dpll_device_get(clock_id, 0, THIS_MODULE); + if (IS_ERR(mdpll->dpll)) { + err = PTR_ERR(mdpll->dpll); + goto err_free_mdpll; + } + + err = dpll_device_register(mdpll->dpll, DPLL_TYPE_EEC, + &mlx5_dpll_device_ops, mdpll); + if (err) + goto err_put_dpll_device; + + /* Multiple mdev instances might share one DPLL pin. */ + mdpll->dpll_pin = dpll_pin_get(clock_id, mlx5_get_dev_index(mdev), + THIS_MODULE, &mlx5_dpll_pin_properties); + if (IS_ERR(mdpll->dpll_pin)) { + err = PTR_ERR(mdpll->dpll_pin); + goto err_unregister_dpll_device; + } + + err = dpll_pin_register(mdpll->dpll, mdpll->dpll_pin, + &mlx5_dpll_pins_ops, mdpll); + if (err) + goto err_put_dpll_pin; + + mdpll->wq = create_singlethread_workqueue("mlx5_dpll"); + if (!mdpll->wq) { + err = -ENOMEM; + goto err_unregister_dpll_pin; + } + + mlx5_dpll_mdev_netdev_track(mdpll, mdev); + + INIT_DELAYED_WORK(&mdpll->work, &mlx5_dpll_periodic_work); + mlx5_dpll_periodic_work_queue(mdpll); + + return 0; + +err_unregister_dpll_pin: + dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin, + &mlx5_dpll_pins_ops, mdpll); +err_put_dpll_pin: + dpll_pin_put(mdpll->dpll_pin); +err_unregister_dpll_device: + dpll_device_unregister(mdpll->dpll, &mlx5_dpll_device_ops, mdpll); +err_put_dpll_device: + dpll_device_put(mdpll->dpll); +err_free_mdpll: + kfree(mdpll); + return err; +} + +static void mlx5_dpll_remove(struct auxiliary_device *adev) +{ + struct mlx5_dpll *mdpll = auxiliary_get_drvdata(adev); + struct mlx5_core_dev *mdev = mdpll->mdev; + + cancel_delayed_work(&mdpll->work); + mlx5_dpll_mdev_netdev_untrack(mdpll, mdev); + destroy_workqueue(mdpll->wq); + dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin, + &mlx5_dpll_pins_ops, mdpll); + dpll_pin_put(mdpll->dpll_pin); + dpll_device_unregister(mdpll->dpll, &mlx5_dpll_device_ops, mdpll); + dpll_device_put(mdpll->dpll); + kfree(mdpll); + + mlx5_dpll_synce_status_set(mdev, + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING); +} + +static int mlx5_dpll_suspend(struct auxiliary_device *adev, pm_message_t state) +{ + return 0; +} + +static int mlx5_dpll_resume(struct auxiliary_device *adev) +{ + return 0; +} + +static const struct auxiliary_device_id mlx5_dpll_id_table[] = { + { .name = MLX5_ADEV_NAME ".dpll", }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5_dpll_id_table); + +static struct auxiliary_driver mlx5_dpll_driver = { + .name = "dpll", + .probe = mlx5_dpll_probe, + .remove = mlx5_dpll_remove, + .suspend = mlx5_dpll_suspend, + .resume = mlx5_dpll_resume, + .id_table = mlx5_dpll_id_table, +}; + +static int __init mlx5_dpll_init(void) +{ + return auxiliary_driver_register(&mlx5_dpll_driver); +} + +static void __exit mlx5_dpll_exit(void) +{ + auxiliary_driver_unregister(&mlx5_dpll_driver); +} + +module_init(mlx5_dpll_init); +module_exit(mlx5_dpll_exit); + +MODULE_AUTHOR("Jiri Pirko <jiri@nvidia.com>"); +MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) DPLL driver"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index c6b6e290fd79..0b1ac6e5c890 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -12,11 +12,19 @@ struct mlx5e_dev *mlx5e_create_devlink(struct device *dev, { struct mlx5e_dev *mlx5e_dev; struct devlink *devlink; + int err; devlink = devlink_alloc_ns(&mlx5e_devlink_ops, sizeof(*mlx5e_dev), devlink_net(priv_to_devlink(mdev)), dev); if (!devlink) return ERR_PTR(-ENOMEM); + + err = devl_nested_devlink_set(priv_to_devlink(mdev), devlink); + if (err) { + devlink_free(devlink); + return ERR_PTR(err); + } + devlink_register(devlink); return devlink_priv(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 15561965d2af..d17c9c31b165 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1405,9 +1405,9 @@ err_irq_table: static void mlx5_unload(struct mlx5_core_dev *dev) { + mlx5_eswitch_disable(dev->priv.eswitch); mlx5_devlink_traps_unregister(priv_to_devlink(dev)); mlx5_sf_dev_table_destroy(dev); - mlx5_eswitch_disable(dev->priv.eswitch); mlx5_sriov_detach(dev); mlx5_lag_remove_mdev(dev); mlx5_ec_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h index 2a66a427ef15..b99131e95e37 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h @@ -19,6 +19,12 @@ struct mlx5_sf_dev { u16 fn_id; }; +struct mlx5_sf_peer_devlink_event_ctx { + u16 fn_id; + struct devlink *devlink; + int err; +}; + void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev); void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 8fe82f1191bb..169c2c68ed5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -8,6 +8,20 @@ #include "dev.h" #include "devlink.h" +static int mlx5_core_peer_devlink_set(struct mlx5_sf_dev *sf_dev, struct devlink *devlink) +{ + struct mlx5_sf_peer_devlink_event_ctx event_ctx = { + .fn_id = sf_dev->fn_id, + .devlink = devlink, + }; + int ret; + + ret = mlx5_blocking_notifier_call_chain(sf_dev->parent_mdev, + MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, + &event_ctx); + return ret == NOTIFY_OK ? event_ctx.err : 0; +} + static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); @@ -54,9 +68,21 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err); goto init_one_err; } + + err = mlx5_core_peer_devlink_set(sf_dev, devlink); + if (err) { + mlx5_core_warn(mdev, "mlx5_core_peer_devlink_set err=%d\n", err); + goto peer_devlink_set_err; + } + devlink_register(devlink); return 0; +peer_devlink_set_err: + if (mlx5_dev_is_lightweight(sf_dev->mdev)) + mlx5_uninit_one_light(sf_dev->mdev); + else + mlx5_uninit_one(sf_dev->mdev); init_one_err: iounmap(mdev->iseg); remap_err: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index e34a8f88c518..964a5b1876f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -28,6 +28,7 @@ struct mlx5_sf_table { struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. */ struct notifier_block esw_nb; struct notifier_block vhca_nb; + struct notifier_block mdev_nb; }; static struct mlx5_sf * @@ -511,6 +512,35 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi return 0; } +static int mlx5_sf_mdev_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, mdev_nb); + struct mlx5_sf_peer_devlink_event_ctx *event_ctx = data; + int ret = NOTIFY_DONE; + struct mlx5_sf *sf; + + if (event != MLX5_DRIVER_EVENT_SF_PEER_DEVLINK) + return NOTIFY_DONE; + + table = mlx5_sf_table_try_get(table->dev); + if (!table) + return NOTIFY_DONE; + + mutex_lock(&table->sf_state_lock); + sf = mlx5_sf_lookup_by_function_id(table, event_ctx->fn_id); + if (!sf) + goto out; + + event_ctx->err = devl_port_fn_devlink_set(&sf->dl_port.dl_port, + event_ctx->devlink); + + ret = NOTIFY_OK; +out: + mutex_unlock(&table->sf_state_lock); + mlx5_sf_table_put(table); + return ret; +} + static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev) { return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && @@ -544,6 +574,9 @@ int mlx5_sf_table_init(struct mlx5_core_dev *dev) if (err) goto vhca_err; + table->mdev_nb.notifier_call = mlx5_sf_mdev_event; + mlx5_blocking_notifier_register(dev, &table->mdev_nb); + return 0; vhca_err: @@ -562,6 +595,7 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) if (!table) return; + mlx5_blocking_notifier_unregister(dev, &table->mdev_nb); mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb); mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb); WARN_ON(refcount_read(&table->refcount)); diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 694de9513b9f..954ba0826c61 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -471,7 +471,7 @@ out: return err; } -static int mlxbf_gige_remove(struct platform_device *pdev) +static void mlxbf_gige_remove(struct platform_device *pdev) { struct mlxbf_gige *priv = platform_get_drvdata(pdev); @@ -479,8 +479,6 @@ static int mlxbf_gige_remove(struct platform_device *pdev) phy_disconnect(priv->netdev->phydev); mlxbf_gige_mdio_remove(priv); platform_set_drvdata(pdev, NULL); - - return 0; } static void mlxbf_gige_shutdown(struct platform_device *pdev) @@ -499,7 +497,7 @@ MODULE_DEVICE_TABLE(acpi, mlxbf_gige_acpi_match); static struct platform_driver mlxbf_gige_driver = { .probe = mlxbf_gige_probe, - .remove = mlxbf_gige_remove, + .remove_new = mlxbf_gige_remove, .shutdown = mlxbf_gige_shutdown, .driver = { .name = KBUILD_MODNAME, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c index af37e650a8ad..e8d6fe35bf36 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c @@ -132,6 +132,7 @@ static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev, struct mlxsw_linecard *linecard = linecard_bdev->linecard; struct mlxsw_linecard_dev *linecard_dev; struct devlink *devlink; + int err; devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops, sizeof(*linecard_dev), &adev->dev); @@ -141,8 +142,12 @@ static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev, linecard_dev->linecard = linecard_bdev->linecard; linecard_bdev->linecard_dev = linecard_dev; + err = devlink_linecard_nested_dl_set(linecard->devlink_linecard, devlink); + if (err) { + devlink_free(devlink); + return err; + } devlink_register(devlink); - devlink_linecard_nested_dl_set(linecard->devlink_linecard, devlink); return 0; } @@ -151,9 +156,7 @@ static void mlxsw_linecard_bdev_remove(struct auxiliary_device *adev) struct mlxsw_linecard_bdev *linecard_bdev = container_of(adev, struct mlxsw_linecard_bdev, adev); struct devlink *devlink = priv_to_devlink(linecard_bdev->linecard_dev); - struct mlxsw_linecard *linecard = linecard_bdev->linecard; - devlink_linecard_nested_dl_set(linecard->devlink_linecard, NULL); devlink_unregister(devlink); devlink_free(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c index e2aced7ab454..95f63fcf4ba1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c @@ -496,7 +496,7 @@ mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks) * is 2^ACL_MAX_BF_LOG */ bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG); - bf = kzalloc(struct_size(bf, refcnt, bf_bank_size * num_erp_banks), + bf = kzalloc(struct_size(bf, refcnt, size_mul(bf_bank_size, num_erp_banks)), GFP_KERNEL); if (!bf) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index c11b118dc415..ddd87ef71caf 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -1228,7 +1228,7 @@ err_mem_region: return err; } -static int ks8842_remove(struct platform_device *pdev) +static void ks8842_remove(struct platform_device *pdev) { struct net_device *netdev = platform_get_drvdata(pdev); struct ks8842_adapter *adapter = netdev_priv(netdev); @@ -1239,7 +1239,6 @@ static int ks8842_remove(struct platform_device *pdev) iounmap(adapter->hw_addr); free_netdev(netdev); release_mem_region(iomem->start, resource_size(iomem)); - return 0; } @@ -1248,7 +1247,7 @@ static struct platform_driver ks8842_platform_driver = { .name = DRV_NAME, }, .probe = ks8842_probe, - .remove = ks8842_remove, + .remove_new = ks8842_remove, }; module_platform_driver(ks8842_platform_driver); diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c index 7f49042484bd..2a7f29854267 100644 --- a/drivers/net/ethernet/micrel/ks8851_par.c +++ b/drivers/net/ethernet/micrel/ks8851_par.c @@ -327,11 +327,9 @@ static int ks8851_probe_par(struct platform_device *pdev) return ks8851_probe_common(netdev, dev, msg_enable); } -static int ks8851_remove_par(struct platform_device *pdev) +static void ks8851_remove_par(struct platform_device *pdev) { ks8851_remove_common(&pdev->dev); - - return 0; } static const struct of_device_id ks8851_match_table[] = { @@ -347,7 +345,7 @@ static struct platform_driver ks8851_driver = { .pm = &ks8851_pm_ops, }, .probe = ks8851_probe_par, - .remove = ks8851_remove_par, + .remove_new = ks8851_remove_par, }; module_platform_driver(ks8851_driver); diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index c81cdeb4d4e7..f940895b14e8 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1466,9 +1466,15 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) static void lan743x_phy_close(struct lan743x_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct phy_device *phydev = netdev->phydev; phy_stop(netdev->phydev); phy_disconnect(netdev->phydev); + + /* using phydev here as phy_disconnect NULLs netdev->phydev */ + if (phy_is_pseudo_fixed_link(phydev)) + fixed_phy_unregister(phydev); + } static void lan743x_phy_interface_select(struct lan743x_adapter *adapter) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 0d6e79af2410..8e4101628fbd 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -1261,7 +1261,7 @@ cleanup_ports: return err; } -static int lan966x_remove(struct platform_device *pdev) +static void lan966x_remove(struct platform_device *pdev) { struct lan966x *lan966x = platform_get_drvdata(pdev); @@ -1280,13 +1280,11 @@ static int lan966x_remove(struct platform_device *pdev) lan966x_ptp_deinit(lan966x); debugfs_remove_recursive(lan966x->debugfs_root); - - return 0; } static struct platform_driver lan966x_driver = { .probe = lan966x_probe, - .remove = lan966x_remove, + .remove_new = lan966x_remove, .driver = { .name = "lan966x-switch", .of_match_table = lan966x_match, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index dc9af480bfea..d1f7fc8b1b71 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -911,7 +911,7 @@ cleanup_pnode: return err; } -static int mchp_sparx5_remove(struct platform_device *pdev) +static void mchp_sparx5_remove(struct platform_device *pdev) { struct sparx5 *sparx5 = platform_get_drvdata(pdev); @@ -931,8 +931,6 @@ static int mchp_sparx5_remove(struct platform_device *pdev) /* Unregister netdevs */ sparx5_unregister_notifier_blocks(sparx5); destroy_workqueue(sparx5->mact_queue); - - return 0; } static const struct of_device_id mchp_sparx5_match[] = { @@ -943,7 +941,7 @@ MODULE_DEVICE_TABLE(of, mchp_sparx5_match); static struct platform_driver mchp_sparx5_driver = { .probe = mchp_sparx5_probe, - .remove = mchp_sparx5_remove, + .remove_new = mchp_sparx5_remove, .driver = { .name = "sparx5-switch", .of_match_table = mchp_sparx5_match, diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 3da99b62797d..96dc69e7141f 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -558,7 +558,7 @@ irq_map_fail: return ret; } -static int moxart_remove(struct platform_device *pdev) +static void moxart_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); @@ -566,8 +566,6 @@ static int moxart_remove(struct platform_device *pdev) devm_free_irq(&pdev->dev, ndev->irq, ndev); moxart_mac_free_memory(ndev); free_netdev(ndev); - - return 0; } static const struct of_device_id moxart_mac_match[] = { @@ -578,7 +576,7 @@ MODULE_DEVICE_TABLE(of, moxart_mac_match); static struct platform_driver moxart_mac_driver = { .probe = moxart_mac_probe, - .remove = moxart_remove, + .remove_new = moxart_remove, .driver = { .name = "moxart-ethernet", .of_match_table = moxart_mac_match, diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 151b42465348..993212c3a7da 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -392,7 +392,7 @@ out_free_devlink: return err; } -static int mscc_ocelot_remove(struct platform_device *pdev) +static void mscc_ocelot_remove(struct platform_device *pdev) { struct ocelot *ocelot = platform_get_drvdata(pdev); @@ -408,13 +408,11 @@ static int mscc_ocelot_remove(struct platform_device *pdev) unregister_switchdev_notifier(&ocelot_switchdev_nb); unregister_netdevice_notifier(&ocelot_netdevice_nb); devlink_free(ocelot->devlink); - - return 0; } static struct platform_driver mscc_ocelot_driver = { .probe = mscc_ocelot_probe, - .remove = mscc_ocelot_remove, + .remove_new = mscc_ocelot_remove, .driver = { .name = "ocelot-switch", .of_match_table = mscc_ocelot_match, diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c index 3f371faeb6d0..2b6e097df28f 100644 --- a/drivers/net/ethernet/natsemi/jazzsonic.c +++ b/drivers/net/ethernet/natsemi/jazzsonic.c @@ -227,7 +227,7 @@ MODULE_ALIAS("platform:jazzsonic"); #include "sonic.c" -static int jazz_sonic_device_remove(struct platform_device *pdev) +static void jazz_sonic_device_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct sonic_local* lp = netdev_priv(dev); @@ -237,13 +237,11 @@ static int jazz_sonic_device_remove(struct platform_device *pdev) lp->descriptors, lp->descriptors_laddr); release_mem_region(dev->base_addr, SONIC_MEM_SIZE); free_netdev(dev); - - return 0; } static struct platform_driver jazz_sonic_driver = { .probe = jazz_sonic_probe, - .remove = jazz_sonic_device_remove, + .remove_new = jazz_sonic_device_remove, .driver = { .name = jazz_sonic_string, }, diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index b16f7c830f9b..2fc63860dbdb 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -532,7 +532,7 @@ MODULE_ALIAS("platform:macsonic"); #include "sonic.c" -static int mac_sonic_platform_remove(struct platform_device *pdev) +static void mac_sonic_platform_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct sonic_local* lp = netdev_priv(dev); @@ -541,13 +541,11 @@ static int mac_sonic_platform_remove(struct platform_device *pdev) dma_free_coherent(lp->device, SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), lp->descriptors, lp->descriptors_laddr); free_netdev(dev); - - return 0; } static struct platform_driver mac_sonic_platform_driver = { .probe = mac_sonic_platform_probe, - .remove = mac_sonic_platform_remove, + .remove_new = mac_sonic_platform_remove, .driver = { .name = "macsonic", }, diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index 52fef34d43f9..8943e7244310 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -249,7 +249,7 @@ MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver"); #include "sonic.c" -static int xtsonic_device_remove(struct platform_device *pdev) +static void xtsonic_device_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct sonic_local *lp = netdev_priv(dev); @@ -260,13 +260,11 @@ static int xtsonic_device_remove(struct platform_device *pdev) lp->descriptors, lp->descriptors_laddr); release_region (dev->base_addr, SONIC_MEM_SIZE); free_netdev(dev); - - return 0; } static struct platform_driver xtsonic_driver = { .probe = xtsonic_probe, - .remove = xtsonic_device_remove, + .remove_new = xtsonic_device_remove, .driver = { .name = xtsonic_string, }, diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index ba27bbc68f85..97f4798f4b42 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -1397,7 +1397,7 @@ free_netdev: return err; } -static int nixge_remove(struct platform_device *pdev) +static void nixge_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct nixge_priv *priv = netdev_priv(ndev); @@ -1412,13 +1412,11 @@ static int nixge_remove(struct platform_device *pdev) mdiobus_unregister(priv->mii_bus); free_netdev(ndev); - - return 0; } static struct platform_driver nixge_driver = { .probe = nixge_probe, - .remove = nixge_remove, + .remove_new = nixge_remove, .driver = { .name = "nixge", .of_match_table = nixge_dt_ids, diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 1a4a272f4c5c..dd3e58a1319c 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1417,7 +1417,7 @@ err_exit: return ret; } -static int lpc_eth_drv_remove(struct platform_device *pdev) +static void lpc_eth_drv_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct netdata_local *pldat = netdev_priv(ndev); @@ -1436,8 +1436,6 @@ static int lpc_eth_drv_remove(struct platform_device *pdev) clk_disable_unprepare(pldat->clk); clk_put(pldat->clk); free_netdev(ndev); - - return 0; } #ifdef CONFIG_PM @@ -1505,7 +1503,7 @@ MODULE_DEVICE_TABLE(of, lpc_eth_match); static struct platform_driver lpc_eth_driver = { .probe = lpc_eth_drv_probe, - .remove = lpc_eth_drv_remove, + .remove_new = lpc_eth_drv_remove, #ifdef CONFIG_PM .suspend = lpc_eth_drv_suspend, .resume = lpc_eth_drv_resume, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index aae4131f146a..1dbc3cb50b1d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -7,6 +7,7 @@ #include <linux/atomic.h> #include <linux/mutex.h> #include <linux/workqueue.h> +#include <linux/skbuff.h> #include "ionic_if.h" #include "ionic_regs.h" @@ -217,7 +218,7 @@ struct ionic_desc_info { }; unsigned int bytes; unsigned int nbufs; - struct ionic_buf_info bufs[IONIC_MAX_FRAGS]; + struct ionic_buf_info bufs[MAX_SKB_FRAGS + 1]; ionic_desc_cb cb; void *cb_arg; }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 2c3e36b2dd7f..edc14730ce88 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3831,6 +3831,18 @@ static void ionic_lif_queue_identify(struct ionic_lif *lif) qtype, qti->max_sg_elems); dev_dbg(ionic->dev, " qtype[%d].sg_desc_stride = %d\n", qtype, qti->sg_desc_stride); + + if (qti->max_sg_elems >= IONIC_MAX_FRAGS) { + qti->max_sg_elems = IONIC_MAX_FRAGS - 1; + dev_dbg(ionic->dev, "limiting qtype %d max_sg_elems to IONIC_MAX_FRAGS-1 %d\n", + qtype, qti->max_sg_elems); + } + + if (qti->max_sg_elems > MAX_SKB_FRAGS) { + qti->max_sg_elems = MAX_SKB_FRAGS; + dev_dbg(ionic->dev, "limiting qtype %d max_sg_elems to MAX_SKB_FRAGS %d\n", + qtype, qti->max_sg_elems); + } } } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 44466e8c5d77..ccc1b1d407e4 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -1243,25 +1243,84 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) { struct ionic_tx_stats *stats = q_to_tx_stats(q); + bool too_many_frags = false; + skb_frag_t *frag; + int desc_bufs; + int chunk_len; + int frag_rem; + int tso_rem; + int seg_rem; + bool encap; + int hdrlen; int ndescs; int err; /* Each desc is mss long max, so a descriptor for each gso_seg */ - if (skb_is_gso(skb)) + if (skb_is_gso(skb)) { ndescs = skb_shinfo(skb)->gso_segs; - else + } else { ndescs = 1; + if (skb_shinfo(skb)->nr_frags > q->max_sg_elems) { + too_many_frags = true; + goto linearize; + } + } - /* If non-TSO, just need 1 desc and nr_frags sg elems */ - if (skb_shinfo(skb)->nr_frags <= q->max_sg_elems) + /* If non-TSO, or no frags to check, we're done */ + if (!skb_is_gso(skb) || !skb_shinfo(skb)->nr_frags) return ndescs; - /* Too many frags, so linearize */ - err = skb_linearize(skb); - if (err) - return err; + /* We need to scan the skb to be sure that none of the MTU sized + * packets in the TSO will require more sgs per descriptor than we + * can support. We loop through the frags, add up the lengths for + * a packet, and count the number of sgs used per packet. + */ + tso_rem = skb->len; + frag = skb_shinfo(skb)->frags; + encap = skb->encapsulation; + + /* start with just hdr in first part of first descriptor */ + if (encap) + hdrlen = skb_inner_tcp_all_headers(skb); + else + hdrlen = skb_tcp_all_headers(skb); + seg_rem = min_t(int, tso_rem, hdrlen + skb_shinfo(skb)->gso_size); + frag_rem = hdrlen; + + while (tso_rem > 0) { + desc_bufs = 0; + while (seg_rem > 0) { + desc_bufs++; + + /* We add the +1 because we can take buffers for one + * more than we have SGs: one for the initial desc data + * in addition to the SG segments that might follow. + */ + if (desc_bufs > q->max_sg_elems + 1) { + too_many_frags = true; + goto linearize; + } + + if (frag_rem == 0) { + frag_rem = skb_frag_size(frag); + frag++; + } + chunk_len = min(frag_rem, seg_rem); + frag_rem -= chunk_len; + tso_rem -= chunk_len; + seg_rem -= chunk_len; + } + + seg_rem = min_t(int, tso_rem, skb_shinfo(skb)->gso_size); + } - stats->linearize++; +linearize: + if (too_many_frags) { + err = skb_linearize(skb); + if (err) + return err; + stats->linearize++; + } return ndescs; } diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 19bb16daf4e7..3270df72541b 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -718,7 +718,7 @@ err_undo_netdev: return ret; } -static int emac_remove(struct platform_device *pdev) +static void emac_remove(struct platform_device *pdev) { struct net_device *netdev = dev_get_drvdata(&pdev->dev); struct emac_adapter *adpt = netdev_priv(netdev); @@ -742,8 +742,6 @@ static int emac_remove(struct platform_device *pdev) iounmap(adpt->phy.base); free_netdev(netdev); - - return 0; } static void emac_shutdown(struct platform_device *pdev) @@ -762,7 +760,7 @@ static void emac_shutdown(struct platform_device *pdev) static struct platform_driver emac_platform_driver = { .probe = emac_probe, - .remove = emac_remove, + .remove_new = emac_remove, .driver = { .name = "qcom-emac", .of_match_table = emac_dt_match, diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 0ef0b88b7145..c70cff80cc99 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2880,7 +2880,7 @@ out_release: return error; } -static int ravb_remove(struct platform_device *pdev) +static void ravb_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct ravb_private *priv = netdev_priv(ndev); @@ -2907,8 +2907,6 @@ static int ravb_remove(struct platform_device *pdev) reset_control_assert(priv->rstc); free_netdev(ndev); platform_set_drvdata(pdev, NULL); - - return 0; } static int ravb_wol_setup(struct net_device *ndev) @@ -3046,7 +3044,7 @@ static const struct dev_pm_ops ravb_dev_pm_ops = { static struct platform_driver ravb_driver = { .probe = ravb_probe, - .remove = ravb_remove, + .remove_new = ravb_remove, .driver = { .name = "ravb", .pm = &ravb_dev_pm_ops, diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 0fc0b6bea753..112e605f104a 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1981,7 +1981,7 @@ static void rswitch_deinit(struct rswitch_private *priv) rswitch_clock_disable(priv); } -static int renesas_eth_sw_remove(struct platform_device *pdev) +static void renesas_eth_sw_remove(struct platform_device *pdev) { struct rswitch_private *priv = platform_get_drvdata(pdev); @@ -1991,13 +1991,11 @@ static int renesas_eth_sw_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); platform_set_drvdata(pdev, NULL); - - return 0; } static struct platform_driver renesas_eth_sw_driver_platform = { .probe = renesas_eth_sw_probe, - .remove = renesas_eth_sw_remove, + .remove_new = renesas_eth_sw_remove, .driver = { .name = "renesas_eth_sw", .of_match_table = renesas_eth_sw_of_table, diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 274ea16c0a1f..475e1e8c1d35 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3431,7 +3431,7 @@ out_release: return ret; } -static int sh_eth_drv_remove(struct platform_device *pdev) +static void sh_eth_drv_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct sh_eth_private *mdp = netdev_priv(ndev); @@ -3441,8 +3441,6 @@ static int sh_eth_drv_remove(struct platform_device *pdev) sh_mdio_release(mdp); pm_runtime_disable(&pdev->dev); free_netdev(ndev); - - return 0; } #ifdef CONFIG_PM @@ -3562,7 +3560,7 @@ MODULE_DEVICE_TABLE(platform, sh_eth_id_table); static struct platform_driver sh_eth_driver = { .probe = sh_eth_drv_probe, - .remove = sh_eth_drv_remove, + .remove_new = sh_eth_drv_remove, .id_table = sh_eth_id_table, .driver = { .name = CARDNAME, diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c index fb59ff94509a..e6e130dbe1de 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c @@ -169,13 +169,11 @@ err_out: * Description: this function calls the main to free the net resources * and calls the platforms hook and release the resources (e.g. mem). */ -static int sxgbe_platform_remove(struct platform_device *pdev) +static void sxgbe_platform_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); sxgbe_drv_remove(ndev); - - return 0; } #ifdef CONFIG_PM @@ -226,7 +224,7 @@ MODULE_DEVICE_TABLE(of, sxgbe_dt_ids); static struct platform_driver sxgbe_platform_driver = { .probe = sxgbe_platform_probe, - .remove = sxgbe_platform_remove, + .remove_new = sxgbe_platform_remove, .driver = { .name = SXGBE_RESOURCE_NAME, .pm = &sxgbe_platform_pm_ops, diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 96065dfc747b..76356dadf233 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -819,7 +819,7 @@ err_out: return err; } -static int sgiseeq_remove(struct platform_device *pdev) +static void sgiseeq_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct sgiseeq_private *sp = netdev_priv(dev); @@ -828,13 +828,11 @@ static int sgiseeq_remove(struct platform_device *pdev) dma_free_noncoherent(&pdev->dev, sizeof(*sp->srings), sp->srings, sp->srings_dma, DMA_BIDIRECTIONAL); free_netdev(dev); - - return 0; } static struct platform_driver sgiseeq_driver = { .probe = sgiseeq_probe, - .remove = sgiseeq_remove, + .remove_new = sgiseeq_remove, .driver = { .name = "sgiseeq", } diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index f54200f03e15..b04fdbb8aece 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -108,11 +108,17 @@ #define PTP_MIN_LENGTH 63 #define PTP_ADDR_IPV4 0xe0000181 /* 224.0.1.129 */ -#define PTP_ADDR_IPV6 {0xff, 0x0e, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 0x01, 0x81} /* ff0e::181 */ + +/* ff0e::181 */ +static const struct in6_addr ptp_addr_ipv6 = { { { + 0xff, 0x0e, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x01, 0x81 } } }; + +/* 01-1B-19-00-00-00 */ +static const u8 ptp_addr_ether[ETH_ALEN] __aligned(2) = { + 0x01, 0x1b, 0x19, 0x00, 0x00, 0x00 }; + #define PTP_EVENT_PORT 319 #define PTP_GENERAL_PORT 320 -#define PTP_ADDR_ETHER {0x01, 0x1b, 0x19, 0, 0, 0} /* 01-1B-19-00-00-00 */ /* Annoyingly the format of the version numbers are different between * versions 1 and 2 so it isn't possible to simply look for 1 or 2. @@ -1296,7 +1302,7 @@ static int efx_ptp_insert_ipv4_filter(struct efx_nic *efx, static int efx_ptp_insert_ipv6_filter(struct efx_nic *efx, struct list_head *filter_list, - struct in6_addr *addr, u16 port, + const struct in6_addr *addr, u16 port, unsigned long expiry) { struct efx_filter_spec spec; @@ -1309,11 +1315,10 @@ static int efx_ptp_insert_ipv6_filter(struct efx_nic *efx, static int efx_ptp_insert_eth_multicast_filter(struct efx_nic *efx) { struct efx_ptp_data *ptp = efx->ptp_data; - const u8 addr[ETH_ALEN] = PTP_ADDR_ETHER; struct efx_filter_spec spec; efx_ptp_init_filter(efx, &spec); - efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC, addr); + efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC, ptp_addr_ether); spec.match_flags |= EFX_FILTER_MATCH_ETHER_TYPE; spec.ether_type = htons(ETH_P_1588); return efx_ptp_insert_filter(efx, &ptp->rxfilters_mcast, &spec, 0); @@ -1346,15 +1351,13 @@ static int efx_ptp_insert_multicast_filters(struct efx_nic *efx) * PTP over IPv6 and Ethernet */ if (efx_ptp_use_mac_tx_timestamps(efx)) { - struct in6_addr ipv6_addr = {{PTP_ADDR_IPV6}}; - rc = efx_ptp_insert_ipv6_filter(efx, &ptp->rxfilters_mcast, - &ipv6_addr, PTP_EVENT_PORT, 0); + &ptp_addr_ipv6, PTP_EVENT_PORT, 0); if (rc < 0) goto fail; rc = efx_ptp_insert_ipv6_filter(efx, &ptp->rxfilters_mcast, - &ipv6_addr, PTP_GENERAL_PORT, 0); + &ptp_addr_ipv6, PTP_GENERAL_PORT, 0); if (rc < 0) goto fail; @@ -1379,9 +1382,7 @@ static bool efx_ptp_valid_unicast_event_pkt(struct sk_buff *skb) ip_hdr(skb)->protocol == IPPROTO_UDP && udp_hdr(skb)->source == htons(PTP_EVENT_PORT); } else if (skb->protocol == htons(ETH_P_IPV6)) { - struct in6_addr mcast_addr = {{PTP_ADDR_IPV6}}; - - return !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &mcast_addr) && + return !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &ptp_addr_ipv6) && ipv6_hdr(skb)->nexthdr == IPPROTO_UDP && udp_hdr(skb)->source == htons(PTP_EVENT_PORT); } diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 8fc3f5272fa7..98d0b561a057 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -962,7 +962,7 @@ out_free: return err; } -static int ioc3eth_remove(struct platform_device *pdev) +static void ioc3eth_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct ioc3_private *ip = netdev_priv(dev); @@ -973,8 +973,6 @@ static int ioc3eth_remove(struct platform_device *pdev) unregister_netdev(dev); del_timer_sync(&ip->ioc3_timer); free_netdev(dev); - - return 0; } @@ -1275,7 +1273,7 @@ static void ioc3_set_multicast_list(struct net_device *dev) static struct platform_driver ioc3eth_driver = { .probe = ioc3eth_probe, - .remove = ioc3eth_remove, + .remove_new = ioc3eth_remove, .driver = { .name = "ioc3-eth", } diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 6d850ea2b94c..18b6f93d875e 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -854,19 +854,17 @@ static int meth_probe(struct platform_device *pdev) return 0; } -static int meth_remove(struct platform_device *pdev) +static void meth_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); unregister_netdev(dev); free_netdev(dev); - - return 0; } static struct platform_driver meth_driver = { .probe = meth_probe, - .remove = meth_remove, + .remove_new = meth_remove, .driver = { .name = "meth", } diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 032eccf8eb42..758347616535 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2411,7 +2411,7 @@ static int smc_drv_probe(struct platform_device *pdev) return ret; } -static int smc_drv_remove(struct platform_device *pdev) +static void smc_drv_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct smc_local *lp = netdev_priv(ndev); @@ -2436,8 +2436,6 @@ static int smc_drv_remove(struct platform_device *pdev) release_mem_region(res->start, SMC_IO_EXTENT); free_netdev(ndev); - - return 0; } static int smc_drv_suspend(struct device *dev) @@ -2480,7 +2478,7 @@ static const struct dev_pm_ops smc_drv_pm_ops = { static struct platform_driver smc_driver = { .probe = smc_drv_probe, - .remove = smc_drv_remove, + .remove_new = smc_drv_remove, .driver = { .name = CARDNAME, .pm = &smc_drv_pm_ops, diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index cb590db625e8..31cb7d0166f0 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2314,7 +2314,7 @@ static int smsc911x_init(struct net_device *dev) return 0; } -static int smsc911x_drv_remove(struct platform_device *pdev) +static void smsc911x_drv_remove(struct platform_device *pdev) { struct net_device *dev; struct smsc911x_data *pdata; @@ -2348,8 +2348,6 @@ static int smsc911x_drv_remove(struct platform_device *pdev) free_netdev(dev); pm_runtime_disable(&pdev->dev); - - return 0; } /* standard register acces */ @@ -2668,7 +2666,7 @@ MODULE_DEVICE_TABLE(acpi, smsc911x_acpi_match); static struct platform_driver smsc911x_driver = { .probe = smsc911x_drv_probe, - .remove = smsc911x_drv_remove, + .remove_new = smsc911x_drv_remove, .driver = { .name = SMSC_CHIPNAME, .pm = SMSC911X_PM_OPS, diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index f358ea003193..d9cfafb96085 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -2150,7 +2150,7 @@ free_ndev: return ret; } -static int netsec_remove(struct platform_device *pdev) +static void netsec_remove(struct platform_device *pdev) { struct netsec_priv *priv = platform_get_drvdata(pdev); @@ -2162,8 +2162,6 @@ static int netsec_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); free_netdev(priv->ndev); - - return 0; } #ifdef CONFIG_PM @@ -2211,7 +2209,7 @@ MODULE_DEVICE_TABLE(acpi, netsec_acpi_ids); static struct platform_driver netsec_driver = { .probe = netsec_probe, - .remove = netsec_remove, + .remove_new = netsec_remove, .driver = { .name = "netsec", .pm = &netsec_pm_ops, diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 4838d2383a43..eed24e67c5a6 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1719,7 +1719,7 @@ out_del_napi: return ret; } -static int ave_remove(struct platform_device *pdev) +static void ave_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct ave_private *priv = netdev_priv(ndev); @@ -1727,8 +1727,6 @@ static int ave_remove(struct platform_device *pdev) unregister_netdev(ndev); netif_napi_del(&priv->napi_rx); netif_napi_del(&priv->napi_tx); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -1976,7 +1974,7 @@ MODULE_DEVICE_TABLE(of, of_ave_match); static struct platform_driver ave_driver = { .probe = ave_probe, - .remove = ave_remove, + .remove_new = ave_remove, .driver = { .name = "ave", .pm = AVE_PM_OPS, diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 06c6871f8788..a2b9e289aa36 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -239,6 +239,17 @@ config DWMAC_INTEL_PLAT the stmmac device driver. This driver is used for the Intel Keem Bay SoC. +config DWMAC_LOONGSON1 + tristate "Loongson1 GMAC support" + default MACH_LOONGSON32 + depends on OF && (MACH_LOONGSON32 || COMPILE_TEST) + help + Support for ethernet controller on Loongson1 SoC. + + This selects Loongson1 SoC glue layer support for the stmmac + device driver. This driver is used for Loongson1-based boards + like Loongson LS1B/LS1C. + config DWMAC_TEGRA tristate "NVIDIA Tegra MGBE support" depends on ARCH_TEGRA || COMPILE_TEST diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 5b57aee19267..80e598bd4255 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o obj-$(CONFIG_DWMAC_SUN8I) += dwmac-sun8i.o obj-$(CONFIG_DWMAC_DWC_QOS_ETH) += dwmac-dwc-qos-eth.o obj-$(CONFIG_DWMAC_INTEL_PLAT) += dwmac-intel-plat.o +obj-$(CONFIG_DWMAC_LOONGSON1) += dwmac-loongson1.o obj-$(CONFIG_DWMAC_GENERIC) += dwmac-generic.o obj-$(CONFIG_DWMAC_IMX8) += dwmac-imx.o obj-$(CONFIG_DWMAC_TEGRA) += dwmac-tegra.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c index 58a7f08e8d78..643ee6d8d4dd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c @@ -115,7 +115,7 @@ static int anarion_dwmac_probe(struct platform_device *pdev) if (IS_ERR(gmac)) return PTR_ERR(gmac); - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); @@ -124,13 +124,7 @@ static int anarion_dwmac_probe(struct platform_device *pdev) anarion_gmac_init(pdev, gmac); plat_dat->bsp_priv = gmac; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) { - stmmac_remove_config_dt(pdev, plat_dat); - return ret; - } - - return 0; + return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); } static const struct of_device_id anarion_dwmac_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 61ebf36da13d..ec924c6c76c6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -435,15 +435,14 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) if (IS_ERR(stmmac_res.addr)) return PTR_ERR(stmmac_res.addr); - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); ret = data->probe(pdev, plat_dat, &stmmac_res); if (ret < 0) { dev_err_probe(&pdev->dev, ret, "failed to probe subdriver\n"); - - goto remove_config; + return ret; } ret = dwc_eth_dwmac_config_dt(pdev, plat_dat); @@ -458,25 +457,17 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) remove: data->remove(pdev); -remove_config: - stmmac_remove_config_dt(pdev, plat_dat); return ret; } static void dwc_eth_dwmac_remove(struct platform_device *pdev) { - struct net_device *ndev = platform_get_drvdata(pdev); - struct stmmac_priv *priv = netdev_priv(ndev); - const struct dwc_eth_dwmac_data *data; - - data = device_get_match_data(&pdev->dev); + const struct dwc_eth_dwmac_data *data = device_get_match_data(&pdev->dev); stmmac_dvr_remove(&pdev->dev); data->remove(pdev); - - stmmac_remove_config_dt(pdev, priv->plat); } static const struct of_device_id dwc_eth_dwmac_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index 20fc455b3337..598eff926815 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -27,7 +27,7 @@ static int dwmac_generic_probe(struct platform_device *pdev) return ret; if (pdev->dev.of_node) { - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { dev_err(&pdev->dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); @@ -46,17 +46,7 @@ static int dwmac_generic_probe(struct platform_device *pdev) plat_dat->unicast_filter_entries = 1; } - ret = stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); - if (ret) - goto err_remove_config_dt; - - return 0; - -err_remove_config_dt: - if (pdev->dev.of_node) - stmmac_remove_config_dt(pdev, plat_dat); - - return ret; + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } static const struct of_device_id dwmac_generic_match[] = { @@ -77,7 +67,6 @@ MODULE_DEVICE_TABLE(of, dwmac_generic_match); static struct platform_driver dwmac_generic_driver = { .probe = dwmac_generic_probe, - .remove_new = stmmac_pltfr_remove, .driver = { .name = STMMAC_RESOURCE_NAME, .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index df34e34cc14f..8f730ada71f9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -331,15 +331,14 @@ static int imx_dwmac_probe(struct platform_device *pdev) if (!dwmac) return -ENOMEM; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); data = of_device_get_match_data(&pdev->dev); if (!data) { dev_err(&pdev->dev, "failed to get match data\n"); - ret = -EINVAL; - goto err_match_data; + return -EINVAL; } dwmac->ops = data; @@ -348,7 +347,7 @@ static int imx_dwmac_probe(struct platform_device *pdev) ret = imx_dwmac_parse_dt(dwmac, &pdev->dev); if (ret) { dev_err(&pdev->dev, "failed to parse OF data\n"); - goto err_parse_dt; + return ret; } if (data->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY) @@ -365,7 +364,7 @@ static int imx_dwmac_probe(struct platform_device *pdev) ret = imx_dwmac_clks_config(dwmac, true); if (ret) - goto err_clks_config; + return ret; ret = imx_dwmac_init(pdev, dwmac); if (ret) @@ -385,10 +384,6 @@ err_drv_probe: imx_dwmac_exit(pdev, plat_dat->bsp_priv); err_dwmac_init: imx_dwmac_clks_config(dwmac, false); -err_clks_config: -err_parse_dt: -err_match_data: - stmmac_remove_config_dt(pdev, plat_dat); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 0a20c3d24722..19c93b998fb3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -241,29 +241,25 @@ static int ingenic_mac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); mac = devm_kzalloc(&pdev->dev, sizeof(*mac), GFP_KERNEL); - if (!mac) { - ret = -ENOMEM; - goto err_remove_config_dt; - } + if (!mac) + return -ENOMEM; data = of_device_get_match_data(&pdev->dev); if (!data) { dev_err(&pdev->dev, "No of match data provided\n"); - ret = -EINVAL; - goto err_remove_config_dt; + return -EINVAL; } /* Get MAC PHY control register */ mac->regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "mode-reg"); if (IS_ERR(mac->regmap)) { dev_err(&pdev->dev, "%s: Failed to get syscon regmap\n", __func__); - ret = PTR_ERR(mac->regmap); - goto err_remove_config_dt; + return PTR_ERR(mac->regmap); } if (!of_property_read_u32(pdev->dev.of_node, "tx-clk-delay-ps", &tx_delay_ps)) { @@ -272,8 +268,7 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->tx_delay = tx_delay_ps * 1000; } else { dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps); - ret = -EINVAL; - goto err_remove_config_dt; + return -EINVAL; } } @@ -283,8 +278,7 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->rx_delay = rx_delay_ps * 1000; } else { dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps); - ret = -EINVAL; - goto err_remove_config_dt; + return -EINVAL; } } @@ -295,18 +289,9 @@ static int ingenic_mac_probe(struct platform_device *pdev) ret = ingenic_mac_init(plat_dat); if (ret) - goto err_remove_config_dt; - - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - goto err_remove_config_dt; - - return 0; - -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); + return ret; - return ret; + return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index d352a14f9d48..70edc5232379 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -85,17 +85,15 @@ static int intel_eth_plat_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { dev_err(&pdev->dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); } dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); - if (!dwmac) { - ret = -ENOMEM; - goto err_remove_config_dt; - } + if (!dwmac) + return -ENOMEM; dwmac->dev = &pdev->dev; dwmac->tx_clk = NULL; @@ -110,10 +108,8 @@ static int intel_eth_plat_probe(struct platform_device *pdev) /* Enable TX clock */ if (dwmac->data->tx_clk_en) { dwmac->tx_clk = devm_clk_get(&pdev->dev, "tx_clk"); - if (IS_ERR(dwmac->tx_clk)) { - ret = PTR_ERR(dwmac->tx_clk); - goto err_remove_config_dt; - } + if (IS_ERR(dwmac->tx_clk)) + return PTR_ERR(dwmac->tx_clk); clk_prepare_enable(dwmac->tx_clk); @@ -126,7 +122,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Failed to set tx_clk\n"); - goto err_remove_config_dt; + return ret; } } } @@ -140,7 +136,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Failed to set clk_ptp_ref\n"); - goto err_remove_config_dt; + return ret; } } } @@ -158,15 +154,10 @@ static int intel_eth_plat_probe(struct platform_device *pdev) ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) { clk_disable_unprepare(dwmac->tx_clk); - goto err_remove_config_dt; + return ret; } return 0; - -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); - - return ret; } static void intel_eth_plat_remove(struct platform_device *pdev) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 9b0200749109..281687d7083b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -384,22 +384,20 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) if (val) return val; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL); - if (!gmac) { - err = -ENOMEM; - goto err_remove_config_dt; - } + if (!gmac) + return -ENOMEM; gmac->pdev = pdev; err = ipq806x_gmac_of_parse(gmac); if (err) { dev_err(dev, "device tree parsing error\n"); - goto err_remove_config_dt; + return err; } regmap_write(gmac->qsgmii_csr, QSGMII_PCS_CAL_LCKDT_CTL, @@ -459,11 +457,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) if (gmac->phy_mode == PHY_INTERFACE_MODE_SGMII) { err = ipq806x_gmac_configure_qsgmii_params(gmac); if (err) - goto err_remove_config_dt; + return err; err = ipq806x_gmac_configure_qsgmii_pcs_speed(gmac); if (err) - goto err_remove_config_dt; + return err; } plat_dat->has_gmac = true; @@ -473,21 +471,12 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) plat_dat->tx_fifo_size = 8192; plat_dat->rx_fifo_size = 8192; - err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (err) - goto err_remove_config_dt; - - return 0; + return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); err_unsupported_phy: dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", phy_modes(gmac->phy_mode)); - err = -EINVAL; - -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); - - return err; + return -EINVAL; } static const struct of_device_id ipq806x_gmac_dwmac_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c new file mode 100644 index 000000000000..3e86810717d3 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson1.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Loongson-1 DWMAC glue layer + * + * Copyright (C) 2011-2023 Keguang Zhang <keguang.zhang@gmail.com> + */ + +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> + +#include "stmmac.h" +#include "stmmac_platform.h" + +#define LS1B_GMAC0_BASE (0x1fe10000) +#define LS1B_GMAC1_BASE (0x1fe20000) + +/* Loongson-1 SYSCON Registers */ +#define LS1X_SYSCON0 (0x0) +#define LS1X_SYSCON1 (0x4) + +/* Loongson-1B SYSCON Register Bits */ +#define GMAC1_USE_UART1 BIT(4) +#define GMAC1_USE_UART0 BIT(3) + +#define GMAC1_SHUT BIT(13) +#define GMAC0_SHUT BIT(12) + +#define GMAC1_USE_TXCLK BIT(3) +#define GMAC0_USE_TXCLK BIT(2) +#define GMAC1_USE_PWM23 BIT(1) +#define GMAC0_USE_PWM01 BIT(0) + +/* Loongson-1C SYSCON Register Bits */ +#define GMAC_SHUT BIT(6) + +#define PHY_INTF_SELI GENMASK(30, 28) +#define PHY_INTF_MII FIELD_PREP(PHY_INTF_SELI, 0) +#define PHY_INTF_RMII FIELD_PREP(PHY_INTF_SELI, 4) + +struct ls1x_dwmac { + struct plat_stmmacenet_data *plat_dat; + struct regmap *regmap; +}; + +static int ls1b_dwmac_syscon_init(struct platform_device *pdev, void *priv) +{ + struct ls1x_dwmac *dwmac = priv; + struct plat_stmmacenet_data *plat = dwmac->plat_dat; + struct regmap *regmap = dwmac->regmap; + struct resource *res; + unsigned long reg_base; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Could not get IO_MEM resources\n"); + return -EINVAL; + } + reg_base = (unsigned long)res->start; + + if (reg_base == LS1B_GMAC0_BASE) { + switch (plat->phy_interface) { + case PHY_INTERFACE_MODE_RGMII_ID: + regmap_update_bits(regmap, LS1X_SYSCON0, + GMAC0_USE_TXCLK | GMAC0_USE_PWM01, + 0); + break; + case PHY_INTERFACE_MODE_MII: + regmap_update_bits(regmap, LS1X_SYSCON0, + GMAC0_USE_TXCLK | GMAC0_USE_PWM01, + GMAC0_USE_TXCLK | GMAC0_USE_PWM01); + break; + default: + dev_err(&pdev->dev, "Unsupported PHY mode %u\n", + plat->phy_interface); + return -EOPNOTSUPP; + } + + regmap_update_bits(regmap, LS1X_SYSCON0, GMAC0_SHUT, 0); + } else if (reg_base == LS1B_GMAC1_BASE) { + regmap_update_bits(regmap, LS1X_SYSCON0, + GMAC1_USE_UART1 | GMAC1_USE_UART0, + GMAC1_USE_UART1 | GMAC1_USE_UART0); + + switch (plat->phy_interface) { + case PHY_INTERFACE_MODE_RGMII_ID: + regmap_update_bits(regmap, LS1X_SYSCON1, + GMAC1_USE_TXCLK | GMAC1_USE_PWM23, + 0); + + break; + case PHY_INTERFACE_MODE_MII: + regmap_update_bits(regmap, LS1X_SYSCON1, + GMAC1_USE_TXCLK | GMAC1_USE_PWM23, + GMAC1_USE_TXCLK | GMAC1_USE_PWM23); + break; + default: + dev_err(&pdev->dev, "Unsupported PHY mode %u\n", + plat->phy_interface); + return -EOPNOTSUPP; + } + + regmap_update_bits(regmap, LS1X_SYSCON1, GMAC1_SHUT, 0); + } else { + dev_err(&pdev->dev, "Invalid Ethernet MAC base address %lx", + reg_base); + return -EINVAL; + } + + return 0; +} + +static int ls1c_dwmac_syscon_init(struct platform_device *pdev, void *priv) +{ + struct ls1x_dwmac *dwmac = priv; + struct plat_stmmacenet_data *plat = dwmac->plat_dat; + struct regmap *regmap = dwmac->regmap; + + switch (plat->phy_interface) { + case PHY_INTERFACE_MODE_MII: + regmap_update_bits(regmap, LS1X_SYSCON1, PHY_INTF_SELI, + PHY_INTF_MII); + break; + case PHY_INTERFACE_MODE_RMII: + regmap_update_bits(regmap, LS1X_SYSCON1, PHY_INTF_SELI, + PHY_INTF_RMII); + break; + default: + dev_err(&pdev->dev, "Unsupported PHY-mode %u\n", + plat->phy_interface); + return -EOPNOTSUPP; + } + + regmap_update_bits(regmap, LS1X_SYSCON0, GMAC0_SHUT, 0); + + return 0; +} + +static int ls1x_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct regmap *regmap; + struct ls1x_dwmac *dwmac; + int (*init)(struct platform_device *pdev, void *priv); + int ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return ret; + + /* Probe syscon */ + regmap = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "loongson,ls1-syscon"); + if (IS_ERR(regmap)) + return dev_err_probe(&pdev->dev, PTR_ERR(regmap), + "Unable to find syscon\n"); + + init = of_device_get_match_data(&pdev->dev); + if (!init) { + dev_err(&pdev->dev, "No of match data provided\n"); + return -EINVAL; + } + + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); + if (!dwmac) + return -ENOMEM; + + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); + if (IS_ERR(plat_dat)) + return dev_err_probe(&pdev->dev, PTR_ERR(plat_dat), + "dt configuration failed\n"); + + plat_dat->bsp_priv = dwmac; + plat_dat->init = init; + dwmac->plat_dat = plat_dat; + dwmac->regmap = regmap; + + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); +} + +static const struct of_device_id ls1x_dwmac_match[] = { + { + .compatible = "loongson,ls1b-gmac", + .data = &ls1b_dwmac_syscon_init, + }, + { + .compatible = "loongson,ls1c-emac", + .data = &ls1c_dwmac_syscon_init, + }, + { } +}; +MODULE_DEVICE_TABLE(of, ls1x_dwmac_match); + +static struct platform_driver ls1x_dwmac_driver = { + .probe = ls1x_dwmac_probe, + .driver = { + .name = "loongson1-dwmac", + .of_match_table = ls1x_dwmac_match, + }, +}; +module_platform_driver(ls1x_dwmac_driver); + +MODULE_AUTHOR("Keguang Zhang <keguang.zhang@gmail.com>"); +MODULE_DESCRIPTION("Loongson-1 DWMAC glue layer"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index d0aa674ce705..4c810d8f5bea 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -37,7 +37,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); @@ -46,8 +46,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); if (IS_ERR(reg)) { dev_err(&pdev->dev, "syscon lookup failed\n"); - ret = PTR_ERR(reg); - goto err_remove_config_dt; + return PTR_ERR(reg); } if (plat_dat->mac_interface == PHY_INTERFACE_MODE_MII) { @@ -56,23 +55,13 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) ethmode = LPC18XX_CREG_CREG6_ETHMODE_RMII; } else { dev_err(&pdev->dev, "Only MII and RMII mode supported\n"); - ret = -EINVAL; - goto err_remove_config_dt; + return -EINVAL; } regmap_update_bits(reg, LPC18XX_CREG_CREG6, LPC18XX_CREG_CREG6_ETHMODE_MASK, ethmode); - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - goto err_remove_config_dt; - - return 0; - -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); - - return ret; + return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); } static const struct of_device_id lpc18xx_dwmac_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index cd796ec04132..2a9132d6d743 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -656,7 +656,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); @@ -665,7 +665,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) ret = mediatek_dwmac_clks_config(priv_plat, true); if (ret) - goto err_remove_config_dt; + return ret; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) @@ -675,8 +675,6 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) err_drv_probe: mediatek_dwmac_clks_config(priv_plat, false); -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c index 959f88c6da16..a16bfa9089ea 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c @@ -52,35 +52,22 @@ static int meson6_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); - if (!dwmac) { - ret = -ENOMEM; - goto err_remove_config_dt; - } + if (!dwmac) + return -ENOMEM; dwmac->reg = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(dwmac->reg)) { - ret = PTR_ERR(dwmac->reg); - goto err_remove_config_dt; - } + if (IS_ERR(dwmac->reg)) + return PTR_ERR(dwmac->reg); plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = meson6_dwmac_fix_mac_speed; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - goto err_remove_config_dt; - - return 0; - -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); - - return ret; + return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); } static const struct of_device_id meson6_dwmac_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 0b159dc0d5f6..b23944aa344e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -400,33 +400,27 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); - if (!dwmac) { - ret = -ENOMEM; - goto err_remove_config_dt; - } + if (!dwmac) + return -ENOMEM; dwmac->data = (const struct meson8b_dwmac_data *) of_device_get_match_data(&pdev->dev); - if (!dwmac->data) { - ret = -EINVAL; - goto err_remove_config_dt; - } + if (!dwmac->data) + return -EINVAL; dwmac->regs = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(dwmac->regs)) { - ret = PTR_ERR(dwmac->regs); - goto err_remove_config_dt; - } + if (IS_ERR(dwmac->regs)) + return PTR_ERR(dwmac->regs); dwmac->dev = &pdev->dev; ret = of_get_phy_mode(pdev->dev.of_node, &dwmac->phy_mode); if (ret) { dev_err(&pdev->dev, "missing phy-mode property\n"); - goto err_remove_config_dt; + return ret; } /* use 2ns as fallback since this value was previously hardcoded */ @@ -448,53 +442,40 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) if (dwmac->rx_delay_ps > 3000 || dwmac->rx_delay_ps % 200) { dev_err(dwmac->dev, "The RGMII RX delay range is 0..3000ps in 200ps steps"); - ret = -EINVAL; - goto err_remove_config_dt; + return -EINVAL; } } else { if (dwmac->rx_delay_ps != 0 && dwmac->rx_delay_ps != 2000) { dev_err(dwmac->dev, "The only allowed RGMII RX delays values are: 0ps, 2000ps"); - ret = -EINVAL; - goto err_remove_config_dt; + return -EINVAL; } } dwmac->timing_adj_clk = devm_clk_get_optional(dwmac->dev, "timing-adjustment"); - if (IS_ERR(dwmac->timing_adj_clk)) { - ret = PTR_ERR(dwmac->timing_adj_clk); - goto err_remove_config_dt; - } + if (IS_ERR(dwmac->timing_adj_clk)) + return PTR_ERR(dwmac->timing_adj_clk); ret = meson8b_init_rgmii_delays(dwmac); if (ret) - goto err_remove_config_dt; + return ret; ret = meson8b_init_rgmii_tx_clk(dwmac); if (ret) - goto err_remove_config_dt; + return ret; ret = dwmac->data->set_phy_mode(dwmac); if (ret) - goto err_remove_config_dt; + return ret; ret = meson8b_init_prg_eth(dwmac); if (ret) - goto err_remove_config_dt; + return ret; plat_dat->bsp_priv = dwmac; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - goto err_remove_config_dt; - - return 0; - -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); - - return ret; + return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); } static const struct meson8b_dwmac_data meson8b_dwmac_data = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index d920a50dd16c..382e8de1255d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1824,7 +1824,7 @@ static int rk_gmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); @@ -1836,18 +1836,16 @@ static int rk_gmac_probe(struct platform_device *pdev) plat_dat->fix_mac_speed = rk_fix_speed; plat_dat->bsp_priv = rk_gmac_setup(pdev, plat_dat, data); - if (IS_ERR(plat_dat->bsp_priv)) { - ret = PTR_ERR(plat_dat->bsp_priv); - goto err_remove_config_dt; - } + if (IS_ERR(plat_dat->bsp_priv)) + return PTR_ERR(plat_dat->bsp_priv); ret = rk_gmac_clk_init(plat_dat); if (ret) - goto err_remove_config_dt; + return ret; ret = rk_gmac_powerup(plat_dat->bsp_priv); if (ret) - goto err_remove_config_dt; + return ret; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) @@ -1857,8 +1855,6 @@ static int rk_gmac_probe(struct platform_device *pdev) err_gmac_powerdown: rk_gmac_powerdown(plat_dat->bsp_priv); -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 9bf102bbc6a0..ba2ce776bd4d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -400,21 +400,19 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); dwmac = devm_kzalloc(dev, sizeof(*dwmac), GFP_KERNEL); - if (!dwmac) { - ret = -ENOMEM; - goto err_remove_config_dt; - } + if (!dwmac) + return -ENOMEM; dwmac->stmmac_ocp_rst = devm_reset_control_get_optional(dev, "stmmaceth-ocp"); if (IS_ERR(dwmac->stmmac_ocp_rst)) { ret = PTR_ERR(dwmac->stmmac_ocp_rst); dev_err(dev, "error getting reset control of ocp %d\n", ret); - goto err_remove_config_dt; + return ret; } reset_control_deassert(dwmac->stmmac_ocp_rst); @@ -422,7 +420,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) ret = socfpga_dwmac_parse_data(dwmac, dev); if (ret) { dev_err(dev, "Unable to parse OF data\n"); - goto err_remove_config_dt; + return ret; } dwmac->ops = ops; @@ -431,7 +429,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - goto err_remove_config_dt; + return ret; ndev = platform_get_drvdata(pdev); stpriv = netdev_priv(ndev); @@ -492,8 +490,6 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) err_dvr_remove: stmmac_dvr_remove(&pdev->dev); -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c index 9289bb87c3e3..5d630affb4d1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c @@ -105,7 +105,7 @@ static int starfive_dwmac_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, err, "failed to get resources\n"); - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return dev_err_probe(&pdev->dev, PTR_ERR(plat_dat), "dt configuration failed\n"); @@ -141,13 +141,7 @@ static int starfive_dwmac_probe(struct platform_device *pdev) if (err) return err; - err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (err) { - stmmac_remove_config_dt(pdev, plat_dat); - return err; - } - - return 0; + return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); } static const struct of_device_id starfive_dwmac_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 0d653bbb931b..4445cddc4cbe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -273,20 +273,18 @@ static int sti_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); - if (!dwmac) { - ret = -ENOMEM; - goto err_remove_config_dt; - } + if (!dwmac) + return -ENOMEM; ret = sti_dwmac_parse_data(dwmac, pdev); if (ret) { dev_err(&pdev->dev, "Unable to parse OF data\n"); - goto err_remove_config_dt; + return ret; } dwmac->fix_retime_src = data->fix_retime_src; @@ -296,7 +294,7 @@ static int sti_dwmac_probe(struct platform_device *pdev) ret = clk_prepare_enable(dwmac->clk); if (ret) - goto err_remove_config_dt; + return ret; ret = sti_dwmac_set_mode(dwmac); if (ret) @@ -310,8 +308,6 @@ static int sti_dwmac_probe(struct platform_device *pdev) disable_clk: clk_disable_unprepare(dwmac->clk); -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index a0e276783e65..d8d3c729f219 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -372,21 +372,18 @@ static int stm32_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); - if (!dwmac) { - ret = -ENOMEM; - goto err_remove_config_dt; - } + if (!dwmac) + return -ENOMEM; data = of_device_get_match_data(&pdev->dev); if (!data) { dev_err(&pdev->dev, "no of match data provided\n"); - ret = -EINVAL; - goto err_remove_config_dt; + return -EINVAL; } dwmac->ops = data; @@ -395,14 +392,14 @@ static int stm32_dwmac_probe(struct platform_device *pdev) ret = stm32_dwmac_parse_data(dwmac, &pdev->dev); if (ret) { dev_err(&pdev->dev, "Unable to parse OF data\n"); - goto err_remove_config_dt; + return ret; } plat_dat->bsp_priv = dwmac; ret = stm32_dwmac_init(plat_dat); if (ret) - goto err_remove_config_dt; + return ret; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) @@ -412,8 +409,6 @@ static int stm32_dwmac_probe(struct platform_device *pdev) err_clk_disable: stm32_dwmac_clk_disable(dwmac); -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 465ff1fd4785..137741b94122 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1224,7 +1224,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) if (ret) return -EINVAL; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); @@ -1244,7 +1244,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) ret = sun8i_dwmac_set_syscon(&pdev->dev, plat_dat); if (ret) - goto dwmac_deconfig; + return ret; ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv); if (ret) @@ -1295,8 +1295,6 @@ dwmac_exit: sun8i_dwmac_exit(pdev, gmac); dwmac_syscon: sun8i_dwmac_unset_syscon(gmac); -dwmac_deconfig: - stmmac_remove_config_dt(pdev, plat_dat); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index beceeae579bf..2653a9f0958c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -108,36 +108,31 @@ static int sun7i_gmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL); - if (!gmac) { - ret = -ENOMEM; - goto err_remove_config_dt; - } + if (!gmac) + return -ENOMEM; ret = of_get_phy_mode(dev->of_node, &gmac->interface); if (ret && ret != -ENODEV) { dev_err(dev, "Can't get phy-mode\n"); - goto err_remove_config_dt; + return ret; } gmac->tx_clk = devm_clk_get(dev, "allwinner_gmac_tx"); if (IS_ERR(gmac->tx_clk)) { dev_err(dev, "could not get tx clock\n"); - ret = PTR_ERR(gmac->tx_clk); - goto err_remove_config_dt; + return PTR_ERR(gmac->tx_clk); } /* Optional regulator for PHY */ gmac->regulator = devm_regulator_get_optional(dev, "phy"); if (IS_ERR(gmac->regulator)) { - if (PTR_ERR(gmac->regulator) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; - goto err_remove_config_dt; - } + if (PTR_ERR(gmac->regulator) == -EPROBE_DEFER) + return -EPROBE_DEFER; dev_info(dev, "no regulator found\n"); gmac->regulator = NULL; } @@ -155,7 +150,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev) ret = sun7i_gmac_init(pdev, plat_dat->bsp_priv); if (ret) - goto err_remove_config_dt; + return ret; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) @@ -165,8 +160,6 @@ static int sun7i_gmac_probe(struct platform_device *pdev) err_gmac_exit: sun7i_gmac_exit(pdev, plat_dat->bsp_priv); -err_remove_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c index e0f3cbd36852..362f85136c3e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c @@ -284,7 +284,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev) if (err < 0) goto disable_clks; - plat = stmmac_probe_config_dt(pdev, res.mac); + plat = devm_stmmac_probe_config_dt(pdev, res.mac); if (IS_ERR(plat)) { err = PTR_ERR(plat); goto disable_clks; @@ -303,7 +303,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev) GFP_KERNEL); if (!plat->mdio_bus_data) { err = -ENOMEM; - goto remove; + goto disable_clks; } } @@ -321,7 +321,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev) 500, 500 * 2000); if (err < 0) { dev_err(mgbe->dev, "timeout waiting for TX lane to become enabled\n"); - goto remove; + goto disable_clks; } plat->serdes_powerup = mgbe_uphy_lane_bringup_serdes_up; @@ -342,12 +342,10 @@ static int tegra_mgbe_probe(struct platform_device *pdev) err = stmmac_dvr_probe(&pdev->dev, plat, &res); if (err < 0) - goto remove; + goto disable_clks; return 0; -remove: - stmmac_remove_config_dt(pdev, plat); disable_clks: clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index 22d113fb8e09..a5a5cfa989c6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -220,15 +220,13 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); - if (!dwmac) { - ret = -ENOMEM; - goto remove_config; - } + if (!dwmac) + return -ENOMEM; spin_lock_init(&dwmac->lock); dwmac->reg = stmmac_res.addr; @@ -238,7 +236,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev) ret = visconti_eth_clock_probe(pdev, plat_dat); if (ret) - goto remove_config; + return ret; visconti_eth_init_hw(pdev, plat_dat); @@ -252,22 +250,14 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev) remove: visconti_eth_clock_remove(pdev); -remove_config: - stmmac_remove_config_dt(pdev, plat_dat); return ret; } static void visconti_eth_dwmac_remove(struct platform_device *pdev) { - struct net_device *ndev = platform_get_drvdata(pdev); - struct stmmac_priv *priv = netdev_priv(ndev); - stmmac_pltfr_remove(pdev); - visconti_eth_clock_remove(pdev); - - stmmac_remove_config_dt(pdev, priv->plat); } static const struct of_device_id visconti_eth_dwmac_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5801f4d50f95..bd1249a9d7f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4415,6 +4415,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) WARN_ON(tx_q->tx_skbuff[first_entry]); csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL); + /* DWMAC IPs can be synthesized to support tx coe only for a few tx + * queues. In that case, checksum offloading for those queues that don't + * support tx coe needs to fallback to software checksum calculation. + */ + if (csum_insertion && + priv->plat->tx_queues_cfg[queue].coe_unsupported) { + if (unlikely(skb_checksum_help(skb))) + goto dma_map_err; + csum_insertion = !csum_insertion; + } if (likely(priv->extend_desc)) desc = (struct dma_desc *)(tx_q->dma_etx + entry); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 2f0678f15fb7..1ffde555da47 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -276,6 +276,9 @@ static int stmmac_mtl_setup(struct platform_device *pdev, plat->tx_queues_cfg[queue].use_prio = true; } + plat->tx_queues_cfg[queue].coe_unsupported = + of_property_read_bool(q_node, "snps,coe-unsupported"); + queue++; } if (queue != plat->tx_queues_to_use) { @@ -385,6 +388,22 @@ static int stmmac_of_get_mac_mode(struct device_node *np) } /** + * stmmac_remove_config_dt - undo the effects of stmmac_probe_config_dt() + * @pdev: platform_device structure + * @plat: driver data platform structure + * + * Release resources claimed by stmmac_probe_config_dt(). + */ +static void stmmac_remove_config_dt(struct platform_device *pdev, + struct plat_stmmacenet_data *plat) +{ + clk_disable_unprepare(plat->stmmac_clk); + clk_disable_unprepare(plat->pclk); + of_node_put(plat->phy_node); + of_node_put(plat->mdio_node); +} + +/** * stmmac_probe_config_dt - parse device-tree driver parameters * @pdev: platform_device structure * @mac: MAC address to use @@ -392,7 +411,7 @@ static int stmmac_of_get_mac_mode(struct device_node *np) * this function is to read the driver parameters from device-tree and * set some private fields that will be used by the main at runtime. */ -struct plat_stmmacenet_data * +static struct plat_stmmacenet_data * stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) { struct device_node *np = pdev->dev.of_node; @@ -662,43 +681,14 @@ devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) return plat; } - -/** - * stmmac_remove_config_dt - undo the effects of stmmac_probe_config_dt() - * @pdev: platform_device structure - * @plat: driver data platform structure - * - * Release resources claimed by stmmac_probe_config_dt(). - */ -void stmmac_remove_config_dt(struct platform_device *pdev, - struct plat_stmmacenet_data *plat) -{ - clk_disable_unprepare(plat->stmmac_clk); - clk_disable_unprepare(plat->pclk); - of_node_put(plat->phy_node); - of_node_put(plat->mdio_node); -} #else struct plat_stmmacenet_data * -stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) -{ - return ERR_PTR(-EINVAL); -} - -struct plat_stmmacenet_data * devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) { return ERR_PTR(-EINVAL); } - -void stmmac_remove_config_dt(struct platform_device *pdev, - struct plat_stmmacenet_data *plat) -{ -} #endif /* CONFIG_OF */ -EXPORT_SYMBOL_GPL(stmmac_probe_config_dt); EXPORT_SYMBOL_GPL(devm_stmmac_probe_config_dt); -EXPORT_SYMBOL_GPL(stmmac_remove_config_dt); int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res) @@ -807,7 +797,7 @@ static void devm_stmmac_pltfr_remove(void *data) { struct platform_device *pdev = data; - stmmac_pltfr_remove_no_dt(pdev); + stmmac_pltfr_remove(pdev); } /** @@ -834,12 +824,12 @@ int devm_stmmac_pltfr_probe(struct platform_device *pdev, EXPORT_SYMBOL_GPL(devm_stmmac_pltfr_probe); /** - * stmmac_pltfr_remove_no_dt + * stmmac_pltfr_remove * @pdev: pointer to the platform device * Description: This undoes the effects of stmmac_pltfr_probe() by removing the * driver and calling the platform's exit() callback. */ -void stmmac_pltfr_remove_no_dt(struct platform_device *pdev) +void stmmac_pltfr_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -848,23 +838,6 @@ void stmmac_pltfr_remove_no_dt(struct platform_device *pdev) stmmac_dvr_remove(&pdev->dev); stmmac_pltfr_exit(pdev, plat); } -EXPORT_SYMBOL_GPL(stmmac_pltfr_remove_no_dt); - -/** - * stmmac_pltfr_remove - * @pdev: platform device pointer - * Description: this function calls the main to free the net resources - * and calls the platforms hook and release the resources (e.g. mem). - */ -void stmmac_pltfr_remove(struct platform_device *pdev) -{ - struct net_device *ndev = platform_get_drvdata(pdev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct plat_stmmacenet_data *plat = priv->plat; - - stmmac_pltfr_remove_no_dt(pdev); - stmmac_remove_config_dt(pdev, plat); -} EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); /** diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index c5565b2a70ac..bb6fc7e59aed 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -12,11 +12,7 @@ #include "stmmac.h" struct plat_stmmacenet_data * -stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); -struct plat_stmmacenet_data * devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); -void stmmac_remove_config_dt(struct platform_device *pdev, - struct plat_stmmacenet_data *plat); int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res); @@ -32,7 +28,6 @@ int stmmac_pltfr_probe(struct platform_device *pdev, int devm_stmmac_pltfr_probe(struct platform_device *pdev, struct plat_stmmacenet_data *plat, struct stmmac_resources *res); -void stmmac_pltfr_remove_no_dt(struct platform_device *pdev); void stmmac_pltfr_remove(struct platform_device *pdev); extern const struct dev_pm_ops stmmac_pltfr_pm_ops; diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 011d74087f86..21431f43e4c2 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -10132,7 +10132,7 @@ err_out: return err; } -static int niu_of_remove(struct platform_device *op) +static void niu_of_remove(struct platform_device *op) { struct net_device *dev = platform_get_drvdata(op); @@ -10165,7 +10165,6 @@ static int niu_of_remove(struct platform_device *op) free_netdev(dev); } - return 0; } static const struct of_device_id niu_match[] = { @@ -10183,7 +10182,7 @@ static struct platform_driver niu_of_driver = { .of_match_table = niu_match, }, .probe = niu_of_probe, - .remove = niu_of_remove, + .remove_new = niu_of_remove, }; #endif /* CONFIG_SPARC64 */ diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index cc34d92d2e3d..16c86b13c185 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -1234,7 +1234,7 @@ static int bigmac_sbus_probe(struct platform_device *op) return bigmac_ether_init(op, qec_op); } -static int bigmac_sbus_remove(struct platform_device *op) +static void bigmac_sbus_remove(struct platform_device *op) { struct bigmac *bp = platform_get_drvdata(op); struct device *parent = op->dev.parent; @@ -1255,8 +1255,6 @@ static int bigmac_sbus_remove(struct platform_device *op) bp->bblock_dvma); free_netdev(net_dev); - - return 0; } static const struct of_device_id bigmac_sbus_match[] = { @@ -1274,7 +1272,7 @@ static struct platform_driver bigmac_sbus_driver = { .of_match_table = bigmac_sbus_match, }, .probe = bigmac_sbus_probe, - .remove = bigmac_sbus_remove, + .remove_new = bigmac_sbus_remove, }; module_platform_driver(bigmac_sbus_driver); diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index b37360f44972..aedd13c94225 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -933,7 +933,7 @@ static int qec_sbus_probe(struct platform_device *op) return qec_ether_init(op); } -static int qec_sbus_remove(struct platform_device *op) +static void qec_sbus_remove(struct platform_device *op) { struct sunqe *qp = platform_get_drvdata(op); struct net_device *net_dev = qp->dev; @@ -948,8 +948,6 @@ static int qec_sbus_remove(struct platform_device *op) qp->buffers, qp->buffers_dvma); free_netdev(net_dev); - - return 0; } static const struct of_device_id qec_sbus_match[] = { @@ -967,7 +965,7 @@ static struct platform_driver qec_sbus_driver = { .of_match_table = qec_sbus_match, }, .probe = qec_sbus_probe, - .remove = qec_sbus_remove, + .remove_new = qec_sbus_remove, }; static int __init qec_init(void) diff --git a/drivers/net/ethernet/sunplus/spl2sw_driver.c b/drivers/net/ethernet/sunplus/spl2sw_driver.c index c499a14314f1..391a1bc7f446 100644 --- a/drivers/net/ethernet/sunplus/spl2sw_driver.c +++ b/drivers/net/ethernet/sunplus/spl2sw_driver.c @@ -511,7 +511,7 @@ out_clk_disable: return ret; } -static int spl2sw_remove(struct platform_device *pdev) +static void spl2sw_remove(struct platform_device *pdev) { struct spl2sw_common *comm; int i; @@ -538,8 +538,6 @@ static int spl2sw_remove(struct platform_device *pdev) spl2sw_mdio_remove(comm); clk_disable_unprepare(comm->clk); - - return 0; } static const struct of_device_id spl2sw_of_match[] = { @@ -551,7 +549,7 @@ MODULE_DEVICE_TABLE(of, spl2sw_of_match); static struct platform_driver spl2sw_driver = { .probe = spl2sw_probe, - .remove = spl2sw_remove, + .remove_new = spl2sw_remove, .driver = { .name = "sp7021_emac", .of_match_table = spl2sw_of_match, diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 80eeeb463c4f..bc9b5d18427d 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1151,14 +1151,12 @@ fail: return rc; } -static int cpmac_remove(struct platform_device *pdev) +static void cpmac_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); unregister_netdev(dev); free_netdev(dev); - - return 0; } static struct platform_driver cpmac_driver = { @@ -1166,7 +1164,7 @@ static struct platform_driver cpmac_driver = { .name = "cpmac", }, .probe = cpmac_probe, - .remove = cpmac_remove, + .remove_new = cpmac_remove, }; int __init cpmac_init(void) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 2eb9d5a32588..5d756df133eb 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -2002,7 +2002,7 @@ err_free_netdev: * Called when removing the device driver. We disable clock usage and release * the resources taken up by the driver and unregister network device */ -static int davinci_emac_remove(struct platform_device *pdev) +static void davinci_emac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct emac_priv *priv = netdev_priv(ndev); @@ -2022,8 +2022,6 @@ static int davinci_emac_remove(struct platform_device *pdev) if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); free_netdev(ndev); - - return 0; } static int davinci_emac_suspend(struct device *dev) @@ -2076,7 +2074,7 @@ static struct platform_driver davinci_emac_driver = { .of_match_table = davinci_emac_of_match, }, .probe = davinci_emac_probe, - .remove = davinci_emac_remove, + .remove_new = davinci_emac_remove, }; /** diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 89b6d23e9937..628c87dc1d28 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -673,7 +673,7 @@ bail_out: return ret; } -static int davinci_mdio_remove(struct platform_device *pdev) +static void davinci_mdio_remove(struct platform_device *pdev) { struct davinci_mdio_data *data = platform_get_drvdata(pdev); @@ -686,8 +686,6 @@ static int davinci_mdio_remove(struct platform_device *pdev) pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_disable(&pdev->dev); - - return 0; } #ifdef CONFIG_PM @@ -766,7 +764,7 @@ static struct platform_driver davinci_mdio_driver = { .of_match_table = of_match_ptr(davinci_mdio_of_mtable), }, .probe = davinci_mdio_probe, - .remove = davinci_mdio_remove, + .remove_new = davinci_mdio_remove, }; static int __init davinci_mdio_init(void) diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c index b272361e378f..99de8a40ed60 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.c +++ b/drivers/net/ethernet/ti/icssg/icssg_config.c @@ -433,6 +433,17 @@ int emac_set_port_state(struct prueth_emac *emac, return ret; } +void icssg_config_half_duplex(struct prueth_emac *emac) +{ + u32 val; + + if (!emac->half_duplex) + return; + + val = get_random_u32(); + writel(val, emac->dram.va + HD_RAND_SEED_OFFSET); +} + void icssg_config_set_speed(struct prueth_emac *emac) { u8 fw_speed; @@ -453,5 +464,8 @@ void icssg_config_set_speed(struct prueth_emac *emac) return; } + if (emac->duplex == DUPLEX_HALF) + fw_speed |= FW_LINK_SPEED_HD; + writeb(fw_speed, emac->dram.va + PORT_LINK_SPEED_OFFSET); } diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 4914d0ef58e9..482ae82a99c1 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1029,6 +1029,8 @@ static void emac_adjust_link(struct net_device *ndev) * values */ if (emac->link) { + if (emac->duplex == DUPLEX_HALF) + icssg_config_half_duplex(emac); /* Set the RGMII cfg for gig en and full duplex */ icssg_update_rgmii_cfg(prueth->miig_rt, emac); @@ -1147,9 +1149,13 @@ static int emac_phy_connect(struct prueth_emac *emac) return -ENODEV; } + if (!emac->half_duplex) { + dev_dbg(prueth->dev, "half duplex mode is not supported\n"); + phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT); + phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); + } + /* remove unsupported modes */ - phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT); - phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT); phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_Pause_BIT); phy_remove_link_mode(ndev->phydev, ETHTOOL_LINK_MODE_Asym_Pause_BIT); @@ -2113,6 +2119,10 @@ static int prueth_probe(struct platform_device *pdev) eth0_node->name); goto exit_iep; } + + if (of_find_property(eth0_node, "ti,half-duplex-capable", NULL)) + prueth->emac[PRUETH_MAC0]->half_duplex = 1; + prueth->emac[PRUETH_MAC0]->iep = prueth->iep0; } @@ -2124,6 +2134,9 @@ static int prueth_probe(struct platform_device *pdev) goto netdev_exit; } + if (of_find_property(eth1_node, "ti,half-duplex-capable", NULL)) + prueth->emac[PRUETH_MAC1]->half_duplex = 1; + prueth->emac[PRUETH_MAC1]->iep = prueth->iep0; } @@ -2313,8 +2326,13 @@ static const struct prueth_pdata am654_icssg_pdata = { .quirk_10m_link_issue = 1, }; +static const struct prueth_pdata am64x_icssg_pdata = { + .fdqring_mode = K3_RINGACC_RING_MODE_RING, +}; + static const struct of_device_id prueth_dt_match[] = { { .compatible = "ti,am654-icssg-prueth", .data = &am654_icssg_pdata }, + { .compatible = "ti,am642-icssg-prueth", .data = &am64x_icssg_pdata }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, prueth_dt_match); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index 3fe80a8758d3..8b6d6b497010 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -145,6 +145,7 @@ struct prueth_emac { struct icss_iep *iep; unsigned int rx_ts_enabled : 1; unsigned int tx_ts_enabled : 1; + unsigned int half_duplex : 1; /* DMA related */ struct prueth_tx_chn tx_chns[PRUETH_MAX_TX_QUEUES]; @@ -271,6 +272,7 @@ int icssg_config(struct prueth *prueth, struct prueth_emac *emac, int emac_set_port_state(struct prueth_emac *emac, enum icssg_port_state_cmd state); void icssg_config_set_speed(struct prueth_emac *emac); +void icssg_config_half_duplex(struct prueth_emac *emac); /* Buffer queue helpers */ int icssg_queue_pop(struct prueth *prueth, u8 queue); diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index d829113c16ee..11b90e1da0c6 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2228,7 +2228,7 @@ probe_quit: return ret; } -static int netcp_remove(struct platform_device *pdev) +static void netcp_remove(struct platform_device *pdev) { struct netcp_device *netcp_device = platform_get_drvdata(pdev); struct netcp_intf *netcp_intf, *netcp_tmp; @@ -2256,7 +2256,6 @@ static int netcp_remove(struct platform_device *pdev) pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); platform_set_drvdata(pdev, NULL); - return 0; } static const struct of_device_id of_match[] = { @@ -2271,7 +2270,7 @@ static struct platform_driver netcp_driver = { .of_match_table = of_match, }, .probe = netcp_probe, - .remove = netcp_remove, + .remove_new = netcp_remove, }; module_platform_driver(netcp_driver); diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 50d7eacfec58..87e67121477c 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2332,7 +2332,7 @@ spider_net_alloc_card(void) struct spider_net_card *card; netdev = alloc_etherdev(struct_size(card, darray, - tx_descriptors + rx_descriptors)); + size_add(tx_descriptors, rx_descriptors))); if (!netdev) return NULL; diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index d09d352e1c0a..554aff7c8f3b 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1660,7 +1660,7 @@ static void tsi108_timed_checker(struct timer_list *t) mod_timer(&data->timer, jiffies + CHECK_PHY_INTERVAL); } -static int tsi108_ether_remove(struct platform_device *pdev) +static void tsi108_ether_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct tsi108_prv_data *priv = netdev_priv(dev); @@ -1670,15 +1670,13 @@ static int tsi108_ether_remove(struct platform_device *pdev) iounmap(priv->regs); iounmap(priv->phyregs); free_netdev(dev); - - return 0; } /* Structure for a device driver */ static struct platform_driver tsi_eth_driver = { .probe = tsi108_init_one, - .remove = tsi108_ether_remove, + .remove_new = tsi108_ether_remove, .driver = { .name = "tsi-ethernet", }, diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 3e09e5036490..e80c02948801 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -2443,7 +2443,7 @@ static void rhine_remove_one_pci(struct pci_dev *pdev) pci_disable_device(pdev); } -static int rhine_remove_one_platform(struct platform_device *pdev) +static void rhine_remove_one_platform(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct rhine_private *rp = netdev_priv(dev); @@ -2453,8 +2453,6 @@ static int rhine_remove_one_platform(struct platform_device *pdev) iounmap(rp->base); free_netdev(dev); - - return 0; } static void rhine_shutdown_pci(struct pci_dev *pdev) @@ -2572,7 +2570,7 @@ static struct pci_driver rhine_driver_pci = { static struct platform_driver rhine_driver_platform = { .probe = rhine_init_one_platform, - .remove = rhine_remove_one_platform, + .remove_new = rhine_remove_one_platform, .driver = { .name = DRV_NAME, .of_match_table = rhine_of_tbl, diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 731f689412e6..1c6b2a9bba08 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2957,11 +2957,9 @@ static int velocity_platform_probe(struct platform_device *pdev) return velocity_probe(&pdev->dev, irq, info, BUS_PLATFORM); } -static int velocity_platform_remove(struct platform_device *pdev) +static void velocity_platform_remove(struct platform_device *pdev) { velocity_remove(&pdev->dev); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -3249,7 +3247,7 @@ static struct pci_driver velocity_pci_driver = { static struct platform_driver velocity_platform_driver = { .probe = velocity_platform_probe, - .remove = velocity_platform_remove, + .remove_new = velocity_platform_remove, .driver = { .name = "via-velocity", .of_match_table = velocity_of_ids, diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 85dc16faca54..f0063d569c80 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -12,6 +12,98 @@ #include "wx_lib.h" #include "wx_hw.h" +static int wx_phy_read_reg_mdi(struct mii_bus *bus, int phy_addr, int devnum, int regnum) +{ + struct wx *wx = bus->priv; + u32 command, val; + int ret; + + /* setup and write the address cycle command */ + command = WX_MSCA_RA(regnum) | + WX_MSCA_PA(phy_addr) | + WX_MSCA_DA(devnum); + wr32(wx, WX_MSCA, command); + + command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | WX_MSCC_BUSY; + if (wx->mac.type == wx_mac_em) + command |= WX_MDIO_CLK(6); + wr32(wx, WX_MSCC, command); + + /* wait to complete */ + ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, + 100000, false, wx, WX_MSCC); + if (ret) { + wx_err(wx, "Mdio read c22 command did not complete.\n"); + return ret; + } + + return (u16)rd32(wx, WX_MSCC); +} + +static int wx_phy_write_reg_mdi(struct mii_bus *bus, int phy_addr, + int devnum, int regnum, u16 value) +{ + struct wx *wx = bus->priv; + u32 command, val; + int ret; + + /* setup and write the address cycle command */ + command = WX_MSCA_RA(regnum) | + WX_MSCA_PA(phy_addr) | + WX_MSCA_DA(devnum); + wr32(wx, WX_MSCA, command); + + command = value | WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | WX_MSCC_BUSY; + if (wx->mac.type == wx_mac_em) + command |= WX_MDIO_CLK(6); + wr32(wx, WX_MSCC, command); + + /* wait to complete */ + ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, + 100000, false, wx, WX_MSCC); + if (ret) + wx_err(wx, "Mdio write c22 command did not complete.\n"); + + return ret; +} + +int wx_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum) +{ + struct wx *wx = bus->priv; + + wr32(wx, WX_MDIO_CLAUSE_SELECT, 0xF); + return wx_phy_read_reg_mdi(bus, phy_addr, 0, regnum); +} +EXPORT_SYMBOL(wx_phy_read_reg_mdi_c22); + +int wx_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value) +{ + struct wx *wx = bus->priv; + + wr32(wx, WX_MDIO_CLAUSE_SELECT, 0xF); + return wx_phy_write_reg_mdi(bus, phy_addr, 0, regnum, value); +} +EXPORT_SYMBOL(wx_phy_write_reg_mdi_c22); + +int wx_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum) +{ + struct wx *wx = bus->priv; + + wr32(wx, WX_MDIO_CLAUSE_SELECT, 0); + return wx_phy_read_reg_mdi(bus, phy_addr, devnum, regnum); +} +EXPORT_SYMBOL(wx_phy_read_reg_mdi_c45); + +int wx_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr, + int devnum, int regnum, u16 value) +{ + struct wx *wx = bus->priv; + + wr32(wx, WX_MDIO_CLAUSE_SELECT, 0); + return wx_phy_write_reg_mdi(bus, phy_addr, devnum, regnum, value); +} +EXPORT_SYMBOL(wx_phy_write_reg_mdi_c45); + static void wx_intr_disable(struct wx *wx, u64 qmask) { u32 mask; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index 0b3447bc6f2f..48d3ccabc272 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -4,6 +4,13 @@ #ifndef _WX_HW_H_ #define _WX_HW_H_ +#include <linux/phy.h> + +int wx_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum); +int wx_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value); +int wx_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum); +int wx_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr, + int devnum, int regnum, u16 value); void wx_intr_enable(struct wx *wx, u64 qmask); void wx_irq_disable(struct wx *wx); int wx_check_flash_load(struct wx *wx, u32 check_bit); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index c5cbd177ef62..e3fc49284219 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -251,6 +251,7 @@ enum WX_MSCA_CMD_value { #define WX_MSCC_SADDR BIT(18) #define WX_MSCC_BUSY BIT(22) #define WX_MDIO_CLK(v) FIELD_PREP(GENMASK(21, 19), v) +#define WX_MDIO_CLAUSE_SELECT 0x11220 #define WX_MMC_CONTROL 0x11800 #define WX_MMC_CONTROL_RSTONRD BIT(2) /* reset on read */ diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c index 591f5b7b6da6..6302ecca71bb 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c @@ -29,117 +29,6 @@ static int ngbe_phy_write_reg_internal(struct mii_bus *bus, int phy_addr, int re return 0; } -static int ngbe_phy_read_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum) -{ - u32 command, val, device_type = 0; - struct wx *wx = bus->priv; - int ret; - - wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0xF); - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(device_type); - wr32(wx, WX_MSCA, command); - command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | - WX_MSCC_BUSY | - WX_MDIO_CLK(6); - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) { - wx_err(wx, "Mdio read c22 command did not complete.\n"); - return ret; - } - - return (u16)rd32(wx, WX_MSCC); -} - -static int ngbe_phy_write_reg_mdi_c22(struct mii_bus *bus, int phy_addr, int regnum, u16 value) -{ - u32 command, val, device_type = 0; - struct wx *wx = bus->priv; - int ret; - - wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0xF); - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(device_type); - wr32(wx, WX_MSCA, command); - command = value | - WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | - WX_MSCC_BUSY | - WX_MDIO_CLK(6); - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) - wx_err(wx, "Mdio write c22 command did not complete.\n"); - - return ret; -} - -static int ngbe_phy_read_reg_mdi_c45(struct mii_bus *bus, int phy_addr, int devnum, int regnum) -{ - struct wx *wx = bus->priv; - u32 val, command; - int ret; - - wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0x0); - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(devnum); - wr32(wx, WX_MSCA, command); - command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | - WX_MSCC_BUSY | - WX_MDIO_CLK(6); - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) { - wx_err(wx, "Mdio read c45 command did not complete.\n"); - return ret; - } - - return (u16)rd32(wx, WX_MSCC); -} - -static int ngbe_phy_write_reg_mdi_c45(struct mii_bus *bus, int phy_addr, - int devnum, int regnum, u16 value) -{ - struct wx *wx = bus->priv; - int ret, command; - u16 val; - - wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0x0); - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(devnum); - wr32(wx, WX_MSCA, command); - command = value | - WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | - WX_MSCC_BUSY | - WX_MDIO_CLK(6); - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) - wx_err(wx, "Mdio write c45 command did not complete.\n"); - - return ret; -} - static int ngbe_phy_read_reg_c22(struct mii_bus *bus, int phy_addr, int regnum) { struct wx *wx = bus->priv; @@ -148,7 +37,7 @@ static int ngbe_phy_read_reg_c22(struct mii_bus *bus, int phy_addr, int regnum) if (wx->mac_type == em_mac_type_mdi) phy_data = ngbe_phy_read_reg_internal(bus, phy_addr, regnum); else - phy_data = ngbe_phy_read_reg_mdi_c22(bus, phy_addr, regnum); + phy_data = wx_phy_read_reg_mdi_c22(bus, phy_addr, regnum); return phy_data; } @@ -162,7 +51,7 @@ static int ngbe_phy_write_reg_c22(struct mii_bus *bus, int phy_addr, if (wx->mac_type == em_mac_type_mdi) ret = ngbe_phy_write_reg_internal(bus, phy_addr, regnum, value); else - ret = ngbe_phy_write_reg_mdi_c22(bus, phy_addr, regnum, value); + ret = wx_phy_write_reg_mdi_c22(bus, phy_addr, regnum, value); return ret; } @@ -262,8 +151,8 @@ int ngbe_mdio_init(struct wx *wx) mii_bus->priv = wx; if (wx->mac_type == em_mac_type_rgmii) { - mii_bus->read_c45 = ngbe_phy_read_reg_mdi_c45; - mii_bus->write_c45 = ngbe_phy_write_reg_mdi_c45; + mii_bus->read_c45 = wx_phy_read_reg_mdi_c45; + mii_bus->write_c45 = wx_phy_write_reg_mdi_c45; } snprintf(mii_bus->id, MII_BUS_ID_SIZE, "ngbe-%x", pci_dev_id(pdev)); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index 72c8cd2d5575..ff754d69bdf6 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -59,9 +59,6 @@ #define NGBE_EEPROM_VERSION_L 0x1D #define NGBE_EEPROM_VERSION_H 0x1E -/* Media-dependent registers. */ -#define NGBE_MDIO_CLAUSE_SELECT 0x11220 - /* GPIO Registers */ #define NGBE_GPIO_DR 0x14800 #define NGBE_GPIO_DDR 0x14804 diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 4159c84035fd..b6c06adb8656 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -647,58 +647,6 @@ static int txgbe_sfp_register(struct txgbe *txgbe) return 0; } -static int txgbe_phy_read(struct mii_bus *bus, int phy_addr, - int devnum, int regnum) -{ - struct wx *wx = bus->priv; - u32 val, command; - int ret; - - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(devnum); - wr32(wx, WX_MSCA, command); - - command = WX_MSCC_CMD(WX_MSCA_CMD_READ) | WX_MSCC_BUSY; - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) { - wx_err(wx, "Mdio read c45 command did not complete.\n"); - return ret; - } - - return (u16)rd32(wx, WX_MSCC); -} - -static int txgbe_phy_write(struct mii_bus *bus, int phy_addr, - int devnum, int regnum, u16 value) -{ - struct wx *wx = bus->priv; - int ret, command; - u16 val; - - /* setup and write the address cycle command */ - command = WX_MSCA_RA(regnum) | - WX_MSCA_PA(phy_addr) | - WX_MSCA_DA(devnum); - wr32(wx, WX_MSCA, command); - - command = value | WX_MSCC_CMD(WX_MSCA_CMD_WRITE) | WX_MSCC_BUSY; - wr32(wx, WX_MSCC, command); - - /* wait to complete */ - ret = read_poll_timeout(rd32, val, !(val & WX_MSCC_BUSY), 1000, - 100000, false, wx, WX_MSCC); - if (ret) - wx_err(wx, "Mdio write c45 command did not complete.\n"); - - return ret; -} - static int txgbe_ext_phy_init(struct txgbe *txgbe) { struct phy_device *phydev; @@ -715,8 +663,8 @@ static int txgbe_ext_phy_init(struct txgbe *txgbe) return -ENOMEM; mii_bus->name = "txgbe_mii_bus"; - mii_bus->read_c45 = &txgbe_phy_read; - mii_bus->write_c45 = &txgbe_phy_write; + mii_bus->read_c45 = &wx_phy_read_reg_mdi_c45; + mii_bus->write_c45 = &wx_phy_write_reg_mdi_c45; mii_bus->parent = &pdev->dev; mii_bus->phy_mask = GENMASK(31, 1); mii_bus->priv = wx; diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index 634946e87e5f..341ee2f249fd 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -1062,11 +1062,9 @@ static int w5100_mmio_probe(struct platform_device *pdev) mac_addr, irq, data ? data->link_gpio : -EINVAL); } -static int w5100_mmio_remove(struct platform_device *pdev) +static void w5100_mmio_remove(struct platform_device *pdev) { w5100_remove(&pdev->dev); - - return 0; } void *w5100_ops_priv(const struct net_device *ndev) @@ -1273,6 +1271,6 @@ static struct platform_driver w5100_mmio_driver = { .pm = &w5100_pm_ops, }, .probe = w5100_mmio_probe, - .remove = w5100_mmio_remove, + .remove_new = w5100_mmio_remove, }; module_platform_driver(w5100_mmio_driver); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index b0958fe8111e..3318b50a5911 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -627,7 +627,7 @@ err_register: return err; } -static int w5300_remove(struct platform_device *pdev) +static void w5300_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct w5300_priv *priv = netdev_priv(ndev); @@ -639,7 +639,6 @@ static int w5300_remove(struct platform_device *pdev) unregister_netdev(ndev); free_netdev(ndev); - return 0; } #ifdef CONFIG_PM_SLEEP @@ -683,7 +682,7 @@ static struct platform_driver w5300_driver = { .pm = &w5300_pm_ops, }, .probe = w5300_probe, - .remove = w5300_remove, + .remove_new = w5300_remove, }; module_platform_driver(w5300_driver); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 1444b855e7aa..9df39cf8b097 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1626,7 +1626,7 @@ err_sysfs_create: return rc; } -static int temac_remove(struct platform_device *pdev) +static void temac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct temac_local *lp = netdev_priv(ndev); @@ -1636,7 +1636,6 @@ static int temac_remove(struct platform_device *pdev) if (lp->phy_node) of_node_put(lp->phy_node); temac_mdio_teardown(lp); - return 0; } static const struct of_device_id temac_of_match[] = { @@ -1650,7 +1649,7 @@ MODULE_DEVICE_TABLE(of, temac_of_match); static struct platform_driver temac_driver = { .probe = temac_probe, - .remove = temac_remove, + .remove_new = temac_remove, .driver = { .name = "xilinx_temac", .of_match_table = temac_of_match, diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index b7ec4dafae90..82d0d44b2b02 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2183,7 +2183,7 @@ free_netdev: return ret; } -static int axienet_remove(struct platform_device *pdev) +static void axienet_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct axienet_local *lp = netdev_priv(ndev); @@ -2202,8 +2202,6 @@ static int axienet_remove(struct platform_device *pdev) clk_disable_unprepare(lp->axi_clk); free_netdev(ndev); - - return 0; } static void axienet_shutdown(struct platform_device *pdev) @@ -2256,7 +2254,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(axienet_pm_ops, static struct platform_driver axienet_driver = { .probe = axienet_probe, - .remove = axienet_remove, + .remove_new = axienet_remove, .shutdown = axienet_shutdown, .driver = { .name = "xilinx_axienet", diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index b358ecc67227..32a502e7318b 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1183,7 +1183,7 @@ error: * * Return: 0, always. */ -static int xemaclite_of_remove(struct platform_device *of_dev) +static void xemaclite_of_remove(struct platform_device *of_dev) { struct net_device *ndev = platform_get_drvdata(of_dev); @@ -1202,8 +1202,6 @@ static int xemaclite_of_remove(struct platform_device *of_dev) lp->phy_node = NULL; free_netdev(ndev); - - return 0; } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1262,7 +1260,7 @@ static struct platform_driver xemaclite_of_driver = { .of_match_table = xemaclite_of_match, }, .probe = xemaclite_of_probe, - .remove = xemaclite_of_remove, + .remove_new = xemaclite_of_remove, }; module_platform_driver(xemaclite_of_driver); diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 3b0c5f177447..b242aa61d8ab 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1533,7 +1533,7 @@ err_free_mem: return err; } -static int ixp4xx_eth_remove(struct platform_device *pdev) +static void ixp4xx_eth_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct phy_device *phydev = ndev->phydev; @@ -1544,7 +1544,6 @@ static int ixp4xx_eth_remove(struct platform_device *pdev) ixp4xx_mdio_remove(); npe_port_tab[NPE_ID(port->id)] = NULL; npe_release(port->npe); - return 0; } static const struct of_device_id ixp4xx_eth_of_match[] = { @@ -1560,7 +1559,7 @@ static struct platform_driver ixp4xx_eth_driver = { .of_match_table = of_match_ptr(ixp4xx_eth_of_match), }, .probe = ixp4xx_eth_probe, - .remove = ixp4xx_eth_remove, + .remove_new = ixp4xx_eth_remove, }; module_platform_driver(ixp4xx_eth_driver); diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index b22596b18ee8..b1919278e931 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -630,7 +630,7 @@ static void __gtp_encap_destroy(struct sock *sk) gtp->sk0 = NULL; else gtp->sk1u = NULL; - udp_sk(sk)->encap_type = 0; + WRITE_ONCE(udp_sk(sk)->encap_type, 0); rcu_assign_sk_user_data(sk, NULL); release_sock(sk); sock_put(sk); @@ -682,7 +682,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk); - switch (udp_sk(sk)->encap_type) { + switch (READ_ONCE(udp_sk(sk)->encap_type)) { case UDP_ENCAP_GTP0: netdev_dbg(gtp->dev, "received GTP0 packet\n"); ret = gtp0_udp_encap_recv(gtp, skb); diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c index c727103c8b05..70edeeb7771e 100644 --- a/drivers/net/mdio/mdio-aspeed.c +++ b/drivers/net/mdio/mdio-aspeed.c @@ -177,15 +177,13 @@ static int aspeed_mdio_probe(struct platform_device *pdev) return 0; } -static int aspeed_mdio_remove(struct platform_device *pdev) +static void aspeed_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = (struct mii_bus *)platform_get_drvdata(pdev); struct aspeed_mdio *ctx = bus->priv; reset_control_assert(ctx->reset); mdiobus_unregister(bus); - - return 0; } static const struct of_device_id aspeed_mdio_of_match[] = { @@ -200,7 +198,7 @@ static struct platform_driver aspeed_mdio_driver = { .of_match_table = aspeed_mdio_of_match, }, .probe = aspeed_mdio_probe, - .remove = aspeed_mdio_remove, + .remove_new = aspeed_mdio_remove, }; module_platform_driver(aspeed_mdio_driver); diff --git a/drivers/net/mdio/mdio-bcm-iproc.c b/drivers/net/mdio/mdio-bcm-iproc.c index 77fc970cdfde..5a2d26c6afdc 100644 --- a/drivers/net/mdio/mdio-bcm-iproc.c +++ b/drivers/net/mdio/mdio-bcm-iproc.c @@ -168,14 +168,12 @@ err_iproc_mdio: return rc; } -static int iproc_mdio_remove(struct platform_device *pdev) +static void iproc_mdio_remove(struct platform_device *pdev) { struct iproc_mdio_priv *priv = platform_get_drvdata(pdev); mdiobus_unregister(priv->mii_bus); mdiobus_free(priv->mii_bus); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -210,7 +208,7 @@ static struct platform_driver iproc_mdio_driver = { #endif }, .probe = iproc_mdio_probe, - .remove = iproc_mdio_remove, + .remove_new = iproc_mdio_remove, }; module_platform_driver(iproc_mdio_driver); diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c index 6b26a0803696..e8cd8eef319b 100644 --- a/drivers/net/mdio/mdio-bcm-unimac.c +++ b/drivers/net/mdio/mdio-bcm-unimac.c @@ -296,15 +296,13 @@ out_clk_disable: return ret; } -static int unimac_mdio_remove(struct platform_device *pdev) +static void unimac_mdio_remove(struct platform_device *pdev) { struct unimac_mdio_priv *priv = platform_get_drvdata(pdev); mdiobus_unregister(priv->mii_bus); mdiobus_free(priv->mii_bus); clk_disable_unprepare(priv->clk); - - return 0; } static int __maybe_unused unimac_mdio_suspend(struct device *d) @@ -353,7 +351,7 @@ static struct platform_driver unimac_mdio_driver = { .pm = &unimac_mdio_pm_ops, }, .probe = unimac_mdio_probe, - .remove = unimac_mdio_remove, + .remove_new = unimac_mdio_remove, }; module_platform_driver(unimac_mdio_driver); diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c index 0fb3c2de0845..897b88c50bbb 100644 --- a/drivers/net/mdio/mdio-gpio.c +++ b/drivers/net/mdio/mdio-gpio.c @@ -194,11 +194,9 @@ static int mdio_gpio_probe(struct platform_device *pdev) return ret; } -static int mdio_gpio_remove(struct platform_device *pdev) +static void mdio_gpio_remove(struct platform_device *pdev) { mdio_gpio_bus_destroy(&pdev->dev); - - return 0; } static const struct of_device_id mdio_gpio_of_match[] = { @@ -210,7 +208,7 @@ MODULE_DEVICE_TABLE(of, mdio_gpio_of_match); static struct platform_driver mdio_gpio_driver = { .probe = mdio_gpio_probe, - .remove = mdio_gpio_remove, + .remove_new = mdio_gpio_remove, .driver = { .name = "mdio-gpio", .of_match_table = mdio_gpio_of_match, diff --git a/drivers/net/mdio/mdio-hisi-femac.c b/drivers/net/mdio/mdio-hisi-femac.c index f231c2fbb1de..6703f626ee83 100644 --- a/drivers/net/mdio/mdio-hisi-femac.c +++ b/drivers/net/mdio/mdio-hisi-femac.c @@ -118,7 +118,7 @@ err_out_free_mdiobus: return ret; } -static int hisi_femac_mdio_remove(struct platform_device *pdev) +static void hisi_femac_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); struct hisi_femac_mdio_data *data = bus->priv; @@ -126,8 +126,6 @@ static int hisi_femac_mdio_remove(struct platform_device *pdev) mdiobus_unregister(bus); clk_disable_unprepare(data->clk); mdiobus_free(bus); - - return 0; } static const struct of_device_id hisi_femac_mdio_dt_ids[] = { @@ -138,7 +136,7 @@ MODULE_DEVICE_TABLE(of, hisi_femac_mdio_dt_ids); static struct platform_driver hisi_femac_mdio_driver = { .probe = hisi_femac_mdio_probe, - .remove = hisi_femac_mdio_remove, + .remove_new = hisi_femac_mdio_remove, .driver = { .name = "hisi-femac-mdio", .of_match_table = hisi_femac_mdio_dt_ids, diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c index 78b93de636f5..abd8b508ec16 100644 --- a/drivers/net/mdio/mdio-ipq4019.c +++ b/drivers/net/mdio/mdio-ipq4019.c @@ -278,13 +278,11 @@ static int ipq4019_mdio_probe(struct platform_device *pdev) return 0; } -static int ipq4019_mdio_remove(struct platform_device *pdev) +static void ipq4019_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); mdiobus_unregister(bus); - - return 0; } static const struct of_device_id ipq4019_mdio_dt_ids[] = { @@ -296,7 +294,7 @@ MODULE_DEVICE_TABLE(of, ipq4019_mdio_dt_ids); static struct platform_driver ipq4019_mdio_driver = { .probe = ipq4019_mdio_probe, - .remove = ipq4019_mdio_remove, + .remove_new = ipq4019_mdio_remove, .driver = { .name = "ipq4019-mdio", .of_match_table = ipq4019_mdio_dt_ids, diff --git a/drivers/net/mdio/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c index fd9716960106..f71b6e1c66e4 100644 --- a/drivers/net/mdio/mdio-ipq8064.c +++ b/drivers/net/mdio/mdio-ipq8064.c @@ -147,14 +147,11 @@ ipq8064_mdio_probe(struct platform_device *pdev) return 0; } -static int -ipq8064_mdio_remove(struct platform_device *pdev) +static void ipq8064_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); mdiobus_unregister(bus); - - return 0; } static const struct of_device_id ipq8064_mdio_dt_ids[] = { @@ -165,7 +162,7 @@ MODULE_DEVICE_TABLE(of, ipq8064_mdio_dt_ids); static struct platform_driver ipq8064_mdio_driver = { .probe = ipq8064_mdio_probe, - .remove = ipq8064_mdio_remove, + .remove_new = ipq8064_mdio_remove, .driver = { .name = "ipq8064-mdio", .of_match_table = ipq8064_mdio_dt_ids, diff --git a/drivers/net/mdio/mdio-moxart.c b/drivers/net/mdio/mdio-moxart.c index f0cff584e176..d35af8cd7c4d 100644 --- a/drivers/net/mdio/mdio-moxart.c +++ b/drivers/net/mdio/mdio-moxart.c @@ -155,14 +155,12 @@ err_out_free_mdiobus: return ret; } -static int moxart_mdio_remove(struct platform_device *pdev) +static void moxart_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); mdiobus_unregister(bus); mdiobus_free(bus); - - return 0; } static const struct of_device_id moxart_mdio_dt_ids[] = { @@ -173,7 +171,7 @@ MODULE_DEVICE_TABLE(of, moxart_mdio_dt_ids); static struct platform_driver moxart_mdio_driver = { .probe = moxart_mdio_probe, - .remove = moxart_mdio_remove, + .remove_new = moxart_mdio_remove, .driver = { .name = "moxart-mdio", .of_match_table = moxart_mdio_dt_ids, diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c index 1a1b95ae95fa..c29377c85307 100644 --- a/drivers/net/mdio/mdio-mscc-miim.c +++ b/drivers/net/mdio/mdio-mscc-miim.c @@ -335,15 +335,13 @@ out_disable_clk: return ret; } -static int mscc_miim_remove(struct platform_device *pdev) +static void mscc_miim_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); struct mscc_miim_dev *miim = bus->priv; clk_disable_unprepare(miim->clk); mdiobus_unregister(bus); - - return 0; } static const struct mscc_miim_info mscc_ocelot_miim_info = { @@ -371,7 +369,7 @@ MODULE_DEVICE_TABLE(of, mscc_miim_match); static struct platform_driver mscc_miim_driver = { .probe = mscc_miim_probe, - .remove = mscc_miim_remove, + .remove_new = mscc_miim_remove, .driver = { .name = "mscc-miim", .of_match_table = mscc_miim_match, diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c index 956d54846b62..a750bd4c77a0 100644 --- a/drivers/net/mdio/mdio-mux-bcm-iproc.c +++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c @@ -287,15 +287,13 @@ out_clk: return rc; } -static int mdio_mux_iproc_remove(struct platform_device *pdev) +static void mdio_mux_iproc_remove(struct platform_device *pdev) { struct iproc_mdiomux_desc *md = platform_get_drvdata(pdev); mdio_mux_uninit(md->mux_handle); mdiobus_unregister(md->mii_bus); clk_disable_unprepare(md->core_clk); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -342,7 +340,7 @@ static struct platform_driver mdiomux_iproc_driver = { .pm = &mdio_mux_iproc_pm_ops, }, .probe = mdio_mux_iproc_probe, - .remove = mdio_mux_iproc_remove, + .remove_new = mdio_mux_iproc_remove, }; module_platform_driver(mdiomux_iproc_driver); diff --git a/drivers/net/mdio/mdio-mux-bcm6368.c b/drivers/net/mdio/mdio-mux-bcm6368.c index 8b444a8eb6b5..1b77e0e3e6e1 100644 --- a/drivers/net/mdio/mdio-mux-bcm6368.c +++ b/drivers/net/mdio/mdio-mux-bcm6368.c @@ -153,14 +153,12 @@ out_register: return rc; } -static int bcm6368_mdiomux_remove(struct platform_device *pdev) +static void bcm6368_mdiomux_remove(struct platform_device *pdev) { struct bcm6368_mdiomux_desc *md = platform_get_drvdata(pdev); mdio_mux_uninit(md->mux_handle); mdiobus_unregister(md->mii_bus); - - return 0; } static const struct of_device_id bcm6368_mdiomux_ids[] = { @@ -175,7 +173,7 @@ static struct platform_driver bcm6368_mdiomux_driver = { .of_match_table = bcm6368_mdiomux_ids, }, .probe = bcm6368_mdiomux_probe, - .remove = bcm6368_mdiomux_remove, + .remove_new = bcm6368_mdiomux_remove, }; module_platform_driver(bcm6368_mdiomux_driver); diff --git a/drivers/net/mdio/mdio-mux-gpio.c b/drivers/net/mdio/mdio-mux-gpio.c index 3c7f16f06b45..38fb031f8979 100644 --- a/drivers/net/mdio/mdio-mux-gpio.c +++ b/drivers/net/mdio/mdio-mux-gpio.c @@ -62,11 +62,10 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev) return 0; } -static int mdio_mux_gpio_remove(struct platform_device *pdev) +static void mdio_mux_gpio_remove(struct platform_device *pdev) { struct mdio_mux_gpio_state *s = dev_get_platdata(&pdev->dev); mdio_mux_uninit(s->mux_handle); - return 0; } static const struct of_device_id mdio_mux_gpio_match[] = { @@ -87,7 +86,7 @@ static struct platform_driver mdio_mux_gpio_driver = { .of_match_table = mdio_mux_gpio_match, }, .probe = mdio_mux_gpio_probe, - .remove = mdio_mux_gpio_remove, + .remove_new = mdio_mux_gpio_remove, }; module_platform_driver(mdio_mux_gpio_driver); diff --git a/drivers/net/mdio/mdio-mux-meson-g12a.c b/drivers/net/mdio/mdio-mux-meson-g12a.c index 910e5cf74e89..754b0f2cf15b 100644 --- a/drivers/net/mdio/mdio-mux-meson-g12a.c +++ b/drivers/net/mdio/mdio-mux-meson-g12a.c @@ -336,7 +336,7 @@ static int g12a_mdio_mux_probe(struct platform_device *pdev) return ret; } -static int g12a_mdio_mux_remove(struct platform_device *pdev) +static void g12a_mdio_mux_remove(struct platform_device *pdev) { struct g12a_mdio_mux *priv = platform_get_drvdata(pdev); @@ -344,13 +344,11 @@ static int g12a_mdio_mux_remove(struct platform_device *pdev) if (__clk_is_enabled(priv->pll)) clk_disable_unprepare(priv->pll); - - return 0; } static struct platform_driver g12a_mdio_mux_driver = { .probe = g12a_mdio_mux_probe, - .remove = g12a_mdio_mux_remove, + .remove_new = g12a_mdio_mux_remove, .driver = { .name = "g12a-mdio_mux", .of_match_table = g12a_mdio_mux_match, diff --git a/drivers/net/mdio/mdio-mux-meson-gxl.c b/drivers/net/mdio/mdio-mux-meson-gxl.c index 76188575ca1f..89554021b5cc 100644 --- a/drivers/net/mdio/mdio-mux-meson-gxl.c +++ b/drivers/net/mdio/mdio-mux-meson-gxl.c @@ -140,18 +140,16 @@ static int gxl_mdio_mux_probe(struct platform_device *pdev) return ret; } -static int gxl_mdio_mux_remove(struct platform_device *pdev) +static void gxl_mdio_mux_remove(struct platform_device *pdev) { struct gxl_mdio_mux *priv = platform_get_drvdata(pdev); mdio_mux_uninit(priv->mux_handle); - - return 0; } static struct platform_driver gxl_mdio_mux_driver = { .probe = gxl_mdio_mux_probe, - .remove = gxl_mdio_mux_remove, + .remove_new = gxl_mdio_mux_remove, .driver = { .name = "gxl-mdio-mux", .of_match_table = gxl_mdio_mux_match, diff --git a/drivers/net/mdio/mdio-mux-mmioreg.c b/drivers/net/mdio/mdio-mux-mmioreg.c index 09af150ed774..de08419d0c98 100644 --- a/drivers/net/mdio/mdio-mux-mmioreg.c +++ b/drivers/net/mdio/mdio-mux-mmioreg.c @@ -169,13 +169,11 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev) return 0; } -static int mdio_mux_mmioreg_remove(struct platform_device *pdev) +static void mdio_mux_mmioreg_remove(struct platform_device *pdev) { struct mdio_mux_mmioreg_state *s = dev_get_platdata(&pdev->dev); mdio_mux_uninit(s->mux_handle); - - return 0; } static const struct of_device_id mdio_mux_mmioreg_match[] = { @@ -192,7 +190,7 @@ static struct platform_driver mdio_mux_mmioreg_driver = { .of_match_table = mdio_mux_mmioreg_match, }, .probe = mdio_mux_mmioreg_probe, - .remove = mdio_mux_mmioreg_remove, + .remove_new = mdio_mux_mmioreg_remove, }; module_platform_driver(mdio_mux_mmioreg_driver); diff --git a/drivers/net/mdio/mdio-mux-multiplexer.c b/drivers/net/mdio/mdio-mux-multiplexer.c index bfa5af577b0a..569b13383191 100644 --- a/drivers/net/mdio/mdio-mux-multiplexer.c +++ b/drivers/net/mdio/mdio-mux-multiplexer.c @@ -85,7 +85,7 @@ static int mdio_mux_multiplexer_probe(struct platform_device *pdev) return ret; } -static int mdio_mux_multiplexer_remove(struct platform_device *pdev) +static void mdio_mux_multiplexer_remove(struct platform_device *pdev) { struct mdio_mux_multiplexer_state *s = platform_get_drvdata(pdev); @@ -93,8 +93,6 @@ static int mdio_mux_multiplexer_remove(struct platform_device *pdev) if (s->do_deselect) mux_control_deselect(s->muxc); - - return 0; } static const struct of_device_id mdio_mux_multiplexer_match[] = { @@ -109,7 +107,7 @@ static struct platform_driver mdio_mux_multiplexer_driver = { .of_match_table = mdio_mux_multiplexer_match, }, .probe = mdio_mux_multiplexer_probe, - .remove = mdio_mux_multiplexer_remove, + .remove_new = mdio_mux_multiplexer_remove, }; module_platform_driver(mdio_mux_multiplexer_driver); diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c index 7c65c547d377..037a38cfed56 100644 --- a/drivers/net/mdio/mdio-octeon.c +++ b/drivers/net/mdio/mdio-octeon.c @@ -78,7 +78,7 @@ fail_register: return err; } -static int octeon_mdiobus_remove(struct platform_device *pdev) +static void octeon_mdiobus_remove(struct platform_device *pdev) { struct cavium_mdiobus *bus; union cvmx_smix_en smi_en; @@ -88,7 +88,6 @@ static int octeon_mdiobus_remove(struct platform_device *pdev) mdiobus_unregister(bus->mii_bus); smi_en.u64 = 0; oct_mdio_writeq(smi_en.u64, bus->register_base + SMI_EN); - return 0; } static const struct of_device_id octeon_mdiobus_match[] = { @@ -105,7 +104,7 @@ static struct platform_driver octeon_mdiobus_driver = { .of_match_table = octeon_mdiobus_match, }, .probe = octeon_mdiobus_probe, - .remove = octeon_mdiobus_remove, + .remove_new = octeon_mdiobus_remove, }; module_platform_driver(octeon_mdiobus_driver); diff --git a/drivers/net/mdio/mdio-sun4i.c b/drivers/net/mdio/mdio-sun4i.c index f798de3276dc..4511bcc73b36 100644 --- a/drivers/net/mdio/mdio-sun4i.c +++ b/drivers/net/mdio/mdio-sun4i.c @@ -142,7 +142,7 @@ err_out_free_mdiobus: return ret; } -static int sun4i_mdio_remove(struct platform_device *pdev) +static void sun4i_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); struct sun4i_mdio_data *data = bus->priv; @@ -151,8 +151,6 @@ static int sun4i_mdio_remove(struct platform_device *pdev) if (data->regulator) regulator_disable(data->regulator); mdiobus_free(bus); - - return 0; } static const struct of_device_id sun4i_mdio_dt_ids[] = { @@ -166,7 +164,7 @@ MODULE_DEVICE_TABLE(of, sun4i_mdio_dt_ids); static struct platform_driver sun4i_mdio_driver = { .probe = sun4i_mdio_probe, - .remove = sun4i_mdio_remove, + .remove_new = sun4i_mdio_remove, .driver = { .name = "sun4i-mdio", .of_match_table = sun4i_mdio_dt_ids, diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c index 1190a793555a..7909d7caf45c 100644 --- a/drivers/net/mdio/mdio-xgene.c +++ b/drivers/net/mdio/mdio-xgene.c @@ -432,7 +432,7 @@ out_clk: return ret; } -static int xgene_mdio_remove(struct platform_device *pdev) +static void xgene_mdio_remove(struct platform_device *pdev) { struct xgene_mdio_pdata *pdata = platform_get_drvdata(pdev); struct mii_bus *mdio_bus = pdata->mdio_bus; @@ -443,8 +443,6 @@ static int xgene_mdio_remove(struct platform_device *pdev) if (dev->of_node) clk_disable_unprepare(pdata->clk); - - return 0; } static struct platform_driver xgene_mdio_driver = { @@ -454,7 +452,7 @@ static struct platform_driver xgene_mdio_driver = { .acpi_match_table = ACPI_PTR(xgene_mdio_acpi_match), }, .probe = xgene_mdio_probe, - .remove = xgene_mdio_remove, + .remove_new = xgene_mdio_remove, }; module_platform_driver(xgene_mdio_driver); diff --git a/drivers/net/phy/ax88796b.c b/drivers/net/phy/ax88796b.c index 0f1e617a26c9..eb74a8cf8df1 100644 --- a/drivers/net/phy/ax88796b.c +++ b/drivers/net/phy/ax88796b.c @@ -90,7 +90,7 @@ static void asix_ax88772a_link_change_notify(struct phy_device *phydev) */ if (phydev->state == PHY_NOLINK) { phy_init_hw(phydev); - phy_start_aneg(phydev); + _phy_start_aneg(phydev); } } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index df54c137c5f5..a5fa077650e8 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -981,7 +981,7 @@ static int phy_check_link_status(struct phy_device *phydev) * If the PHYCONTROL Layer is operating, we change the state to * reflect the beginning of Auto-negotiation or forcing. */ -static int _phy_start_aneg(struct phy_device *phydev) +int _phy_start_aneg(struct phy_device *phydev) { int err; @@ -1002,6 +1002,7 @@ static int _phy_start_aneg(struct phy_device *phydev) return err; } +EXPORT_SYMBOL(_phy_start_aneg); /** * phy_start_aneg - start auto-negotiation for this PHY device @@ -1231,9 +1232,7 @@ static void phy_error_precise(struct phy_device *phydev, const void *func, int err) { WARN(1, "%pS: returned: %d\n", func, err); - mutex_lock(&phydev->lock); phy_process_error(phydev); - mutex_unlock(&phydev->lock); } /** @@ -1355,6 +1354,113 @@ void phy_free_interrupt(struct phy_device *phydev) } EXPORT_SYMBOL(phy_free_interrupt); +enum phy_state_work { + PHY_STATE_WORK_NONE, + PHY_STATE_WORK_ANEG, + PHY_STATE_WORK_SUSPEND, +}; + +static enum phy_state_work _phy_state_machine(struct phy_device *phydev) +{ + enum phy_state_work state_work = PHY_STATE_WORK_NONE; + struct net_device *dev = phydev->attached_dev; + enum phy_state old_state = phydev->state; + const void *func = NULL; + bool finished = false; + int err = 0; + + switch (phydev->state) { + case PHY_DOWN: + case PHY_READY: + break; + case PHY_UP: + state_work = PHY_STATE_WORK_ANEG; + break; + case PHY_NOLINK: + case PHY_RUNNING: + err = phy_check_link_status(phydev); + func = &phy_check_link_status; + break; + case PHY_CABLETEST: + err = phydev->drv->cable_test_get_status(phydev, &finished); + if (err) { + phy_abort_cable_test(phydev); + netif_testing_off(dev); + state_work = PHY_STATE_WORK_ANEG; + phydev->state = PHY_UP; + break; + } + + if (finished) { + ethnl_cable_test_finished(phydev); + netif_testing_off(dev); + state_work = PHY_STATE_WORK_ANEG; + phydev->state = PHY_UP; + } + break; + case PHY_HALTED: + case PHY_ERROR: + if (phydev->link) { + phydev->link = 0; + phy_link_down(phydev); + } + state_work = PHY_STATE_WORK_SUSPEND; + break; + } + + if (state_work == PHY_STATE_WORK_ANEG) { + err = _phy_start_aneg(phydev); + func = &_phy_start_aneg; + } + + if (err == -ENODEV) + return state_work; + + if (err < 0) + phy_error_precise(phydev, func, err); + + phy_process_state_change(phydev, old_state); + + /* Only re-schedule a PHY state machine change if we are polling the + * PHY, if PHY_MAC_INTERRUPT is set, then we will be moving + * between states from phy_mac_interrupt(). + * + * In state PHY_HALTED the PHY gets suspended, so rescheduling the + * state machine would be pointless and possibly error prone when + * called from phy_disconnect() synchronously. + */ + if (phy_polling_mode(phydev) && phy_is_started(phydev)) + phy_queue_state_machine(phydev, PHY_STATE_TIME); + + return state_work; +} + +/* unlocked part of the PHY state machine */ +static void _phy_state_machine_post_work(struct phy_device *phydev, + enum phy_state_work state_work) +{ + if (state_work == PHY_STATE_WORK_SUSPEND) + phy_suspend(phydev); +} + +/** + * phy_state_machine - Handle the state machine + * @work: work_struct that describes the work to be done + */ +void phy_state_machine(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct phy_device *phydev = + container_of(dwork, struct phy_device, state_queue); + enum phy_state_work state_work; + + mutex_lock(&phydev->lock); + state_work = _phy_state_machine(phydev); + mutex_unlock(&phydev->lock); + + _phy_state_machine_post_work(phydev, state_work); +} + /** * phy_stop - Bring down the PHY link, and stop checking the status * @phydev: target phy_device struct @@ -1362,6 +1468,7 @@ EXPORT_SYMBOL(phy_free_interrupt); void phy_stop(struct phy_device *phydev) { struct net_device *dev = phydev->attached_dev; + enum phy_state_work state_work; enum phy_state old_state; if (!phy_is_started(phydev) && phydev->state != PHY_DOWN && @@ -1385,9 +1492,10 @@ void phy_stop(struct phy_device *phydev) phydev->state = PHY_HALTED; phy_process_state_change(phydev, old_state); + state_work = _phy_state_machine(phydev); mutex_unlock(&phydev->lock); - phy_state_machine(&phydev->state_queue.work); + _phy_state_machine_post_work(phydev, state_work); phy_stop_machine(phydev); /* Cannot call flush_scheduled_work() here as desired because @@ -1432,97 +1540,6 @@ out: EXPORT_SYMBOL(phy_start); /** - * phy_state_machine - Handle the state machine - * @work: work_struct that describes the work to be done - */ -void phy_state_machine(struct work_struct *work) -{ - struct delayed_work *dwork = to_delayed_work(work); - struct phy_device *phydev = - container_of(dwork, struct phy_device, state_queue); - struct net_device *dev = phydev->attached_dev; - bool needs_aneg = false, do_suspend = false; - enum phy_state old_state; - const void *func = NULL; - bool finished = false; - int err = 0; - - mutex_lock(&phydev->lock); - - old_state = phydev->state; - - switch (phydev->state) { - case PHY_DOWN: - case PHY_READY: - break; - case PHY_UP: - needs_aneg = true; - - break; - case PHY_NOLINK: - case PHY_RUNNING: - err = phy_check_link_status(phydev); - func = &phy_check_link_status; - break; - case PHY_CABLETEST: - err = phydev->drv->cable_test_get_status(phydev, &finished); - if (err) { - phy_abort_cable_test(phydev); - netif_testing_off(dev); - needs_aneg = true; - phydev->state = PHY_UP; - break; - } - - if (finished) { - ethnl_cable_test_finished(phydev); - netif_testing_off(dev); - needs_aneg = true; - phydev->state = PHY_UP; - } - break; - case PHY_HALTED: - case PHY_ERROR: - if (phydev->link) { - phydev->link = 0; - phy_link_down(phydev); - } - do_suspend = true; - break; - } - - mutex_unlock(&phydev->lock); - - if (needs_aneg) { - err = phy_start_aneg(phydev); - func = &phy_start_aneg; - } else if (do_suspend) { - phy_suspend(phydev); - } - - if (err == -ENODEV) - return; - - if (err < 0) - phy_error_precise(phydev, func, err); - - phy_process_state_change(phydev, old_state); - - /* Only re-schedule a PHY state machine change if we are polling the - * PHY, if PHY_MAC_INTERRUPT is set, then we will be moving - * between states from phy_mac_interrupt(). - * - * In state PHY_HALTED the PHY gets suspended, so rescheduling the - * state machine would be pointless and possibly error prone when - * called from phy_disconnect() synchronously. - */ - mutex_lock(&phydev->lock); - if (phy_polling_mode(phydev) && phy_is_started(phydev)) - phy_queue_state_machine(phydev, PHY_STATE_TIME); - mutex_unlock(&phydev->lock); -} - -/** * phy_mac_interrupt - MAC says the link has changed * @phydev: phy_device struct with changed link * diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c index fc7ace638ce8..e7d8e03f826f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c @@ -591,7 +591,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed) static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size) { - struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc; + struct mtk_wed_bm_desc *desc = wed->rx_buf_ring.desc; struct mt76_txwi_cache *t = NULL; struct mt7915_dev *dev; struct mt76_queue *q; diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index 284ab1f56391..87df60916960 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2021, Linaro Ltd <loic.poulain@linaro.org> */ +#include <linux/bitmap.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/debugfs.h> @@ -395,7 +396,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt) char buf[0x20]; int id; - idmap = (unsigned long *)get_zeroed_page(GFP_KERNEL); + idmap = bitmap_zalloc(max_ports, GFP_KERNEL); if (!idmap) return -ENOMEM; @@ -414,7 +415,7 @@ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt) /* Allocate unique id */ id = find_first_zero_bit(idmap, max_ports); - free_page((unsigned long)idmap); + bitmap_free(idmap); snprintf(buf, sizeof(buf), fmt, id); /* Name generation */ diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index ed9d97a032f1..5dd5f188e14f 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -188,6 +188,7 @@ config PTP_1588_CLOCK_OCP depends on COMMON_CLK select NET_DEVLINK select CRC16 + select DPLL help This driver adds support for an OpenCompute time card. diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index a7a6947ab4bc..41eaffcae462 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -23,6 +23,7 @@ #include <linux/mtd/mtd.h> #include <linux/nvmem-consumer.h> #include <linux/crc16.h> +#include <linux/dpll.h> #define PCI_VENDOR_ID_FACEBOOK 0x1d9b #define PCI_DEVICE_ID_FACEBOOK_TIMECARD 0x0400 @@ -260,12 +261,21 @@ enum ptp_ocp_sma_mode { SMA_MODE_OUT, }; +static struct dpll_pin_frequency ptp_ocp_sma_freq[] = { + DPLL_PIN_FREQUENCY_1PPS, + DPLL_PIN_FREQUENCY_10MHZ, + DPLL_PIN_FREQUENCY_IRIG_B, + DPLL_PIN_FREQUENCY_DCF77, +}; + struct ptp_ocp_sma_connector { enum ptp_ocp_sma_mode mode; bool fixed_fcn; bool fixed_dir; bool disabled; u8 default_fcn; + struct dpll_pin *dpll_pin; + struct dpll_pin_properties dpll_prop; }; struct ocp_attr_group { @@ -294,6 +304,7 @@ struct ptp_ocp_serial_port { #define OCP_BOARD_ID_LEN 13 #define OCP_SERIAL_LEN 6 +#define OCP_SMA_NUM 4 struct ptp_ocp { struct pci_dev *pdev; @@ -331,7 +342,9 @@ struct ptp_ocp { const struct attribute_group **attr_group; const struct ptp_ocp_eeprom_map *eeprom_map; struct dentry *debug_root; + bool sync; time64_t gnss_lost; + struct delayed_work sync_work; int id; int n_irqs; struct ptp_ocp_serial_port gnss_port; @@ -350,8 +363,9 @@ struct ptp_ocp { u32 ts_window_adjust; u64 fw_cap; struct ptp_ocp_signal signal[4]; - struct ptp_ocp_sma_connector sma[4]; + struct ptp_ocp_sma_connector sma[OCP_SMA_NUM]; const struct ocp_sma_op *sma_op; + struct dpll_device *dpll; }; #define OCP_REQ_TIMESTAMP BIT(0) @@ -835,6 +849,7 @@ static DEFINE_IDR(ptp_ocp_idr); struct ocp_selector { const char *name; int value; + u64 frequency; }; static const struct ocp_selector ptp_ocp_clock[] = { @@ -855,31 +870,31 @@ static const struct ocp_selector ptp_ocp_clock[] = { #define SMA_SELECT_MASK GENMASK(14, 0) static const struct ocp_selector ptp_ocp_sma_in[] = { - { .name = "10Mhz", .value = 0x0000 }, - { .name = "PPS1", .value = 0x0001 }, - { .name = "PPS2", .value = 0x0002 }, - { .name = "TS1", .value = 0x0004 }, - { .name = "TS2", .value = 0x0008 }, - { .name = "IRIG", .value = 0x0010 }, - { .name = "DCF", .value = 0x0020 }, - { .name = "TS3", .value = 0x0040 }, - { .name = "TS4", .value = 0x0080 }, - { .name = "FREQ1", .value = 0x0100 }, - { .name = "FREQ2", .value = 0x0200 }, - { .name = "FREQ3", .value = 0x0400 }, - { .name = "FREQ4", .value = 0x0800 }, - { .name = "None", .value = SMA_DISABLE }, + { .name = "10Mhz", .value = 0x0000, .frequency = 10000000 }, + { .name = "PPS1", .value = 0x0001, .frequency = 1 }, + { .name = "PPS2", .value = 0x0002, .frequency = 1 }, + { .name = "TS1", .value = 0x0004, .frequency = 0 }, + { .name = "TS2", .value = 0x0008, .frequency = 0 }, + { .name = "IRIG", .value = 0x0010, .frequency = 10000 }, + { .name = "DCF", .value = 0x0020, .frequency = 77500 }, + { .name = "TS3", .value = 0x0040, .frequency = 0 }, + { .name = "TS4", .value = 0x0080, .frequency = 0 }, + { .name = "FREQ1", .value = 0x0100, .frequency = 0 }, + { .name = "FREQ2", .value = 0x0200, .frequency = 0 }, + { .name = "FREQ3", .value = 0x0400, .frequency = 0 }, + { .name = "FREQ4", .value = 0x0800, .frequency = 0 }, + { .name = "None", .value = SMA_DISABLE, .frequency = 0 }, { } }; static const struct ocp_selector ptp_ocp_sma_out[] = { - { .name = "10Mhz", .value = 0x0000 }, - { .name = "PHC", .value = 0x0001 }, - { .name = "MAC", .value = 0x0002 }, - { .name = "GNSS1", .value = 0x0004 }, - { .name = "GNSS2", .value = 0x0008 }, - { .name = "IRIG", .value = 0x0010 }, - { .name = "DCF", .value = 0x0020 }, + { .name = "10Mhz", .value = 0x0000, .frequency = 10000000 }, + { .name = "PHC", .value = 0x0001, .frequency = 1 }, + { .name = "MAC", .value = 0x0002, .frequency = 1 }, + { .name = "GNSS1", .value = 0x0004, .frequency = 1 }, + { .name = "GNSS2", .value = 0x0008, .frequency = 1 }, + { .name = "IRIG", .value = 0x0010, .frequency = 10000 }, + { .name = "DCF", .value = 0x0020, .frequency = 77000 }, { .name = "GEN1", .value = 0x0040 }, { .name = "GEN2", .value = 0x0080 }, { .name = "GEN3", .value = 0x0100 }, @@ -890,15 +905,15 @@ static const struct ocp_selector ptp_ocp_sma_out[] = { }; static const struct ocp_selector ptp_ocp_art_sma_in[] = { - { .name = "PPS1", .value = 0x0001 }, - { .name = "10Mhz", .value = 0x0008 }, + { .name = "PPS1", .value = 0x0001, .frequency = 1 }, + { .name = "10Mhz", .value = 0x0008, .frequency = 1000000 }, { } }; static const struct ocp_selector ptp_ocp_art_sma_out[] = { - { .name = "PHC", .value = 0x0002 }, - { .name = "GNSS", .value = 0x0004 }, - { .name = "10Mhz", .value = 0x0010 }, + { .name = "PHC", .value = 0x0002, .frequency = 1 }, + { .name = "GNSS", .value = 0x0004, .frequency = 1 }, + { .name = "10Mhz", .value = 0x0010, .frequency = 10000000 }, { } }; @@ -1351,7 +1366,6 @@ static int ptp_ocp_init_clock(struct ptp_ocp *bp) { struct timespec64 ts; - bool sync; u32 ctrl; ctrl = OCP_CTRL_ENABLE; @@ -1375,8 +1389,8 @@ ptp_ocp_init_clock(struct ptp_ocp *bp) ptp_ocp_estimate_pci_timing(bp); - sync = ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC; - if (!sync) { + bp->sync = ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC; + if (!bp->sync) { ktime_get_clocktai_ts64(&ts); ptp_ocp_settime(&bp->ptp_info, &ts); } @@ -2289,22 +2303,35 @@ ptp_ocp_sma_fb_set_inputs(struct ptp_ocp *bp, int sma_nr, u32 val) static void ptp_ocp_sma_fb_init(struct ptp_ocp *bp) { + struct dpll_pin_properties prop = { + .board_label = NULL, + .type = DPLL_PIN_TYPE_EXT, + .capabilities = DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE, + .freq_supported_num = ARRAY_SIZE(ptp_ocp_sma_freq), + .freq_supported = ptp_ocp_sma_freq, + + }; u32 reg; int i; /* defaults */ + for (i = 0; i < OCP_SMA_NUM; i++) { + bp->sma[i].default_fcn = i & 1; + bp->sma[i].dpll_prop = prop; + bp->sma[i].dpll_prop.board_label = + bp->ptp_info.pin_config[i].name; + } bp->sma[0].mode = SMA_MODE_IN; bp->sma[1].mode = SMA_MODE_IN; bp->sma[2].mode = SMA_MODE_OUT; bp->sma[3].mode = SMA_MODE_OUT; - for (i = 0; i < 4; i++) - bp->sma[i].default_fcn = i & 1; - /* If no SMA1 map, the pin functions and directions are fixed. */ if (!bp->sma_map1) { - for (i = 0; i < 4; i++) { + for (i = 0; i < OCP_SMA_NUM; i++) { bp->sma[i].fixed_fcn = true; bp->sma[i].fixed_dir = true; + bp->sma[1].dpll_prop.capabilities &= + ~DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE; } return; } @@ -2314,7 +2341,7 @@ ptp_ocp_sma_fb_init(struct ptp_ocp *bp) */ reg = ioread32(&bp->sma_map2->gpio2); if (reg == 0xffffffff) { - for (i = 0; i < 4; i++) + for (i = 0; i < OCP_SMA_NUM; i++) bp->sma[i].fixed_dir = true; } else { reg = ioread32(&bp->sma_map1->gpio1); @@ -2336,7 +2363,7 @@ static const struct ocp_sma_op ocp_fb_sma_op = { }; static int -ptp_ocp_fb_set_pins(struct ptp_ocp *bp) +ptp_ocp_set_pins(struct ptp_ocp *bp) { struct ptp_pin_desc *config; int i; @@ -2403,16 +2430,16 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r) ptp_ocp_tod_init(bp); ptp_ocp_nmea_out_init(bp); - ptp_ocp_sma_init(bp); ptp_ocp_signal_init(bp); err = ptp_ocp_attr_group_add(bp, fb_timecard_groups); if (err) return err; - err = ptp_ocp_fb_set_pins(bp); + err = ptp_ocp_set_pins(bp); if (err) return err; + ptp_ocp_sma_init(bp); return ptp_ocp_init_clock(bp); } @@ -2452,6 +2479,14 @@ ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data) static void ptp_ocp_art_sma_init(struct ptp_ocp *bp) { + struct dpll_pin_properties prop = { + .board_label = NULL, + .type = DPLL_PIN_TYPE_EXT, + .capabilities = 0, + .freq_supported_num = ARRAY_SIZE(ptp_ocp_sma_freq), + .freq_supported = ptp_ocp_sma_freq, + + }; u32 reg; int i; @@ -2466,16 +2501,16 @@ ptp_ocp_art_sma_init(struct ptp_ocp *bp) bp->sma[2].default_fcn = 0x10; /* OUT: 10Mhz */ bp->sma[3].default_fcn = 0x02; /* OUT: PHC */ - /* If no SMA map, the pin functions and directions are fixed. */ - if (!bp->art_sma) { - for (i = 0; i < 4; i++) { + for (i = 0; i < OCP_SMA_NUM; i++) { + /* If no SMA map, the pin functions and directions are fixed. */ + bp->sma[i].dpll_prop = prop; + bp->sma[i].dpll_prop.board_label = + bp->ptp_info.pin_config[i].name; + if (!bp->art_sma) { bp->sma[i].fixed_fcn = true; bp->sma[i].fixed_dir = true; + continue; } - return; - } - - for (i = 0; i < 4; i++) { reg = ioread32(&bp->art_sma->map[i].gpio); switch (reg & 0xff) { @@ -2486,9 +2521,13 @@ ptp_ocp_art_sma_init(struct ptp_ocp *bp) case 1: case 8: bp->sma[i].mode = SMA_MODE_IN; + bp->sma[i].dpll_prop.capabilities = + DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE; break; default: bp->sma[i].mode = SMA_MODE_OUT; + bp->sma[i].dpll_prop.capabilities = + DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE; break; } } @@ -2555,6 +2594,9 @@ ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r) /* Enable MAC serial port during initialisation */ iowrite32(1, &bp->board_config->mro50_serial_activate); + err = ptp_ocp_set_pins(bp); + if (err) + return err; ptp_ocp_sma_init(bp); err = ptp_ocp_attr_group_add(bp, art_timecard_groups); @@ -2696,16 +2738,9 @@ sma4_show(struct device *dev, struct device_attribute *attr, char *buf) } static int -ptp_ocp_sma_store(struct ptp_ocp *bp, const char *buf, int sma_nr) +ptp_ocp_sma_store_val(struct ptp_ocp *bp, int val, enum ptp_ocp_sma_mode mode, int sma_nr) { struct ptp_ocp_sma_connector *sma = &bp->sma[sma_nr - 1]; - enum ptp_ocp_sma_mode mode; - int val; - - mode = sma->mode; - val = sma_parse_inputs(bp->sma_op->tbl, buf, &mode); - if (val < 0) - return val; if (sma->fixed_dir && (mode != sma->mode || val & SMA_DISABLE)) return -EOPNOTSUPP; @@ -2740,6 +2775,20 @@ ptp_ocp_sma_store(struct ptp_ocp *bp, const char *buf, int sma_nr) return val; } +static int +ptp_ocp_sma_store(struct ptp_ocp *bp, const char *buf, int sma_nr) +{ + struct ptp_ocp_sma_connector *sma = &bp->sma[sma_nr - 1]; + enum ptp_ocp_sma_mode mode; + int val; + + mode = sma->mode; + val = sma_parse_inputs(bp->sma_op->tbl, buf, &mode); + if (val < 0) + return val; + return ptp_ocp_sma_store_val(bp, val, mode, sma_nr); +} + static ssize_t sma1_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -3834,9 +3883,8 @@ ptp_ocp_summary_show(struct seq_file *s, void *data) strcpy(buf, "unknown"); break; } - val = ioread32(&bp->reg->status); seq_printf(s, "%7s: %s, state: %s\n", "PHC src", buf, - val & OCP_STATUS_IN_SYNC ? "sync" : "unsynced"); + bp->sync ? "sync" : "unsynced"); if (!ptp_ocp_gettimex(&bp->ptp_info, &ts, &sts)) { struct timespec64 sys_ts; @@ -4067,7 +4115,6 @@ ptp_ocp_phc_info(struct ptp_ocp *bp) { struct timespec64 ts; u32 version, select; - bool sync; version = ioread32(&bp->reg->version); select = ioread32(&bp->reg->select); @@ -4076,11 +4123,10 @@ ptp_ocp_phc_info(struct ptp_ocp *bp) ptp_ocp_select_name_from_val(ptp_ocp_clock, select >> 16), ptp_clock_index(bp->ptp)); - sync = ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC; if (!ptp_ocp_gettimex(&bp->ptp_info, &ts, NULL)) dev_info(&bp->pdev->dev, "Time: %lld.%ld, %s\n", ts.tv_sec, ts.tv_nsec, - sync ? "in-sync" : "UNSYNCED"); + bp->sync ? "in-sync" : "UNSYNCED"); } static void @@ -4177,12 +4223,168 @@ ptp_ocp_detach(struct ptp_ocp *bp) device_unregister(&bp->dev); } +static int ptp_ocp_dpll_lock_status_get(const struct dpll_device *dpll, + void *priv, + enum dpll_lock_status *status, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp *bp = priv; + + *status = bp->sync ? DPLL_LOCK_STATUS_LOCKED : DPLL_LOCK_STATUS_UNLOCKED; + + return 0; +} + +static int ptp_ocp_dpll_state_get(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp *bp = priv; + int idx; + + if (bp->pps_select) { + idx = ioread32(&bp->pps_select->gpio1); + *state = (&bp->sma[idx] == pin_priv) ? DPLL_PIN_STATE_CONNECTED : + DPLL_PIN_STATE_SELECTABLE; + return 0; + } + NL_SET_ERR_MSG(extack, "pin selection is not supported on current HW"); + return -EINVAL; +} + +static int ptp_ocp_dpll_mode_get(const struct dpll_device *dpll, void *priv, + u32 *mode, struct netlink_ext_ack *extack) +{ + *mode = DPLL_MODE_AUTOMATIC; + return 0; +} + +static bool ptp_ocp_dpll_mode_supported(const struct dpll_device *dpll, + void *priv, const enum dpll_mode mode, + struct netlink_ext_ack *extack) +{ + return mode == DPLL_MODE_AUTOMATIC; +} + +static int ptp_ocp_dpll_direction_get(const struct dpll_pin *pin, + void *pin_priv, + const struct dpll_device *dpll, + void *priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp_sma_connector *sma = pin_priv; + + *direction = sma->mode == SMA_MODE_IN ? + DPLL_PIN_DIRECTION_INPUT : + DPLL_PIN_DIRECTION_OUTPUT; + return 0; +} + +static int ptp_ocp_dpll_direction_set(const struct dpll_pin *pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + enum dpll_pin_direction direction, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp_sma_connector *sma = pin_priv; + struct ptp_ocp *bp = dpll_priv; + enum ptp_ocp_sma_mode mode; + int sma_nr = (sma - bp->sma); + + if (sma->fixed_dir) + return -EOPNOTSUPP; + mode = direction == DPLL_PIN_DIRECTION_INPUT ? + SMA_MODE_IN : SMA_MODE_OUT; + return ptp_ocp_sma_store_val(bp, 0, mode, sma_nr); +} + +static int ptp_ocp_dpll_frequency_set(const struct dpll_pin *pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, u64 frequency, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp_sma_connector *sma = pin_priv; + struct ptp_ocp *bp = dpll_priv; + const struct ocp_selector *tbl; + int sma_nr = (sma - bp->sma); + int i; + + if (sma->fixed_fcn) + return -EOPNOTSUPP; + + tbl = bp->sma_op->tbl[sma->mode]; + for (i = 0; tbl[i].name; i++) + if (tbl[i].frequency == frequency) + return ptp_ocp_sma_store_val(bp, i, sma->mode, sma_nr); + return -EINVAL; +} + +static int ptp_ocp_dpll_frequency_get(const struct dpll_pin *pin, + void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, u64 *frequency, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp_sma_connector *sma = pin_priv; + struct ptp_ocp *bp = dpll_priv; + const struct ocp_selector *tbl; + int sma_nr = (sma - bp->sma); + u32 val; + int i; + + val = bp->sma_op->get(bp, sma_nr); + tbl = bp->sma_op->tbl[sma->mode]; + for (i = 0; tbl[i].name; i++) + if (val == tbl[i].value) { + *frequency = tbl[i].frequency; + return 0; + } + + return -EINVAL; +} + +static const struct dpll_device_ops dpll_ops = { + .lock_status_get = ptp_ocp_dpll_lock_status_get, + .mode_get = ptp_ocp_dpll_mode_get, + .mode_supported = ptp_ocp_dpll_mode_supported, +}; + +static const struct dpll_pin_ops dpll_pins_ops = { + .frequency_get = ptp_ocp_dpll_frequency_get, + .frequency_set = ptp_ocp_dpll_frequency_set, + .direction_get = ptp_ocp_dpll_direction_get, + .direction_set = ptp_ocp_dpll_direction_set, + .state_on_dpll_get = ptp_ocp_dpll_state_get, +}; + +static void +ptp_ocp_sync_work(struct work_struct *work) +{ + struct ptp_ocp *bp; + bool sync; + + bp = container_of(work, struct ptp_ocp, sync_work.work); + sync = !!(ioread32(&bp->reg->status) & OCP_STATUS_IN_SYNC); + + if (bp->sync != sync) + dpll_device_change_ntf(bp->dpll); + + bp->sync = sync; + + queue_delayed_work(system_power_efficient_wq, &bp->sync_work, HZ); +} + static int ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct devlink *devlink; struct ptp_ocp *bp; - int err; + int err, i; + u64 clkid; devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev); if (!devlink) { @@ -4201,6 +4403,8 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto out_disable; + INIT_DELAYED_WORK(&bp->sync_work, ptp_ocp_sync_work); + /* compat mode. * Older FPGA firmware only returns 2 irq's. * allow this - if not all of the IRQ's are returned, skip the @@ -4232,8 +4436,43 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) ptp_ocp_info(bp); devlink_register(devlink); - return 0; + clkid = pci_get_dsn(pdev); + bp->dpll = dpll_device_get(clkid, 0, THIS_MODULE); + if (IS_ERR(bp->dpll)) { + err = PTR_ERR(bp->dpll); + dev_err(&pdev->dev, "dpll_device_alloc failed\n"); + goto out; + } + + err = dpll_device_register(bp->dpll, DPLL_TYPE_PPS, &dpll_ops, bp); + if (err) + goto out; + + for (i = 0; i < OCP_SMA_NUM; i++) { + bp->sma[i].dpll_pin = dpll_pin_get(clkid, i, THIS_MODULE, &bp->sma[i].dpll_prop); + if (IS_ERR(bp->sma[i].dpll_pin)) { + err = PTR_ERR(bp->dpll); + goto out_dpll; + } + + err = dpll_pin_register(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, + &bp->sma[i]); + if (err) { + dpll_pin_put(bp->sma[i].dpll_pin); + goto out_dpll; + } + } + queue_delayed_work(system_power_efficient_wq, &bp->sync_work, HZ); + + return 0; +out_dpll: + while (i) { + --i; + dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, &bp->sma[i]); + dpll_pin_put(bp->sma[i].dpll_pin); + } + dpll_device_put(bp->dpll); out: ptp_ocp_detach(bp); out_disable: @@ -4248,7 +4487,17 @@ ptp_ocp_remove(struct pci_dev *pdev) { struct ptp_ocp *bp = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(bp); + int i; + cancel_delayed_work_sync(&bp->sync_work); + for (i = 0; i < OCP_SMA_NUM; i++) { + if (bp->sma[i].dpll_pin) { + dpll_pin_unregister(bp->dpll, bp->sma[i].dpll_pin, &dpll_pins_ops, bp); + dpll_pin_put(bp->sma[i].dpll_pin); + } + } + dpll_device_unregister(bp->dpll, &dpll_ops, bp); + dpll_device_put(bp->dpll); devlink_unregister(devlink); ptp_ocp_detach(bp); pci_disable_device(pdev); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 817d377a3f36..83711aad855c 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -114,6 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct sk_buff *skb; unsigned out, in; size_t nbytes; + u32 offset; int head; skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue); @@ -156,7 +157,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, } iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len); - payload_len = skb->len; + offset = VIRTIO_VSOCK_SKB_CB(skb)->offset; + payload_len = skb->len - offset; hdr = virtio_vsock_hdr(skb); /* If the packet is greater than the space available in the @@ -197,8 +199,10 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - nbytes = copy_to_iter(skb->data, payload_len, &iov_iter); - if (nbytes != payload_len) { + if (skb_copy_datagram_iter(skb, + offset, + &iov_iter, + payload_len)) { kfree_skb(skb); vq_err(vq, "Faulted on copying pkt buf\n"); break; @@ -212,13 +216,13 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, vhost_add_used(vq, head, sizeof(*hdr) + payload_len); added = true; - skb_pull(skb, payload_len); + VIRTIO_VSOCK_SKB_CB(skb)->offset += payload_len; total_len += payload_len; /* If we didn't send all the payload we can requeue the packet * to send it with the next available buffer. */ - if (skb->len > 0) { + if (VIRTIO_VSOCK_SKB_CB(skb)->offset < skb->len) { hdr->flags |= cpu_to_le32(flags_to_restore); /* We are queueing the same skb to handle diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index d0807ad43f93..dd71d3009771 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -240,6 +240,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES BIT(6) /* used to negotiate communicating link speeds in Mbps */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) +#define VIRTCHNL_VF_OFFLOAD_CRC BIT(10) #define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) #define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) #define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) @@ -295,7 +296,13 @@ VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. - * PF configures requested queue and returns a status code. + * PF configures requested queue and returns a status code. The + * crc_disable flag disables CRC stripping on the VF. Setting + * the crc_disable flag to 1 will disable CRC stripping for each + * queue in the VF where the flag is set. The VIRTCHNL_VF_OFFLOAD_CRC + * offload must have been set prior to sending this info or the PF + * will ignore the request. This flag should be set the same for + * all of the queues for a VF. */ /* Rx queue config info */ @@ -307,7 +314,7 @@ struct virtchnl_rxq_info { u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; - u8 pad0; + u8 crc_disable; u8 rxdid; u8 pad1[2]; u64 dma_ring_addr; diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index 7b121bd780eb..0985221d5478 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -28,19 +28,24 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_BIND, CGROUP_INET4_CONNECT, CGROUP_INET6_CONNECT, + CGROUP_UNIX_CONNECT, CGROUP_INET4_POST_BIND, CGROUP_INET6_POST_BIND, CGROUP_UDP4_SENDMSG, CGROUP_UDP6_SENDMSG, + CGROUP_UNIX_SENDMSG, CGROUP_SYSCTL, CGROUP_UDP4_RECVMSG, CGROUP_UDP6_RECVMSG, + CGROUP_UNIX_RECVMSG, CGROUP_GETSOCKOPT, CGROUP_SETSOCKOPT, CGROUP_INET4_GETPEERNAME, CGROUP_INET6_GETPEERNAME, + CGROUP_UNIX_GETPEERNAME, CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, + CGROUP_UNIX_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8506690dbb9c..98b8cea904fe 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -48,19 +48,24 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET6_BIND); CGROUP_ATYPE(CGROUP_INET4_CONNECT); CGROUP_ATYPE(CGROUP_INET6_CONNECT); + CGROUP_ATYPE(CGROUP_UNIX_CONNECT); CGROUP_ATYPE(CGROUP_INET4_POST_BIND); CGROUP_ATYPE(CGROUP_INET6_POST_BIND); CGROUP_ATYPE(CGROUP_UDP4_SENDMSG); CGROUP_ATYPE(CGROUP_UDP6_SENDMSG); + CGROUP_ATYPE(CGROUP_UNIX_SENDMSG); CGROUP_ATYPE(CGROUP_SYSCTL); CGROUP_ATYPE(CGROUP_UDP4_RECVMSG); CGROUP_ATYPE(CGROUP_UDP6_RECVMSG); + CGROUP_ATYPE(CGROUP_UNIX_RECVMSG); CGROUP_ATYPE(CGROUP_GETSOCKOPT); CGROUP_ATYPE(CGROUP_SETSOCKOPT); CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETPEERNAME); CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); + CGROUP_ATYPE(CGROUP_UNIX_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); default: return CGROUP_BPF_ATTACH_TYPE_INVALID; @@ -120,6 +125,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags); @@ -230,22 +236,22 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, NULL); \ __ret; \ }) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - t_ctx, NULL); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, t_ctx, NULL); \ release_sock(sk); \ } \ __ret; \ @@ -256,14 +262,14 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE). */ -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \ +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, bind_flags) \ ({ \ u32 __flags = 0; \ int __ret = 0; \ if (cgroup_bpf_enabled(atype)) { \ lock_sock(sk); \ - __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ - NULL, &__flags); \ + __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, uaddrlen, \ + atype, NULL, &__flags); \ release_sock(sk); \ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \ @@ -276,29 +282,38 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \ (sk)->sk_prot->pre_connect) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET4_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_INET6_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_CONNECT, NULL) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_SENDMSG, t_ctx) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ - BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_SENDMSG, t_ctx) + +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP4_RECVMSG, NULL) + +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UDP6_RECVMSG, NULL) + +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) \ + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, CGROUP_UNIX_RECVMSG, NULL) /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a * fullsock and its parent fullsock cannot be traced by @@ -477,24 +492,27 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled(atype) (0) -#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, uaddrlen, atype, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, uaddrlen, atype) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, uaddrlen, atype, flags) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, uaddr, uaddrlen, t_ctx) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, uaddr, uaddrlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 49f8b691496c..d3c51a507508 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -55,8 +55,8 @@ struct cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; -extern struct bpf_mem_alloc bpf_global_ma; -extern bool bpf_global_ma_set; +extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; +extern bool bpf_global_ma_set, bpf_global_percpu_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, @@ -180,14 +180,15 @@ enum btf_field_type { BPF_TIMER = (1 << 1), BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, - BPF_LIST_HEAD = (1 << 4), - BPF_LIST_NODE = (1 << 5), - BPF_RB_ROOT = (1 << 6), - BPF_RB_NODE = (1 << 7), + BPF_KPTR_PERCPU = (1 << 4), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU, + BPF_LIST_HEAD = (1 << 5), + BPF_LIST_NODE = (1 << 6), + BPF_RB_ROOT = (1 << 7), + BPF_RB_NODE = (1 << 8), BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | BPF_RB_NODE | BPF_RB_ROOT, - BPF_REFCOUNT = (1 << 8), + BPF_REFCOUNT = (1 << 9), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -300,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_KPTR_PERCPU: + return "percpu_kptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -325,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -351,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_timer); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); @@ -389,6 +394,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_TIMER: case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: break; default: WARN_ON_ONCE(1); @@ -1029,6 +1035,11 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) +/* Indicate that current trampoline is in a tail call context. Then, it has to + * cache and restore tail_call_cnt to avoid infinite tail call loop. + */ +#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. */ @@ -1378,6 +1389,7 @@ struct bpf_prog_aux { u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ + u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ u32 ctx_arg_info_size; @@ -1398,6 +1410,8 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool exception_cb; + bool exception_boundary; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1420,6 +1434,7 @@ struct bpf_prog_aux { int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; + unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp); #ifdef CONFIG_SECURITY void *security; #endif @@ -2149,12 +2164,12 @@ static inline bool bpf_allow_uninit_stack(void) static inline bool bpf_bypass_spec_v1(void) { - return perfmon_capable(); + return cpu_mitigations_off() || perfmon_capable(); } static inline bool bpf_bypass_spec_v4(void) { - return perfmon_capable(); + return cpu_mitigations_off() || perfmon_capable(); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2407,9 +2422,11 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *reg); + struct bpf_reg_state *reg, bool is_ex_cb); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf, const struct btf_type *t); +const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key); struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); @@ -2905,6 +2922,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +static __always_inline void +bpf_prog_inc_misses_counters(const struct bpf_prog_array *array) +{ + const struct bpf_prog_array_item *item; + struct bpf_prog *prog; + + if (unlikely(!array)) + return; + + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + bpf_prog_inc_misses_counter(prog); + item++; + } +} + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, @@ -3183,4 +3216,9 @@ static inline gfp_t bpf_memcg_flags(gfp_t flags) return flags; } +static inline bool bpf_is_subprog(const struct bpf_prog *prog) +{ + return prog->aux->func_idx != 0; +} + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b6e58dab8e27..94ec766432f5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -300,6 +300,7 @@ struct bpf_func_state { bool in_callback_fn; struct tnum callback_ret_range; bool in_async_callback_fn; + bool in_exception_callback_fn; /* The following fields should be last. See copy_func_state() */ int acquired_refs; @@ -480,6 +481,7 @@ struct bpf_insn_aux_data { bool zext_dst; /* this insn zero extends dst reg */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */ + bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ @@ -540,7 +542,9 @@ struct bpf_subprog_info { bool has_tail_call; bool tail_call_reachable; bool has_ld_abs; + bool is_cb; bool is_async_cb; + bool is_exception_cb; }; struct bpf_verifier_env; @@ -587,6 +591,8 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ + u32 hidden_subprog_cnt; /* number of hidden subprogs */ + int exception_callback_subprog; bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; @@ -594,10 +600,11 @@ struct bpf_verifier_env { bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; + bool seen_exception; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; - struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; + struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 2]; /* max + 2 for the fake and exception subprogs */ union { struct bpf_idmap idmap_scratch; struct bpf_idset idset_scratch; diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index b658961156a0..7a9a40163c0f 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -19,7 +19,7 @@ struct ceph_monmap { struct ceph_fsid fsid; u32 epoch; u32 num_mon; - struct ceph_entity_inst mon_inst[]; + struct ceph_entity_inst mon_inst[] __counted_by(num_mon); }; struct ceph_mon_client; diff --git a/include/linux/dpll.h b/include/linux/dpll.h new file mode 100644 index 000000000000..bbc480cd2932 --- /dev/null +++ b/include/linux/dpll.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates + * Copyright (c) 2023 Intel and affiliates + */ + +#ifndef __DPLL_H__ +#define __DPLL_H__ + +#include <uapi/linux/dpll.h> +#include <linux/device.h> +#include <linux/netlink.h> + +struct dpll_device; +struct dpll_pin; + +struct dpll_device_ops { + int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_mode *mode, struct netlink_ext_ack *extack); + bool (*mode_supported)(const struct dpll_device *dpll, void *dpll_priv, + const enum dpll_mode mode, + struct netlink_ext_ack *extack); + int (*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv, + enum dpll_lock_status *status, + struct netlink_ext_ack *extack); + int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv, + s32 *temp, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_ops { + int (*frequency_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u64 frequency, + struct netlink_ext_ack *extack); + int (*frequency_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 *frequency, struct netlink_ext_ack *extack); + int (*direction_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const enum dpll_pin_direction direction, + struct netlink_ext_ack *extack); + int (*direction_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + enum dpll_pin_direction *direction, + struct netlink_ext_ack *extack); + int (*state_on_pin_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, enum dpll_pin_state *state, + struct netlink_ext_ack *extack); + int (*state_on_pin_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_pin *parent_pin, + void *parent_pin_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*state_on_dpll_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, + void *dpll_priv, + const enum dpll_pin_state state, + struct netlink_ext_ack *extack); + int (*prio_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u32 *prio, struct netlink_ext_ack *extack); + int (*prio_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + const u32 prio, struct netlink_ext_ack *extack); +}; + +struct dpll_pin_frequency { + u64 min; + u64 max; +}; + +#define DPLL_PIN_FREQUENCY_RANGE(_min, _max) \ + { \ + .min = _min, \ + .max = _max, \ + } + +#define DPLL_PIN_FREQUENCY(_val) DPLL_PIN_FREQUENCY_RANGE(_val, _val) +#define DPLL_PIN_FREQUENCY_1PPS \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_1_HZ) +#define DPLL_PIN_FREQUENCY_10MHZ \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_MHZ) +#define DPLL_PIN_FREQUENCY_IRIG_B \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_10_KHZ) +#define DPLL_PIN_FREQUENCY_DCF77 \ + DPLL_PIN_FREQUENCY(DPLL_PIN_FREQUENCY_77_5_KHZ) + +struct dpll_pin_properties { + const char *board_label; + const char *panel_label; + const char *package_label; + enum dpll_pin_type type; + unsigned long capabilities; + u32 freq_supported_num; + struct dpll_pin_frequency *freq_supported; +}; + +#if IS_ENABLED(CONFIG_DPLL) +size_t dpll_msg_pin_handle_size(struct dpll_pin *pin); +int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin); +#else +static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +{ + return 0; +} + +static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +{ + return 0; +} +#endif + +struct dpll_device * +dpll_device_get(u64 clock_id, u32 dev_driver_id, struct module *module); + +void dpll_device_put(struct dpll_device *dpll); + +int dpll_device_register(struct dpll_device *dpll, enum dpll_type type, + const struct dpll_device_ops *ops, void *priv); + +void dpll_device_unregister(struct dpll_device *dpll, + const struct dpll_device_ops *ops, void *priv); + +struct dpll_pin * +dpll_pin_get(u64 clock_id, u32 dev_driver_id, struct module *module, + const struct dpll_pin_properties *prop); + +int dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_put(struct dpll_pin *pin); + +int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, + const struct dpll_pin_ops *ops, void *priv); + +int dpll_device_change_ntf(struct dpll_device *dpll); + +int dpll_pin_change_ntf(struct dpll_pin *pin); + +#endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 761af6b3cf2b..bcd2bc15ff56 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -117,21 +117,25 @@ struct ctl_table_header; /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ -#define BPF_ALU64_IMM(OP, DST, IMM) \ +#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU64_IMM(OP, DST, IMM) \ + BPF_ALU64_IMM_OFF(OP, DST, IMM, 0) -#define BPF_ALU32_IMM(OP, DST, IMM) \ +#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ - .off = 0, \ + .off = OFF, \ .imm = IMM }) +#define BPF_ALU32_IMM(OP, DST, IMM) \ + BPF_ALU32_IMM_OFF(OP, DST, IMM, 0) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ @@ -143,6 +147,16 @@ struct ctl_table_header; .off = 0, \ .imm = LEN }) +/* Byte Swap, bswap16/32/64 */ + +#define BPF_BSWAP(DST, LEN) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE), \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = LEN }) + /* Short form of mov, dst_reg = src_reg */ #define BPF_MOV64_REG(DST, SRC) \ @@ -179,6 +193,24 @@ struct ctl_table_header; .off = 0, \ .imm = IMM }) +/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */ + +#define BPF_MOVSX64_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_MOVSX32_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Special form of mov32, used for doing explicit zero extension on dst. */ #define BPF_ZEXT_REG(DST) \ ((struct bpf_insn) { \ @@ -263,6 +295,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) +/* Memory load, dst_reg = *(signed size *) (src_reg + off16) */ + +#define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ @@ -694,7 +736,7 @@ static inline void bpf_compute_and_save_data_end( cb->data_end = skb->data + skb_headlen(skb); } -/* Restore data saved by bpf_compute_data_pointers(). */ +/* Restore data saved by bpf_compute_and_save_data_end(). */ static inline void bpf_restore_data_end( struct sk_buff *skb, void *saved_data_end) { @@ -912,6 +954,8 @@ bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); +bool bpf_jit_supports_exceptions(void); +void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); static inline bool bpf_dump_raw_ok(const struct cred *cred) @@ -1127,6 +1171,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); +struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, @@ -1194,6 +1239,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, return -ERANGE; } +static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) +{ + return NULL; +} + static inline const char * bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) @@ -1285,6 +1335,7 @@ struct bpf_sock_addr_kern { */ u64 tmp_reg; void *t_ctx; /* Attach type specific context. */ + u32 uaddrlen; }; struct bpf_sock_ops_kern { diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index af8a771a053c..e400ff757f13 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -213,28 +213,9 @@ struct ipv6_pinfo { __be32 flow_label; __u32 frag_size; - /* - * Packed in 16bits. - * Omit one shift by putting the signed field at MSB. - */ -#if defined(__BIG_ENDIAN_BITFIELD) - __s16 hop_limit:9; - __u16 __unused_1:7; -#else - __u16 __unused_1:7; - __s16 hop_limit:9; -#endif + s16 hop_limit; + u8 mcast_hops; -#if defined(__BIG_ENDIAN_BITFIELD) - /* Packed in 16bits. */ - __s16 mcast_hops:9; - __u16 __unused_2:6, - mc_loop:1; -#else - __u16 mc_loop:1, - __unused_2:6; - __s16 mcast_hops:9; -#endif int ucast_oif; int mcast_oif; @@ -262,21 +243,11 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 recverr:1, - sndflow:1, - repflow:1, - pmtudisc:3, - padding:1, /* 1 bit hole */ - srcprefs:3, /* 001: prefer temporary address + __u8 srcprefs; /* 001: prefer temporary address * 010: prefer public address * 100: prefer care-of address */ - dontfrag:1, - autoflowlabel:1, - autoflowlabel_set:1, - mc_all:1, - recverr_rfc4884:1, - rtalert_isolate:1; + __u8 pmtudisc; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; @@ -293,6 +264,18 @@ struct ipv6_pinfo { struct inet6_cork cork; }; +/* We currently use available bits from inet_sk(sk)->inet_flags, + * this could change in the future. + */ +#define inet6_test_bit(nr, sk) \ + test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_set_bit(nr, sk) \ + set_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_clear_bit(nr, sk) \ + clear_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) +#define inet6_assign_bit(nr, sk, val) \ + assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val) + /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ struct raw6_sock { /* inet_sock has to be the first member of raw6_sock */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 71fa9a40fb5a..72cb693b075b 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -285,8 +285,10 @@ static inline bool kasan_check_byte(const void *address) #if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) void kasan_unpoison_task_stack(struct task_struct *task); +asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); #else static inline void kasan_unpoison_task_stack(struct task_struct *task) {} +static inline void kasan_unpoison_task_stack_below(const void *watermark) {} #endif #ifdef CONFIG_KASAN_GENERIC diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4d5be378fa8c..8fbe22de16ef 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -366,6 +366,7 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_UPLINK_NETDEV, MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3033bbaeac81..92434814c855 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -155,6 +155,8 @@ enum { MLX5_REG_MCC = 0x9062, MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, + MLX5_REG_MSECQ = 0x9155, + MLX5_REG_MSEES = 0x9156, MLX5_REG_MIRC = 0x9162, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fc3db401f8a2..dd8421d021cf 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10176,7 +10176,9 @@ struct mlx5_ifc_mcam_access_reg_bits2 { u8 mirc[0x1]; u8 regs_97_to_96[0x2]; - u8 regs_95_to_64[0x20]; + u8 regs_95_to_87[0x09]; + u8 synce_registers[0x2]; + u8 regs_84_to_64[0x15]; u8 regs_63_to_32[0x20]; @@ -12549,4 +12551,59 @@ struct mlx5_ifc_modify_page_track_obj_in_bits { struct mlx5_ifc_page_track_bits obj_context; }; +struct mlx5_ifc_msecq_reg_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x12]; + u8 network_option[0x2]; + u8 local_ssm_code[0x4]; + u8 local_enhanced_ssm_code[0x8]; + + u8 local_clock_identity[0x40]; + + u8 reserved_at_80[0x180]; +}; + +enum { + MLX5_MSEES_FIELD_SELECT_ENABLE = BIT(0), + MLX5_MSEES_FIELD_SELECT_ADMIN_STATUS = BIT(1), + MLX5_MSEES_FIELD_SELECT_ADMIN_FREQ_MEASURE = BIT(2), +}; + +enum mlx5_msees_admin_status { + MLX5_MSEES_ADMIN_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_ADMIN_STATUS_TRACK = 0x1, +}; + +enum mlx5_msees_oper_status { + MLX5_MSEES_OPER_STATUS_FREE_RUNNING = 0x0, + MLX5_MSEES_OPER_STATUS_SELF_TRACK = 0x1, + MLX5_MSEES_OPER_STATUS_OTHER_TRACK = 0x2, + MLX5_MSEES_OPER_STATUS_HOLDOVER = 0x3, + MLX5_MSEES_OPER_STATUS_FAIL_HOLDOVER = 0x4, + MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5, +}; + +struct mlx5_ifc_msees_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 lp_msb[0x2]; + u8 reserved_at_14[0xc]; + + u8 field_select[0x20]; + + u8 admin_status[0x4]; + u8 oper_status[0x4]; + u8 ho_acq[0x1]; + u8 reserved_at_49[0xc]; + u8 admin_freq_measure[0x1]; + u8 oper_freq_measure[0x1]; + u8 failure_reason[0x9]; + + u8 frequency_diff[0x20]; + + u8 reserved_at_80[0x180]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0896aaa91dd7..7e520c14eb8c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -79,6 +79,8 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; +/* DPLL specific */ +struct dpll_pin; typedef u32 xdp_features_t; @@ -917,6 +919,7 @@ struct net_device_path { u8 queue; u16 wcid; u8 bss; + u8 amsdu; } mtk_wdma; }; }; @@ -2049,6 +2052,9 @@ enum netdev_ml_priv_type { * SET_NETDEV_DEVLINK_PORT macro. This pointer is static * during the time netdevice is registered. * + * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, + * where the clock is recovered. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2405,6 +2411,10 @@ struct net_device { struct rtnl_hw_stats64 *offload_xstats_l3; struct devlink_port *devlink_port; + +#if IS_ENABLED(CONFIG_DPLL) + struct dpll_pin *dpll_pin; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -3940,6 +3950,18 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); +void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void netdev_dpll_pin_clear(struct net_device *dev); + +static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_DPLL) + return dev->dpll_pin; +#else + return NULL; +#endif +} + struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h index e838a2b90440..17a87c1a55d7 100644 --- a/include/linux/pds/pds_core_if.h +++ b/include/linux/pds/pds_core_if.h @@ -79,6 +79,7 @@ enum pds_core_status_code { PDS_RC_EVFID = 31, /* VF ID does not exist */ PDS_RC_BAD_FW = 32, /* FW file is invalid or corrupted */ PDS_RC_ECLIENT = 33, /* No such client id */ + PDS_RC_BAD_PCI = 255, /* Broken PCI when reading status */ }; /** diff --git a/include/linux/phy.h b/include/linux/phy.h index 1351b802ffcf..3cc52826f18e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1736,6 +1736,7 @@ void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_config_aneg(struct phy_device *phydev); +int _phy_start_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); int phy_speed_down(struct phy_device *phydev, bool sync); diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index b2b28180dff7..a476648858a6 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -10,6 +10,7 @@ #define MTK_WED_TX_QUEUES 2 #define MTK_WED_RX_QUEUES 2 +#define MTK_WED_RX_PAGE_QUEUES 3 #define WED_WO_STA_REC 0x6 @@ -45,7 +46,7 @@ enum mtk_wed_wo_cmd { MTK_WED_WO_CMD_WED_END }; -struct mtk_rxbm_desc { +struct mtk_wed_bm_desc { __le32 buf0; __le32 token; } __packed __aligned(4); @@ -76,6 +77,11 @@ struct mtk_wed_wo_rx_stats { __le32 rx_drop_cnt; }; +struct mtk_wed_buf { + void *p; + dma_addr_t phy_addr; +}; + struct mtk_wed_device { #ifdef CONFIG_NET_MEDIATEK_SOC_WED const struct mtk_wed_ops *ops; @@ -94,17 +100,20 @@ struct mtk_wed_device { struct mtk_wed_ring txfree_ring; struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_rro_ring[MTK_WED_RX_QUEUES]; + struct mtk_wed_ring rx_page_ring[MTK_WED_RX_PAGE_QUEUES]; + struct mtk_wed_ring ind_cmd_ring; struct { int size; - void **pages; + struct mtk_wed_buf *pages; struct mtk_wdma_desc *desc; dma_addr_t desc_phys; } tx_buf_ring; struct { int size; - struct mtk_rxbm_desc *desc; + struct mtk_wed_bm_desc *desc; dma_addr_t desc_phys; } rx_buf_ring; @@ -114,6 +123,13 @@ struct mtk_wed_device { dma_addr_t fdbk_phys; } rro; + struct { + int size; + struct mtk_wed_buf *pages; + struct mtk_wed_bm_desc *desc; + dma_addr_t desc_phys; + } hw_rro; + /* filled by driver: */ struct { union { @@ -123,6 +139,7 @@ struct mtk_wed_device { enum mtk_wed_bus_tye bus_type; void __iomem *base; u32 phy_base; + u32 id; u32 wpdma_phys; u32 wpdma_int; @@ -131,18 +148,35 @@ struct mtk_wed_device { u32 wpdma_txfree; u32 wpdma_rx_glo; u32 wpdma_rx; + u32 wpdma_rx_rro[MTK_WED_RX_QUEUES]; + u32 wpdma_rx_pg; bool wcid_512; + bool hw_rro; + bool msi; u16 token_start; unsigned int nbuf; unsigned int rx_nbuf; unsigned int rx_npkt; unsigned int rx_size; + unsigned int amsdu_max_len; u8 tx_tbit[MTK_WED_TX_QUEUES]; u8 rx_tbit[MTK_WED_RX_QUEUES]; + u8 rro_rx_tbit[MTK_WED_RX_QUEUES]; + u8 rx_pg_tbit[MTK_WED_RX_PAGE_QUEUES]; u8 txfree_tbit; + u8 amsdu_max_subframes; + + struct { + u8 se_group_nums; + u16 win_size; + u16 particular_sid; + u32 ack_sn_addr; + dma_addr_t particular_se_phys; + dma_addr_t addr_elem_phys[1024]; + } ind_cmd; u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); @@ -182,6 +216,14 @@ struct mtk_wed_ops { void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask); int (*setup_tc)(struct mtk_wed_device *wed, struct net_device *dev, enum tc_setup_type type, void *type_data); + void (*start_hw_rro)(struct mtk_wed_device *dev, u32 irq_mask, + bool reset); + void (*rro_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + void (*msdu_pg_rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs); + int (*ind_rx_ring_setup)(struct mtk_wed_device *dev, + void __iomem *regs); }; extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops; @@ -206,16 +248,27 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) return ret; } -static inline bool -mtk_wed_get_rx_capa(struct mtk_wed_device *dev) +static inline bool mtk_wed_get_rx_capa(struct mtk_wed_device *dev) { #ifdef CONFIG_NET_MEDIATEK_SOC_WED + if (dev->version == 3) + return dev->wlan.hw_rro; + return dev->version != 1; #else return false; #endif } +static inline bool mtk_wed_is_amsdu_supported(struct mtk_wed_device *dev) +{ +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + return dev->version == 3; +#else + return false; +#endif +} + #ifdef CONFIG_NET_MEDIATEK_SOC_WED #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) @@ -242,6 +295,15 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) #define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) \ (_dev)->ops->setup_tc(_dev, _netdev, _type, _type_data) +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) \ + (_dev)->ops->start_hw_rro(_dev, _mask, _reset) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->rro_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) \ + (_dev)->ops->msdu_pg_rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) \ + (_dev)->ops->ind_rx_ring_setup(_dev, _regs) + #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -261,6 +323,10 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_stop(_dev) do {} while (0) #define mtk_wed_device_dma_reset(_dev) do {} while (0) #define mtk_wed_device_setup_tc(_dev, _netdev, _type, _type_data) -EOPNOTSUPP +#define mtk_wed_device_start_hw_rro(_dev, _mask, _reset) do {} while (0) +#define mtk_wed_device_rro_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_msdu_pg_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_ind_rx_ring_setup(_dev, _regs) -ENODEV #endif #endif diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index ce89cc3e4913..c0079a7574ae 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -139,6 +139,7 @@ struct stmmac_rxq_cfg { struct stmmac_txq_cfg { u32 weight; + bool coe_unsupported; u8 mode_to_use; /* Credit Base Shaper parameters */ u32 send_slope; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3c5efeeb024f..e15452df9804 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -377,6 +377,14 @@ struct tcp_sock { * Total data bytes retransmitted */ u32 total_retrans; /* Total retransmits for entire connection */ + u32 rto_stamp; /* Start time (ms) of last CA_Loss recovery */ + u16 total_rto; /* Total number of RTO timeouts, including + * SYN/SYN-ACK and recurring timeouts. + */ + u16 total_rto_recoveries; /* Total number of RTO recoveries, + * including any unfinished recovery. + */ + u32 total_rto_time; /* ms spent in (completed) RTO recoveries. */ u32 urg_seq; /* Seq of received urgent pointer */ unsigned int keepalive_time; /* time before keep alive takes place */ @@ -463,15 +471,17 @@ enum tsq_enum { TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call * tcp_v{4|6}_mtu_reduced() */ + TCP_ACK_DEFERRED, /* TX pure ack is deferred */ }; enum tsq_flags { - TSQF_THROTTLED = (1UL << TSQ_THROTTLED), - TSQF_QUEUED = (1UL << TSQ_QUEUED), - TCPF_TSQ_DEFERRED = (1UL << TCP_TSQ_DEFERRED), - TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), - TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), - TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), + TSQF_THROTTLED = BIT(TSQ_THROTTLED), + TSQF_QUEUED = BIT(TSQ_QUEUED), + TCPF_TSQ_DEFERRED = BIT(TCP_TSQ_DEFERRED), + TCPF_WRITE_TIMER_DEFERRED = BIT(TCP_WRITE_TIMER_DEFERRED), + TCPF_DELACK_TIMER_DEFERRED = BIT(TCP_DELACK_TIMER_DEFERRED), + TCPF_MTU_REDUCED_DEFERRED = BIT(TCP_MTU_REDUCED_DEFERRED), + TCPF_ACK_DEFERRED = BIT(TCP_ACK_DEFERRED), }; #define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 21ae37e49319..5eb88a66eb68 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -761,7 +761,8 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr); + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -801,7 +802,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, - u64 *probe_addr) + u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } @@ -877,6 +878,7 @@ extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, + unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/include/linux/udp.h b/include/linux/udp.h index 43c1fb2d2c21..d04188714dca 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -32,25 +32,30 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) return (num + net_hash_mix(net)) & mask; } +enum { + UDP_FLAGS_CORK, /* Cork is required */ + UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */ + UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */ + UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */ + UDP_FLAGS_ACCEPT_FRAGLIST, + UDP_FLAGS_ACCEPT_L4, + UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */ + UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */ + UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */ +}; + struct udp_sock { /* inet_sock has to be the first member */ struct inet_sock inet; #define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0] #define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] #define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node + + unsigned long udp_flags; + int pending; /* Any pending frames ? */ - unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ - unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - encap_enabled:1, /* This socket enabled encap - * processing; UDP tunnels and - * different encapsulation layer set - * this - */ - gro_enabled:1, /* Request GRO aggregation */ - accept_udp_l4:1, - accept_udp_fraglist:1; + /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -62,12 +67,6 @@ struct udp_sock { */ __u16 pcslen; __u16 pcrlen; -/* indicator bits used by pcflag: */ -#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ -#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ -#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ - __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ - __u8 unused[3]; /* * For encapsulation sockets. */ @@ -95,28 +94,39 @@ struct udp_sock { int forward_threshold; }; +#define udp_test_bit(nr, sk) \ + test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_set_bit(nr, sk) \ + set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_test_and_set_bit(nr, sk) \ + test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_clear_bit(nr, sk) \ + clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags) +#define udp_assign_bit(nr, sk, val) \ + assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) + #define UDP_MAX_SEGMENTS (1 << 6UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_tx = val; + udp_assign_bit(NO_CHECK6_TX, sk, val); } static inline void udp_set_no_check6_rx(struct sock *sk, bool val) { - udp_sk(sk)->no_check6_rx = val; + udp_assign_bit(NO_CHECK6_RX, sk, val); } -static inline bool udp_get_no_check6_tx(struct sock *sk) +static inline bool udp_get_no_check6_tx(const struct sock *sk) { - return udp_sk(sk)->no_check6_tx; + return udp_test_bit(NO_CHECK6_TX, sk); } -static inline bool udp_get_no_check6_rx(struct sock *sk) +static inline bool udp_get_no_check6_rx(const struct sock *sk) { - return udp_sk(sk)->no_check6_rx; + return udp_test_bit(NO_CHECK6_RX, sk); } static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, @@ -135,10 +145,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) if (!skb_is_gso(skb)) return false; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && + !udp_test_bit(ACCEPT_L4, sk)) return true; - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist) + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && + !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; return false; @@ -146,8 +158,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) static inline void udp_allow_gso(struct sock *sk) { - udp_sk(sk)->accept_udp_l4 = 1; - udp_sk(sk)->accept_udp_fraglist = 1; + udp_set_bit(ACCEPT_L4, sk); + udp_set_bit(ACCEPT_FRAGLIST, sk); } #define udp_portaddr_for_each_entry(__sk, list) \ diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index c58453699ee9..ebb3ce63d64d 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -12,6 +12,7 @@ struct virtio_vsock_skb_cb { bool reply; bool tap_delivered; + u32 offset; }; #define VIRTIO_VSOCK_SKB_CB(skb) ((struct virtio_vsock_skb_cb *)((skb)->cb)) @@ -159,6 +160,15 @@ struct virtio_transport { /* Takes ownership of the packet */ int (*send_pkt)(struct sk_buff *skb); + + /* Used in MSG_ZEROCOPY mode. Checks, that provided data + * (number of buffers) could be transmitted with zerocopy + * mode. If this callback is not implemented for the current + * transport - this means that this transport doesn't need + * extra checks and can perform zerocopy transmission by + * default. + */ + bool (*can_msgzerocopy)(int bufs_num); }; ssize_t diff --git a/include/net/devlink.h b/include/net/devlink.h index 29fd1b4ee654..fad8e36e3d98 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -150,6 +150,7 @@ struct devlink_port { struct devlink_rate *devlink_rate; struct devlink_linecard *linecard; + u32 rel_index; }; struct devlink_port_new_attrs { @@ -1697,6 +1698,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); +int devl_port_fn_devlink_set(struct devlink_port *devlink_port, + struct devlink *fn_devlink); struct devlink_rate * devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name, struct devlink_rate *parent); @@ -1717,8 +1720,8 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard); void devlink_linecard_provision_fail(struct devlink_linecard *linecard); void devlink_linecard_activate(struct devlink_linecard *linecard); void devlink_linecard_deactivate(struct devlink_linecard *linecard); -void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, - struct devlink *nested_devlink); +int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink); int devl_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, @@ -1918,6 +1921,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, void devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter); +int devl_nested_devlink_set(struct devlink *devlink, + struct devlink *nested_devlink); bool devlink_is_reload_failed(const struct devlink *devlink); void devlink_remote_reload_actions_performed(struct devlink *devlink, enum devlink_reload_limit limit, diff --git a/include/net/dst.h b/include/net/dst.h index 78884429deed..f8b8599a0600 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -392,10 +392,10 @@ static inline int dst_discard(struct sk_buff *skb) { return dst_discard_out(&init_net, skb->sk, skb); } -void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, +void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags); void dst_init(struct dst_entry *dst, struct dst_ops *ops, - struct net_device *dev, int initial_ref, int initial_obsolete, + struct net_device *dev, int initial_obsolete, unsigned short flags); struct dst_entry *dst_destroy(struct dst_entry *dst); void dst_dev_put(struct dst_entry *dst); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 2de0e4d4a027..98e11958cdff 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -268,6 +268,16 @@ enum { INET_FLAGS_NODEFRAG = 17, INET_FLAGS_BIND_ADDRESS_NO_PORT = 18, INET_FLAGS_DEFER_CONNECT = 19, + INET_FLAGS_MC6_LOOP = 20, + INET_FLAGS_RECVERR6_RFC4884 = 21, + INET_FLAGS_MC6_ALL = 22, + INET_FLAGS_AUTOFLOWLABEL_SET = 23, + INET_FLAGS_AUTOFLOWLABEL = 24, + INET_FLAGS_DONTFRAG = 25, + INET_FLAGS_RECVERR6 = 26, + INET_FLAGS_REPFLOW = 27, + INET_FLAGS_RTALERT_ISOLATE = 28, + INET_FLAGS_SNDFLOW = 29, }; /* cmsg flags for inet */ diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index b32539bb0fb0..28b065790261 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -53,13 +53,12 @@ struct route_info { */ static inline int rt6_srcprefs2flags(unsigned int srcprefs) { - /* No need to bitmask because srcprefs have only 3 bits. */ - return srcprefs << 3; + return (srcprefs & IPV6_PREFER_SRC_MASK) << 3; } static inline unsigned int rt6_flags2srcprefs(int flags) { - return (flags >> 3) & 7; + return (flags >> 3) & IPV6_PREFER_SRC_MASK; } static inline bool rt6_need_strict(const struct in6_addr *daddr) @@ -266,7 +265,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) const struct dst_entry *dst = skb_dst(skb); unsigned int mtu; - if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) { + if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) { mtu = READ_ONCE(dst->dev->mtu); mtu -= lwtunnel_headroom(dst->lwtstate, mtu); } else { @@ -277,14 +276,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb) static inline bool ip6_sk_accept_pmtu(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE && - inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc != IPV6_PMTUDISC_INTERFACE && + pmtudisc != IPV6_PMTUDISC_OMIT; } static inline bool ip6_sk_ignore_df(const struct sock *sk) { - return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO || - inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT; + u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc); + + return pmtudisc < IPV6_PMTUDISC_DO || + pmtudisc == IPV6_PMTUDISC_OMIT; } static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c6932d1a3fa8..b3444c8a6f74 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -373,12 +373,12 @@ static inline void ipcm6_init(struct ipcm6_cookie *ipc6) } static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6, - const struct ipv6_pinfo *np) + const struct sock *sk) { *ipc6 = (struct ipcm6_cookie) { .hlimit = -1, - .tclass = np->tclass, - .dontfrag = np->dontfrag, + .tclass = inet6_sk(sk)->tclass, + .dontfrag = inet6_test_bit(DONTFRAG, sk), }; } @@ -428,7 +428,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); +bool ip6_autoflowlabel(struct net *net, const struct sock *sk); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { @@ -914,9 +914,9 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, int hlimit; if (ipv6_addr_is_multicast(&fl6->daddr)) - hlimit = np->mcast_hops; + hlimit = READ_ONCE(np->mcast_hops); else - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); return hlimit; @@ -1303,15 +1303,16 @@ static inline int ip6_sock_set_v6only(struct sock *sk) static inline void ip6_sock_set_recverr(struct sock *sk) { - lock_sock(sk); - inet6_sk(sk)->recverr = true; - release_sock(sk); + inet6_set_bit(RECVERR6, sk); } -static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) +#define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \ + IPV6_PREFER_SRC_COA) + +static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) { + unsigned int prefmask = ~IPV6_PREFER_SRC_MASK; unsigned int pref = 0; - unsigned int prefmask = ~0; /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ switch (val & (IPV6_PREFER_SRC_PUBLIC | @@ -1361,20 +1362,11 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) return -EINVAL; } - inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref; + WRITE_ONCE(inet6_sk(sk)->srcprefs, + (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref); return 0; } -static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) -{ - int ret; - - lock_sock(sk); - ret = __ip6_sock_set_addr_preferences(sk, val); - release_sock(sk); - return ret; -} - static inline void ip6_sock_set_recvpktinfo(struct sock *sk) { lock_sock(sk); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf4737..21da31e1dff5 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -85,6 +85,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7a41c4791536..d96d05b08819 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,7 @@ struct netns_ipv4 { u8 sysctl_tcp_syncookies; u8 sysctl_tcp_migrate_req; u8 sysctl_tcp_comp_sack_nr; + u8 sysctl_tcp_backlog_ack_defer; int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; diff --git a/include/net/sock.h b/include/net/sock.h index 92f7ea62a915..3f2ecef884f8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1821,12 +1821,11 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk) static inline void sock_release_ownership(struct sock *sk) { - if (sock_owned_by_user_nocheck(sk)) { - sk->sk_lock.owned = 0; + DEBUG_NET_WARN_ON_ONCE(!sock_owned_by_user_nocheck(sk)); + sk->sk_lock.owned = 0; - /* The sk_lock has mutex_unlock() semantics: */ - mutex_release(&sk->sk_lock.dep_map, _RET_IP_); - } + /* The sk_lock has mutex_unlock() semantics: */ + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); } /* no reclassification while locks are held */ @@ -2237,7 +2236,7 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) } } -bool sk_mc_loop(struct sock *sk); +bool sk_mc_loop(const struct sock *sk); static inline bool sk_can_gso(const struct sock *sk) { diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 0ca9b7a11baf..29251c3519cf 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -174,16 +174,13 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) } #endif -static inline void udp_tunnel_encap_enable(struct socket *sock) +static inline void udp_tunnel_encap_enable(struct sock *sk) { - struct udp_sock *up = udp_sk(sock->sk); - - if (up->encap_enabled) + if (udp_test_and_set_bit(ENCAP_ENABLED, sk)) return; - up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) - if (sock->sk->sk_family == PF_INET6) + if (READ_ONCE(sk->sk_family) == PF_INET6) ipv6_stub->udpv6_encap_enable(); #endif udp_encap_enable(); diff --git a/include/net/udplite.h b/include/net/udplite.h index bd33ff2b8f42..786919d29f8d 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -66,14 +66,18 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) /* Fast-path computation of checksum. Socket may not be locked. */ static inline __wsum udplite_csum(struct sk_buff *skb) { - const struct udp_sock *up = udp_sk(skb->sk); const int off = skb_transport_offset(skb); + const struct sock *sk = skb->sk; int len = skb->len - off; - if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) { - if (0 < up->pcslen) - len = up->pcslen; - udp_hdr(skb)->len = htons(up->pcslen); + if (udp_test_bit(UDPLITE_SEND_CC, sk)) { + u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen); + + if (pcslen < len) { + if (pcslen > 0) + len = pcslen; + udp_hdr(skb)->len = htons(pcslen); + } } skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ diff --git a/include/net/xdp.h b/include/net/xdp.h index de08c8e0d134..349c36fb5fd8 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -383,14 +383,25 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, #define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE +/* Define the relationship between xdp-rx-metadata kfunc and + * various other entities: + * - xdp_rx_metadata enum + * - netdev netlink enum (Documentation/netlink/specs/netdev.yaml) + * - kfunc name + * - xdp_metadata_ops field + */ #define XDP_METADATA_KFUNC_xxx \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \ - bpf_xdp_metadata_rx_timestamp) \ + NETDEV_XDP_RX_METADATA_TIMESTAMP, \ + bpf_xdp_metadata_rx_timestamp, \ + xmo_rx_timestamp) \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \ - bpf_xdp_metadata_rx_hash) \ + NETDEV_XDP_RX_METADATA_HASH, \ + bpf_xdp_metadata_rx_hash, \ + xmo_rx_hash) \ -enum { -#define XDP_METADATA_KFUNC(name, _) name, +enum xdp_rx_metadata { +#define XDP_METADATA_KFUNC(name, _, __, ___) name, XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC MAX_XDP_METADATA_KFUNC, diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 1617af380162..69b472604b86 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -14,6 +14,8 @@ #include <linux/mm.h> #include <net/sock.h> +#define XDP_UMEM_SG_FLAG (1 << 1) + struct net_device; struct xsk_queue; struct xdp_buff; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 363c7d510554..98d7aa78adda 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -2166,7 +2166,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk) proto = sk->sk_protocol; if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) - return inet6_sk(sk)->dontfrag; + return inet6_test_bit(DONTFRAG, sk); return false; } diff --git a/include/trace/events/vsock_virtio_transport_common.h b/include/trace/events/vsock_virtio_transport_common.h index d0b3f0ea9ba1..f1ebe36787c3 100644 --- a/include/trace/events/vsock_virtio_transport_common.h +++ b/include/trace/events/vsock_virtio_transport_common.h @@ -43,7 +43,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt, __u32 len, __u16 type, __u16 op, - __u32 flags + __u32 flags, + bool zcopy ), TP_ARGS( src_cid, src_port, @@ -51,7 +52,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt, len, type, op, - flags + flags, + zcopy ), TP_STRUCT__entry( __field(__u32, src_cid) @@ -62,6 +64,7 @@ TRACE_EVENT(virtio_transport_alloc_pkt, __field(__u16, type) __field(__u16, op) __field(__u32, flags) + __field(bool, zcopy) ), TP_fast_assign( __entry->src_cid = src_cid; @@ -72,14 +75,15 @@ TRACE_EVENT(virtio_transport_alloc_pkt, __entry->type = type; __entry->op = op; __entry->flags = flags; + __entry->zcopy = zcopy; ), - TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x", + TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x zcopy=%s", __entry->src_cid, __entry->src_port, __entry->dst_cid, __entry->dst_port, __entry->len, show_type(__entry->type), show_op(__entry->op), - __entry->flags) + __entry->flags, __entry->zcopy ? "true" : "false") ); TRACE_EVENT(virtio_transport_recv_pkt, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0448700890f7..7ba61b75bc0e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -932,7 +932,14 @@ enum bpf_map_type { */ BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, - BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs + * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE + + * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE + * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE + * deprecated. + */ + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, @@ -1040,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2697,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2936,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. * It supports the same set of *optname*\ s that is supported by @@ -3257,6 +3269,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -5089,6 +5106,8 @@ union bpf_attr { * **BPF_F_TIMER_ABS** * Start the timer in absolute expire value instead of the * default relative one. + * **BPF_F_TIMER_CPU_PIN** + * Timer will be pinned to the CPU of the caller. * * Return * 0 on success. @@ -6525,6 +6544,7 @@ struct bpf_link_info { __aligned_u64 addrs; __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; + __u64 missed; } kprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ @@ -6540,6 +6560,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ @@ -6953,6 +6974,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6965,6 +6987,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -6999,6 +7022,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -7300,9 +7326,11 @@ struct bpf_core_relo { * Flags to control bpf_timer_start() behaviour. * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is * relative to current time. + * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller. */ enum { BPF_F_TIMER_ABS = (1ULL << 0), + BPF_F_TIMER_CPU_PIN = (1ULL << 1), }; /* BPF numbers iterator state */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 03875e078be8..cd4b82458d1b 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -680,6 +680,7 @@ enum devlink_port_function_attr { DEVLINK_PORT_FN_ATTR_STATE, /* u8 */ DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ + DEVLINK_PORT_FN_ATTR_DEVLINK, /* nested */ __DEVLINK_PORT_FUNCTION_ATTR_MAX, DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h new file mode 100644 index 000000000000..20ef0718f8dc --- /dev/null +++ b/include/uapi/linux/dpll.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/dpll.yaml */ +/* YNL-GEN uapi header */ + +#ifndef _UAPI_LINUX_DPLL_H +#define _UAPI_LINUX_DPLL_H + +#define DPLL_FAMILY_NAME "dpll" +#define DPLL_FAMILY_VERSION 1 + +/** + * enum dpll_mode - working modes a dpll can support, differentiates if and how + * dpll selects one of its inputs to syntonize with it, valid values for + * DPLL_A_MODE attribute + * @DPLL_MODE_MANUAL: input can be only selected by sending a request to dpll + * @DPLL_MODE_AUTOMATIC: highest prio input pin auto selected by dpll + */ +enum dpll_mode { + DPLL_MODE_MANUAL = 1, + DPLL_MODE_AUTOMATIC, + + /* private: */ + __DPLL_MODE_MAX, + DPLL_MODE_MAX = (__DPLL_MODE_MAX - 1) +}; + +/** + * enum dpll_lock_status - provides information of dpll device lock status, + * valid values for DPLL_A_LOCK_STATUS attribute + * @DPLL_LOCK_STATUS_UNLOCKED: dpll was not yet locked to any valid input (or + * forced by setting DPLL_A_MODE to DPLL_MODE_DETACHED) + * @DPLL_LOCK_STATUS_LOCKED: dpll is locked to a valid signal, but no holdover + * available + * @DPLL_LOCK_STATUS_LOCKED_HO_ACQ: dpll is locked and holdover acquired + * @DPLL_LOCK_STATUS_HOLDOVER: dpll is in holdover state - lost a valid lock or + * was forced by disconnecting all the pins (latter possible only when dpll + * lock-state was already DPLL_LOCK_STATUS_LOCKED_HO_ACQ, if dpll lock-state + * was not DPLL_LOCK_STATUS_LOCKED_HO_ACQ, the dpll's lock-state shall remain + * DPLL_LOCK_STATUS_UNLOCKED) + */ +enum dpll_lock_status { + DPLL_LOCK_STATUS_UNLOCKED = 1, + DPLL_LOCK_STATUS_LOCKED, + DPLL_LOCK_STATUS_LOCKED_HO_ACQ, + DPLL_LOCK_STATUS_HOLDOVER, + + /* private: */ + __DPLL_LOCK_STATUS_MAX, + DPLL_LOCK_STATUS_MAX = (__DPLL_LOCK_STATUS_MAX - 1) +}; + +#define DPLL_TEMP_DIVIDER 1000 + +/** + * enum dpll_type - type of dpll, valid values for DPLL_A_TYPE attribute + * @DPLL_TYPE_PPS: dpll produces Pulse-Per-Second signal + * @DPLL_TYPE_EEC: dpll drives the Ethernet Equipment Clock + */ +enum dpll_type { + DPLL_TYPE_PPS = 1, + DPLL_TYPE_EEC, + + /* private: */ + __DPLL_TYPE_MAX, + DPLL_TYPE_MAX = (__DPLL_TYPE_MAX - 1) +}; + +/** + * enum dpll_pin_type - defines possible types of a pin, valid values for + * DPLL_A_PIN_TYPE attribute + * @DPLL_PIN_TYPE_MUX: aggregates another layer of selectable pins + * @DPLL_PIN_TYPE_EXT: external input + * @DPLL_PIN_TYPE_SYNCE_ETH_PORT: ethernet port PHY's recovered clock + * @DPLL_PIN_TYPE_INT_OSCILLATOR: device internal oscillator + * @DPLL_PIN_TYPE_GNSS: GNSS recovered clock + */ +enum dpll_pin_type { + DPLL_PIN_TYPE_MUX = 1, + DPLL_PIN_TYPE_EXT, + DPLL_PIN_TYPE_SYNCE_ETH_PORT, + DPLL_PIN_TYPE_INT_OSCILLATOR, + DPLL_PIN_TYPE_GNSS, + + /* private: */ + __DPLL_PIN_TYPE_MAX, + DPLL_PIN_TYPE_MAX = (__DPLL_PIN_TYPE_MAX - 1) +}; + +/** + * enum dpll_pin_direction - defines possible direction of a pin, valid values + * for DPLL_A_PIN_DIRECTION attribute + * @DPLL_PIN_DIRECTION_INPUT: pin used as a input of a signal + * @DPLL_PIN_DIRECTION_OUTPUT: pin used to output the signal + */ +enum dpll_pin_direction { + DPLL_PIN_DIRECTION_INPUT = 1, + DPLL_PIN_DIRECTION_OUTPUT, + + /* private: */ + __DPLL_PIN_DIRECTION_MAX, + DPLL_PIN_DIRECTION_MAX = (__DPLL_PIN_DIRECTION_MAX - 1) +}; + +#define DPLL_PIN_FREQUENCY_1_HZ 1 +#define DPLL_PIN_FREQUENCY_10_KHZ 10000 +#define DPLL_PIN_FREQUENCY_77_5_KHZ 77500 +#define DPLL_PIN_FREQUENCY_10_MHZ 10000000 + +/** + * enum dpll_pin_state - defines possible states of a pin, valid values for + * DPLL_A_PIN_STATE attribute + * @DPLL_PIN_STATE_CONNECTED: pin connected, active input of phase locked loop + * @DPLL_PIN_STATE_DISCONNECTED: pin disconnected, not considered as a valid + * input + * @DPLL_PIN_STATE_SELECTABLE: pin enabled for automatic input selection + */ +enum dpll_pin_state { + DPLL_PIN_STATE_CONNECTED = 1, + DPLL_PIN_STATE_DISCONNECTED, + DPLL_PIN_STATE_SELECTABLE, + + /* private: */ + __DPLL_PIN_STATE_MAX, + DPLL_PIN_STATE_MAX = (__DPLL_PIN_STATE_MAX - 1) +}; + +/** + * enum dpll_pin_capabilities - defines possible capabilities of a pin, valid + * flags on DPLL_A_PIN_CAPABILITIES attribute + * @DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE: pin direction can be changed + * @DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE: pin priority can be changed + * @DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE: pin state can be changed + */ +enum dpll_pin_capabilities { + DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE = 1, + DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE = 2, + DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE = 4, +}; + +enum dpll_a { + DPLL_A_ID = 1, + DPLL_A_MODULE_NAME, + DPLL_A_PAD, + DPLL_A_CLOCK_ID, + DPLL_A_MODE, + DPLL_A_MODE_SUPPORTED, + DPLL_A_LOCK_STATUS, + DPLL_A_TEMP, + DPLL_A_TYPE, + + __DPLL_A_MAX, + DPLL_A_MAX = (__DPLL_A_MAX - 1) +}; + +enum dpll_a_pin { + DPLL_A_PIN_ID = 1, + DPLL_A_PIN_PARENT_ID, + DPLL_A_PIN_MODULE_NAME, + DPLL_A_PIN_PAD, + DPLL_A_PIN_CLOCK_ID, + DPLL_A_PIN_BOARD_LABEL, + DPLL_A_PIN_PANEL_LABEL, + DPLL_A_PIN_PACKAGE_LABEL, + DPLL_A_PIN_TYPE, + DPLL_A_PIN_DIRECTION, + DPLL_A_PIN_FREQUENCY, + DPLL_A_PIN_FREQUENCY_SUPPORTED, + DPLL_A_PIN_FREQUENCY_MIN, + DPLL_A_PIN_FREQUENCY_MAX, + DPLL_A_PIN_PRIO, + DPLL_A_PIN_STATE, + DPLL_A_PIN_CAPABILITIES, + DPLL_A_PIN_PARENT_DEVICE, + DPLL_A_PIN_PARENT_PIN, + + __DPLL_A_PIN_MAX, + DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) +}; + +enum dpll_cmd { + DPLL_CMD_DEVICE_ID_GET = 1, + DPLL_CMD_DEVICE_GET, + DPLL_CMD_DEVICE_SET, + DPLL_CMD_DEVICE_CREATE_NTF, + DPLL_CMD_DEVICE_DELETE_NTF, + DPLL_CMD_DEVICE_CHANGE_NTF, + DPLL_CMD_PIN_ID_GET, + DPLL_CMD_PIN_GET, + DPLL_CMD_PIN_SET, + DPLL_CMD_PIN_CREATE_NTF, + DPLL_CMD_PIN_DELETE_NTF, + DPLL_CMD_PIN_CHANGE_NTF, + + __DPLL_CMD_MAX, + DPLL_CMD_MAX = (__DPLL_CMD_MAX - 1) +}; + +#define DPLL_MCGRP_MONITOR "monitor" + +#endif /* _UAPI_LINUX_DPLL_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ce3117df9cec..fac351a93aed 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -376,7 +376,7 @@ enum { IFLA_GSO_IPV4_MAX_SIZE, IFLA_GRO_IPV4_MAX_SIZE, - + IFLA_DPLL_PIN, __IFLA_MAX }; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index c1634b95c223..2943a151d4f1 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -38,11 +38,27 @@ enum netdev_xdp_act { NETDEV_XDP_ACT_MASK = 127, }; +/** + * enum netdev_xdp_rx_metadata + * @NETDEV_XDP_RX_METADATA_TIMESTAMP: Device is capable of exposing receive HW + * timestamp via bpf_xdp_metadata_rx_timestamp(). + * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet + * hash via bpf_xdp_metadata_rx_hash(). + */ +enum netdev_xdp_rx_metadata { + NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, + NETDEV_XDP_RX_METADATA_HASH = 2, + + /* private: */ + NETDEV_XDP_RX_METADATA_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, + NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 879eeb0a084b..d1d08da6331a 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -289,6 +289,18 @@ struct tcp_info { */ __u32 tcpi_rehash; /* PLB or timeout triggered rehash attempts */ + + __u16 tcpi_total_rto; /* Total number of RTO timeouts, including + * SYN/SYN-ACK and recurring timeouts. + */ + __u16 tcpi_total_rto_recoveries; /* Total number of RTO + * recoveries, including any + * unfinished recovery. + */ + __u32 tcpi_total_rto_time; /* Total time spent in RTO recoveries + * in milliseconds, including any + * unfinished recovery. + */ }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 96856f130cbf..833faa04461b 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -793,8 +793,6 @@ __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num)); BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num)); - BTF_TYPE_EMIT(struct btf_iter_num); - /* start == end is legit, it's an empty range and we'll just get NULL * on first (and any subsequent) bpf_iter_num_next() call */ diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index fdc3e8705a3c..db6176fb64dc 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -615,7 +615,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map) if (st_map->links) bpf_struct_ops_map_put_progs(st_map); bpf_map_area_free(st_map->links); - bpf_jit_free_exec(st_map->image); + if (st_map->image) { + bpf_jit_free_exec(st_map->image); + bpf_jit_uncharge_modmem(PAGE_SIZE); + } bpf_map_area_free(st_map->uvalue); bpf_map_area_free(st_map); } @@ -657,6 +660,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) struct bpf_struct_ops_map *st_map; const struct btf_type *t, *vt; struct bpf_map *map; + int ret; st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id); if (!st_ops) @@ -681,12 +685,27 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) st_map->st_ops = st_ops; map = &st_map->map; + ret = bpf_jit_charge_modmem(PAGE_SIZE); + if (ret) { + __bpf_struct_ops_map_free(map); + return ERR_PTR(ret); + } + + st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!st_map->image) { + /* __bpf_struct_ops_map_free() uses st_map->image as flag + * for "charged or not". In this case, we need to unchange + * here. + */ + bpf_jit_uncharge_modmem(PAGE_SIZE); + __bpf_struct_ops_map_free(map); + return ERR_PTR(-ENOMEM); + } st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE); st_map->links = bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *), NUMA_NO_NODE); - st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); - if (!st_map->uvalue || !st_map->links || !st_map->image) { + if (!st_map->uvalue || !st_map->links) { __bpf_struct_ops_map_free(map); return ERR_PTR(-ENOMEM); } @@ -907,4 +926,3 @@ err_out: kfree(link); return err; } - diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 8090d7fb11ef..15d71d2986d3 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3293,6 +3293,8 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, type = BPF_KPTR_UNREF; else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) type = BPF_KPTR_REF; + else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off))) + type = BPF_KPTR_PERCPU; else return -EINVAL; @@ -3308,10 +3310,10 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, return BTF_FIELD_FOUND; } -static const char *btf_find_decl_tag_value(const struct btf *btf, - const struct btf_type *pt, - int comp_idx, const char *tag_key) +const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key) { + const char *value = NULL; int i; for (i = 1; i < btf_nr_types(btf); i++) { @@ -3325,9 +3327,14 @@ static const char *btf_find_decl_tag_value(const struct btf *btf, continue; if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len)) continue; - return __btf_name_by_offset(btf, t->name_off) + len; + /* Prevent duplicate entries for same type */ + if (value) + return ERR_PTR(-EEXIST); + value = __btf_name_by_offset(btf, t->name_off) + len; } - return NULL; + if (!value) + return ERR_PTR(-ENOENT); + return value; } static int @@ -3345,7 +3352,7 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt, if (t->size != sz) return BTF_FIELD_IGNORE; value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:"); - if (!value_type) + if (IS_ERR(value_type)) return -EINVAL; node_field_name = strstr(value_type, ":"); if (!node_field_name) @@ -3457,6 +3464,7 @@ static int btf_find_struct_field(const struct btf *btf, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: ret = btf_find_kptr(btf, member_type, off, sz, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) @@ -3523,6 +3531,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: ret = btf_find_kptr(btf, var_type, off, sz, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) @@ -3783,6 +3792,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); if (ret < 0) goto end; @@ -6949,7 +6959,7 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, * (either PTR_TO_CTX or SCALAR_VALUE). */ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs) + struct bpf_reg_state *regs, bool is_ex_cb) { struct bpf_verifier_log *log = &env->log; struct bpf_prog *prog = env->prog; @@ -7006,7 +7016,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, tname, nargs, MAX_BPF_FUNC_REG_ARGS); return -EINVAL; } - /* check that function returns int */ + /* check that function returns int, exception cb also requires this */ t = btf_type_by_id(btf, t->type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); @@ -7055,6 +7065,14 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, i, btf_type_str(t), tname); return -EINVAL; } + /* We have already ensured that the callback returns an integer, just + * like all global subprogs. We need to determine it only has a single + * scalar argument. + */ + if (is_ex_cb && (nargs != 1 || regs[BPF_REG_1].type != SCALAR_VALUE)) { + bpf_log(log, "exception cb only supports single integer argument\n"); + return -EINVAL; + } return 0; } @@ -7832,6 +7850,7 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: return BTF_KFUNC_HOOK_CGROUP_SKB; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 03b3d4492980..74ad2215e1ba 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1450,18 +1450,22 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); * provided by user sockaddr * @sk: sock struct that will use sockaddr * @uaddr: sockaddr struct provided by user + * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is + * read-only for AF_INET[6] uaddr but can be modified for AF_UNIX + * uaddr. * @atype: The type of program to be executed * @t_ctx: Pointer to attach type specific context * @flags: Pointer to u32 which contains higher bits of BPF program * return value (OR'ed together). * - * socket is expected to be of type INET or INET6. + * socket is expected to be of type INET, INET6 or UNIX. * * This function will return %-EPERM if an attached program is found and * returned value != 1 during execution. In all other cases, 0 is returned. */ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, + int *uaddrlen, enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags) @@ -1473,21 +1477,31 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, }; struct sockaddr_storage unspec; struct cgroup *cgrp; + int ret; /* Check socket family since not all sockets represent network * endpoint (e.g. AF_UNIX). */ - if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 && + sk->sk_family != AF_UNIX) return 0; if (!ctx.uaddr) { memset(&unspec, 0, sizeof(unspec)); ctx.uaddr = (struct sockaddr *)&unspec; + ctx.uaddrlen = 0; + } else { + ctx.uaddrlen = *uaddrlen; } cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, - 0, flags); + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, + 0, flags); + + if (!ret && uaddr) + *uaddrlen = ctx.uaddrlen; + + return ret; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); @@ -2520,10 +2534,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return NULL; default: return &bpf_get_retval_proto; @@ -2535,10 +2552,13 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return NULL; default: return &bpf_set_retval_proto; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4e3ce0542e31..08626b519ce2 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -64,8 +64,8 @@ #define OFF insn->off #define IMM insn->imm -struct bpf_mem_alloc bpf_global_ma; -bool bpf_global_ma_set; +struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; +bool bpf_global_ma_set, bpf_global_percpu_ma_set; /* No hurry in this branch * @@ -212,7 +212,7 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, const struct bpf_line_info *linfo; void **jited_linfo; - if (!prog->aux->jited_linfo) + if (!prog->aux->jited_linfo || prog->aux->func_idx > prog->aux->func_cnt) /* Userspace did not provide linfo */ return; @@ -539,7 +539,7 @@ static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) { int i; - for (i = 0; i < fp->aux->func_cnt; i++) + for (i = 0; i < fp->aux->real_func_cnt; i++) bpf_prog_kallsyms_del(fp->aux->func[i]); } @@ -589,7 +589,7 @@ bpf_prog_ksym_set_name(struct bpf_prog *prog) sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); /* prog->aux->name will be ignored if full btf name is available */ - if (prog->aux->func_info_cnt) { + if (prog->aux->func_info_cnt && prog->aux->func_idx < prog->aux->func_info_cnt) { type = btf_type_by_id(prog->aux->btf, prog->aux->func_info[prog->aux->func_idx].type_id); func_name = btf_name_by_offset(prog->aux->btf, type->name_off); @@ -623,7 +623,11 @@ static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n) if (val < ksym->start) return -1; - if (val >= ksym->end) + /* Ensure that we detect return addresses as part of the program, when + * the final instruction is a call for a program part of the stack + * trace. Therefore, do val > ksym->end instead of val >= ksym->end. + */ + if (val > ksym->end) return 1; return 0; @@ -733,7 +737,7 @@ bool is_bpf_text_address(unsigned long addr) return ret; } -static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) +struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) { struct bpf_ksym *ksym = bpf_ksym_find(addr); @@ -1208,7 +1212,7 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, if (!extra_pass) addr = NULL; else if (prog->aux->func && - off >= 0 && off < prog->aux->func_cnt) + off >= 0 && off < prog->aux->real_func_cnt) addr = (u8 *)prog->aux->func[off]->bpf_func; else return -EINVAL; @@ -2721,7 +2725,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) #endif if (aux->dst_trampoline) bpf_trampoline_put(aux->dst_trampoline); - for (i = 0; i < aux->func_cnt; i++) { + for (i = 0; i < aux->real_func_cnt; i++) { /* We can just unlink the subprog poke descriptor table as * it was originally linked to the main program and is also * released along with it. @@ -2729,7 +2733,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) aux->func[i]->aux->poke_tab = NULL; bpf_jit_free(aux->func[i]); } - if (aux->func_cnt) { + if (aux->real_func_cnt) { kfree(aux->func); bpf_prog_unlock_free(aux->prog); } else { @@ -2914,6 +2918,15 @@ int __weak bpf_arch_text_invalidate(void *dst, size_t len) return -ENOTSUPP; } +bool __weak bpf_jit_supports_exceptions(void) +{ + return false; +} + +void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie) +{ +} + #ifdef CONFIG_BPF_SYSCALL static int __init bpf_global_ma_init(void) { @@ -2921,7 +2934,9 @@ static int __init bpf_global_ma_init(void) ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false); bpf_global_ma_set = !ret; - return ret; + ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true); + bpf_global_percpu_ma_set = !ret; + return !bpf_global_ma_set || !bpf_global_percpu_ma_set; } late_initcall(bpf_global_ma_init); #endif diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 8bd3812fb8df..62a53ebfedf9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -22,6 +22,7 @@ #include <linux/security.h> #include <linux/btf_ids.h> #include <linux/bpf_mem_alloc.h> +#include <linux/kasan.h> #include "../../lib/kstrtox.h" @@ -1271,7 +1272,7 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla if (in_nmi()) return -EOPNOTSUPP; - if (flags > BPF_F_TIMER_ABS) + if (flags & ~(BPF_F_TIMER_ABS | BPF_F_TIMER_CPU_PIN)) return -EINVAL; __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; @@ -1285,6 +1286,9 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla else mode = HRTIMER_MODE_REL_SOFT; + if (flags & BPF_F_TIMER_CPU_PIN) + mode |= HRTIMER_MODE_PINNED; + hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode); out: __bpf_spin_unlock_irqrestore(&timer->lock); @@ -1902,6 +1906,14 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign) return p; } +__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign) +{ + u64 size = local_type_id__k; + + /* The verifier has ensured that meta__ign must be NULL */ + return bpf_mem_alloc(&bpf_global_percpu_ma, size); +} + /* Must be called under migrate_disable(), as required by bpf_mem_free */ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) { @@ -1930,6 +1942,12 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) __bpf_obj_drop_impl(p, meta ? meta->record : NULL); } +__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign) +{ + /* The verifier has ensured that meta__ign must be NULL */ + bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc); +} + __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign) { struct btf_struct_meta *meta = meta__ign; @@ -2435,6 +2453,49 @@ __bpf_kfunc void bpf_rcu_read_unlock(void) rcu_read_unlock(); } +struct bpf_throw_ctx { + struct bpf_prog_aux *aux; + u64 sp; + u64 bp; + int cnt; +}; + +static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp) +{ + struct bpf_throw_ctx *ctx = cookie; + struct bpf_prog *prog; + + if (!is_bpf_text_address(ip)) + return !ctx->cnt; + prog = bpf_prog_ksym_find(ip); + ctx->cnt++; + if (bpf_is_subprog(prog)) + return true; + ctx->aux = prog->aux; + ctx->sp = sp; + ctx->bp = bp; + return false; +} + +__bpf_kfunc void bpf_throw(u64 cookie) +{ + struct bpf_throw_ctx ctx = {}; + + arch_bpf_stack_walk(bpf_stack_walker, &ctx); + WARN_ON_ONCE(!ctx.aux); + if (ctx.aux) + WARN_ON_ONCE(!ctx.aux->exception_boundary); + WARN_ON_ONCE(!ctx.bp); + WARN_ON_ONCE(!ctx.cnt); + /* Prevent KASAN false positives for CONFIG_KASAN_STACK by unpoisoning + * deeper stack depths than ctx.sp as we do not return from bpf_throw, + * which skips compiler generated instrumentation to do the same. + */ + kasan_unpoison_task_stack_below((void *)(long)ctx.sp); + ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp); + WARN(1, "A call to BPF exception callback should never return\n"); +} + __diag_pop(); BTF_SET8_START(generic_btf_ids) @@ -2442,7 +2503,9 @@ BTF_SET8_START(generic_btf_ids) BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE) BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_push_front_impl) BTF_ID_FLAGS(func, bpf_list_push_back_impl) @@ -2462,6 +2525,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_throw) BTF_SET8_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { @@ -2488,6 +2552,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_iter_task_vma_new, KF_ITER_NEW | KF_RCU) +BTF_ID_FLAGS(func, bpf_iter_task_vma_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_dynptr_adjust) BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index d93ddac283d4..39ea316c55e7 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -526,15 +526,16 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) struct bpf_mem_cache *c, __percpu *pc; struct obj_cgroup *objcg = NULL; + /* room for llist_node and per-cpu pointer */ + if (percpu) + percpu_size = LLIST_NODE_SZ + sizeof(void *); + if (size) { pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); if (!pc) return -ENOMEM; - if (percpu) - /* room for llist_node and per-cpu pointer */ - percpu_size = LLIST_NODE_SZ + sizeof(void *); - else + if (!percpu) size += LLIST_NODE_SZ; /* room for llist_node */ unit_size = size; @@ -555,10 +556,6 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) return 0; } - /* size == 0 && percpu is an invalid combination */ - if (WARN_ON_ONCE(percpu)) - return -EINVAL; - pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; @@ -572,6 +569,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) c = &cc->cache[i]; c->unit_size = sizes[i]; c->objcg = objcg; + c->percpu_size = percpu_size; c->tgt = c; init_refill_work(c); @@ -782,12 +780,17 @@ static void notrace *unit_alloc(struct bpf_mem_cache *c) } } local_dec(&c->active); - local_irq_restore(flags); WARN_ON(cnt < 0); if (cnt < c->low_watermark) irq_work_raise(c); + /* Enable IRQ after the enqueue of irq work completes, so irq work + * will run after IRQ is enabled and free_llist may be refilled by + * irq work before other task preempts current task. + */ + local_irq_restore(flags); + return llnode; } @@ -823,11 +826,16 @@ static void notrace unit_free(struct bpf_mem_cache *c, void *ptr) llist_add(llnode, &c->free_llist_extra); } local_dec(&c->active); - local_irq_restore(flags); if (cnt > c->high_watermark) /* free few objects from current cpu into global kmalloc pool */ irq_work_raise(c); + /* Enable IRQ after irq_work_raise() completes, otherwise when current + * task is preempted by task which does unit_alloc(), unit_alloc() may + * return NULL unexpectedly because irq work is already pending but can + * not been triggered and free_llist can not be refilled timely. + */ + local_irq_restore(flags); } static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr) @@ -845,10 +853,10 @@ static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr) llist_add(llnode, &c->free_llist_extra_rcu); } local_dec(&c->active); - local_irq_restore(flags); if (!atomic_read(&c->call_rcu_in_progress)) irq_work_raise(c); + local_irq_restore(flags); } /* Called from BPF program or from sys_bpf syscall. diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 87d6693d8233..1a4fec330eaa 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -234,7 +234,14 @@ int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr) attr->prog_type != BPF_PROG_TYPE_XDP) return -EINVAL; - if (attr->prog_flags & ~BPF_F_XDP_DEV_BOUND_ONLY) + if (attr->prog_flags & ~(BPF_F_XDP_DEV_BOUND_ONLY | BPF_F_XDP_HAS_FRAGS)) + return -EINVAL; + + /* Frags are allowed only if program is dev-bound-only, but not + * if it is requesting bpf offload. + */ + if (attr->prog_flags & BPF_F_XDP_HAS_FRAGS && + !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY)) return -EINVAL; if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS && @@ -847,10 +854,11 @@ void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id) if (!ops) goto out; - if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP)) - p = ops->xmo_rx_timestamp; - else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH)) - p = ops->xmo_rx_hash; +#define XDP_METADATA_KFUNC(name, _, __, xmo) \ + if (func_id == bpf_xdp_metadata_kfunc_id(name)) p = ops->xmo; + XDP_METADATA_KFUNC_xxx +#undef XDP_METADATA_KFUNC + out: up_read(&bpf_devs_lock); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 458bb80b14d5..d6b277482085 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -28,7 +28,7 @@ struct bpf_stack_map { void *elems; struct pcpu_freelist freelist; u32 n_buckets; - struct stack_map_bucket *buckets[]; + struct stack_map_bucket *buckets[] __counted_by(n_buckets); }; static inline bool stack_map_use_build_id(struct bpf_map *map) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d77b2f8b9364..341f8cb4405c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -514,6 +514,7 @@ void btf_record_free(struct btf_record *rec) switch (rec->fields[i].type) { case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: if (rec->fields[i].kptr.module) module_put(rec->fields[i].kptr.module); btf_put(rec->fields[i].kptr.btf); @@ -560,6 +561,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) switch (fields[i].type) { case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: btf_get(fields[i].kptr.btf); if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) { ret = -ENXIO; @@ -650,6 +652,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) WRITE_ONCE(*(u64 *)field_ptr, 0); break; case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0); if (!xchgd_field) break; @@ -1045,6 +1048,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: case BPF_REFCOUNT: if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_PERCPU_HASH && @@ -2442,14 +2446,19 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: return 0; default: return -EINVAL; @@ -2745,7 +2754,7 @@ free_used_maps: * period before we can tear down JIT memory since symbols * are already exposed under kallsyms. */ - __bpf_prog_put_noref(prog, prog->aux->func_cnt); + __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); return err; free_prog_sec: free_uid(prog->aux->user); @@ -3370,7 +3379,7 @@ static void bpf_perf_link_dealloc(struct bpf_link *link) static int bpf_perf_link_fill_common(const struct perf_event *event, char __user *uname, u32 ulen, u64 *probe_offset, u64 *probe_addr, - u32 *fd_type) + u32 *fd_type, unsigned long *missed) { const char *buf; u32 prog_id; @@ -3381,7 +3390,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, return -EINVAL; err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, - probe_offset, probe_addr); + probe_offset, probe_addr, missed); if (err) return err; if (!uname) @@ -3404,6 +3413,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, static int bpf_perf_link_fill_kprobe(const struct perf_event *event, struct bpf_link_info *info) { + unsigned long missed; char __user *uname; u64 addr, offset; u32 ulen, type; @@ -3412,7 +3422,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); ulen = info->perf_event.kprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, &missed); if (err) return err; if (type == BPF_FD_TYPE_KRETPROBE) @@ -3421,6 +3431,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, info->perf_event.type = BPF_PERF_EVENT_KPROBE; info->perf_event.kprobe.offset = offset; + info->perf_event.kprobe.missed = missed; if (!kallsyms_show_value(current_cred())) addr = 0; info->perf_event.kprobe.addr = addr; @@ -3440,7 +3451,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); ulen = info->perf_event.uprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, NULL); if (err) return err; @@ -3476,7 +3487,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); ulen = info->perf_event.tracepoint.name_len; info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; - return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL); + return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL); } static int bpf_perf_link_fill_perf_event(const struct perf_event *event, @@ -3672,14 +3683,19 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; case BPF_CGROUP_SOCK_OPS: return BPF_PROG_TYPE_SOCK_OPS; @@ -3945,14 +3961,19 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET6_POST_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: case BPF_CGROUP_SYSCTL: @@ -4818,7 +4839,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, err = bpf_get_perf_event_info(event, &prog_id, &fd_type, &buf, &probe_offset, - &probe_addr); + &probe_addr, NULL); if (!err) err = bpf_task_fd_query_copy(attr, uattr, prog_id, fd_type, buf, diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index c4ab9d6cdbe9..fef17628341f 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -7,7 +7,9 @@ #include <linux/fs.h> #include <linux/fdtable.h> #include <linux/filter.h> +#include <linux/bpf_mem_alloc.h> #include <linux/btf_ids.h> +#include <linux/mm_types.h> #include "mmap_unlock_work.h" static const char * const iter_task_type_names[] = { @@ -35,16 +37,13 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm u32 *tid, bool skip_if_dup_files) { - struct task_struct *task, *next_task; + struct task_struct *task; struct pid *pid; - u32 saved_tid; + u32 next_tid; if (!*tid) { /* The first time, the iterator calls this function. */ pid = find_pid_ns(common->pid, common->ns); - if (!pid) - return NULL; - task = get_pid_task(pid, PIDTYPE_TGID); if (!task) return NULL; @@ -66,44 +65,27 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm return task; } - pid = find_pid_ns(common->pid_visiting, common->ns); - if (!pid) - return NULL; - - task = get_pid_task(pid, PIDTYPE_PID); + task = find_task_by_pid_ns(common->pid_visiting, common->ns); if (!task) return NULL; retry: - if (!pid_alive(task)) { - put_task_struct(task); - return NULL; - } - - next_task = next_thread(task); - put_task_struct(task); - if (!next_task) - return NULL; + task = next_thread(task); - saved_tid = *tid; - *tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns); - if (!*tid || *tid == common->pid) { + next_tid = __task_pid_nr_ns(task, PIDTYPE_PID, common->ns); + if (!next_tid || next_tid == common->pid) { /* Run out of tasks of a process. The tasks of a * thread_group are linked as circular linked list. */ - *tid = saved_tid; return NULL; } - get_task_struct(next_task); - common->pid_visiting = *tid; - - if (skip_if_dup_files && task->files == task->group_leader->files) { - task = next_task; + if (skip_if_dup_files && task->files == task->group_leader->files) goto retry; - } - return next_task; + *tid = common->pid_visiting = next_tid; + get_task_struct(task); + return task; } static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common, @@ -823,6 +805,95 @@ const struct bpf_func_proto bpf_find_vma_proto = { .arg5_type = ARG_ANYTHING, }; +struct bpf_iter_task_vma_kern_data { + struct task_struct *task; + struct mm_struct *mm; + struct mmap_unlock_irq_work *work; + struct vma_iterator vmi; +}; + +struct bpf_iter_task_vma { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + +/* Non-opaque version of bpf_iter_task_vma */ +struct bpf_iter_task_vma_kern { + struct bpf_iter_task_vma_kern_data *data; +} __attribute__((aligned(8))); + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); + +__bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, + struct task_struct *task, u64 addr) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + bool irq_work_busy = false; + int err; + + BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma)); + + /* is_iter_reg_valid_uninit guarantees that kit hasn't been initialized + * before, so non-NULL kit->data doesn't point to previously + * bpf_mem_alloc'd bpf_iter_task_vma_kern_data + */ + kit->data = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_iter_task_vma_kern_data)); + if (!kit->data) + return -ENOMEM; + + kit->data->task = get_task_struct(task); + kit->data->mm = task->mm; + if (!kit->data->mm) { + err = -ENOENT; + goto err_cleanup_iter; + } + + /* kit->data->work == NULL is valid after bpf_mmap_unlock_get_irq_work */ + irq_work_busy = bpf_mmap_unlock_get_irq_work(&kit->data->work); + if (irq_work_busy || !mmap_read_trylock(kit->data->mm)) { + err = -EBUSY; + goto err_cleanup_iter; + } + + vma_iter_init(&kit->data->vmi, kit->data->mm, addr); + return 0; + +err_cleanup_iter: + if (kit->data->task) + put_task_struct(kit->data->task); + bpf_mem_free(&bpf_global_ma, kit->data); + /* NULL kit->data signals failed bpf_iter_task_vma initialization */ + kit->data = NULL; + return err; +} + +__bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + + if (!kit->data) /* bpf_iter_task_vma_new failed */ + return NULL; + return vma_next(&kit->data->vmi); +} + +__bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + + if (kit->data) { + bpf_mmap_unlock_mm(kit->data->work, kit->data->mm); + put_task_struct(kit->data->task); + bpf_mem_free(&bpf_global_ma, kit->data); + } +} + +__diag_pop(); + DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); static void do_mmap_read_unlock(struct irq_work *entry) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 53ff50cac61e..e97aeda3a86b 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -415,8 +415,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut goto out; } - /* clear all bits except SHARE_IPMODIFY */ - tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY; + /* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */ + tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX); if (tlinks[BPF_TRAMP_FEXIT].nr_links || tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 873ade146f3d..bb58987e4844 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -304,7 +304,7 @@ struct bpf_kfunc_call_arg_meta { /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling, * generally to pass info about user-defined local kptr types to later * verification logic - * bpf_obj_drop + * bpf_obj_drop/bpf_percpu_obj_drop * Record the local kptr type to be drop'd * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type) * Record the local kptr type to be refcount_incr'd and use @@ -543,6 +543,7 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id) } static bool is_callback_calling_kfunc(u32 btf_id); +static bool is_bpf_throw_kfunc(struct bpf_insn *insn); static bool is_callback_calling_function(enum bpf_func_id func_id) { @@ -1341,6 +1342,50 @@ static void scrub_spilled_slot(u8 *stype) *stype = STACK_MISC; } +static void print_scalar_ranges(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + const char **sep) +{ + struct { + const char *name; + u64 val; + bool omit; + } minmaxs[] = { + {"smin", reg->smin_value, reg->smin_value == S64_MIN}, + {"smax", reg->smax_value, reg->smax_value == S64_MAX}, + {"umin", reg->umin_value, reg->umin_value == 0}, + {"umax", reg->umax_value, reg->umax_value == U64_MAX}, + {"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN}, + {"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX}, + {"umin32", reg->u32_min_value, reg->u32_min_value == 0}, + {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX}, + }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)]; + bool neg1, neg2; + + for (m1 = &minmaxs[0]; m1 < mend; m1++) { + if (m1->omit) + continue; + + neg1 = m1->name[0] == 's' && (s64)m1->val < 0; + + verbose(env, "%s%s=", *sep, m1->name); + *sep = ","; + + for (m2 = m1 + 2; m2 < mend; m2 += 2) { + if (m2->omit || m2->val != m1->val) + continue; + /* don't mix negatives with positives */ + neg2 = m2->name[0] == 's' && (s64)m2->val < 0; + if (neg2 != neg1) + continue; + m2->omit = true; + verbose(env, "%s=", m2->name); + } + + verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val); + } +} + static void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state, bool print_all) @@ -1404,34 +1449,13 @@ static void print_verifier_state(struct bpf_verifier_env *env, */ verbose_a("imm=%llx", reg->var_off.value); } else { - if (reg->smin_value != reg->umin_value && - reg->smin_value != S64_MIN) - verbose_a("smin=%lld", (long long)reg->smin_value); - if (reg->smax_value != reg->umax_value && - reg->smax_value != S64_MAX) - verbose_a("smax=%lld", (long long)reg->smax_value); - if (reg->umin_value != 0) - verbose_a("umin=%llu", (unsigned long long)reg->umin_value); - if (reg->umax_value != U64_MAX) - verbose_a("umax=%llu", (unsigned long long)reg->umax_value); + print_scalar_ranges(env, reg, &sep); if (!tnum_is_unknown(reg->var_off)) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose_a("var_off=%s", tn_buf); } - if (reg->s32_min_value != reg->smin_value && - reg->s32_min_value != S32_MIN) - verbose_a("s32_min=%d", (int)(reg->s32_min_value)); - if (reg->s32_max_value != reg->smax_value && - reg->s32_max_value != S32_MAX) - verbose_a("s32_max=%d", (int)(reg->s32_max_value)); - if (reg->u32_min_value != reg->umin_value && - reg->u32_min_value != U32_MIN) - verbose_a("u32_min=%d", (int)(reg->u32_min_value)); - if (reg->u32_max_value != reg->umax_value && - reg->u32_max_value != U32_MAX) - verbose_a("u32_max=%d", (int)(reg->u32_max_value)); } #undef verbose_a @@ -1515,7 +1539,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (state->in_async_callback_fn) verbose(env, " async_cb"); verbose(env, "\n"); - mark_verifier_state_clean(env); + if (!print_all) + mark_verifier_state_clean(env); } static inline u32 vlog_alignment(u32 pos) @@ -1748,7 +1773,9 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, return -ENOMEM; dst_state->jmp_history_cnt = src->jmp_history_cnt; - /* if dst has more stack frames then src frame, free them */ + /* if dst has more stack frames then src frame, free them, this is also + * necessary in case of exceptional exits using bpf_throw. + */ for (i = src->curframe + 1; i <= dst_state->curframe; i++) { free_func_state(dst_state->frame[i]); dst_state->frame[i] = NULL; @@ -2454,6 +2481,68 @@ static int add_subprog(struct bpf_verifier_env *env, int off) return env->subprog_cnt - 1; } +static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env) +{ + struct bpf_prog_aux *aux = env->prog->aux; + struct btf *btf = aux->btf; + const struct btf_type *t; + u32 main_btf_id, id; + const char *name; + int ret, i; + + /* Non-zero func_info_cnt implies valid btf */ + if (!aux->func_info_cnt) + return 0; + main_btf_id = aux->func_info[0].type_id; + + t = btf_type_by_id(btf, main_btf_id); + if (!t) { + verbose(env, "invalid btf id for main subprog in func_info\n"); + return -EINVAL; + } + + name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:"); + if (IS_ERR(name)) { + ret = PTR_ERR(name); + /* If there is no tag present, there is no exception callback */ + if (ret == -ENOENT) + ret = 0; + else if (ret == -EEXIST) + verbose(env, "multiple exception callback tags for main subprog\n"); + return ret; + } + + ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC); + if (ret < 0) { + verbose(env, "exception callback '%s' could not be found in BTF\n", name); + return ret; + } + id = ret; + t = btf_type_by_id(btf, id); + if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) { + verbose(env, "exception callback '%s' must have global linkage\n", name); + return -EINVAL; + } + ret = 0; + for (i = 0; i < aux->func_info_cnt; i++) { + if (aux->func_info[i].type_id != id) + continue; + ret = aux->func_info[i].insn_off; + /* Further func_info and subprog checks will also happen + * later, so assume this is the right insn_off for now. + */ + if (!ret) { + verbose(env, "invalid exception callback insn_off in func_info: 0\n"); + ret = -EINVAL; + } + } + if (!ret) { + verbose(env, "exception callback type id not found in func_info\n"); + ret = -EINVAL; + } + return ret; +} + #define MAX_KFUNC_DESCS 256 #define MAX_KFUNC_BTFS 256 @@ -2793,8 +2882,8 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, static int add_subprog_and_kfunc(struct bpf_verifier_env *env) { struct bpf_subprog_info *subprog = env->subprog_info; + int i, ret, insn_cnt = env->prog->len, ex_cb_insn; struct bpf_insn *insn = env->prog->insnsi; - int i, ret, insn_cnt = env->prog->len; /* Add entry function. */ ret = add_subprog(env, 0); @@ -2820,6 +2909,26 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) return ret; } + ret = bpf_find_exception_callback_insn_off(env); + if (ret < 0) + return ret; + ex_cb_insn = ret; + + /* If ex_cb_insn > 0, this means that the main program has a subprog + * marked using BTF decl tag to serve as the exception callback. + */ + if (ex_cb_insn) { + ret = add_subprog(env, ex_cb_insn); + if (ret < 0) + return ret; + for (i = 1; i < env->subprog_cnt; i++) { + if (env->subprog_info[i].start != ex_cb_insn) + continue; + env->exception_callback_subprog = i; + break; + } + } + /* Add a fake 'exit' subprog which could simplify subprog iteration * logic. 'subprog_cnt' should not be increased. */ @@ -2868,7 +2977,7 @@ next: if (i == subprog_end - 1) { /* to avoid fall-through from one subprog into another * the last insn of the subprog should be either exit - * or unconditional jump back + * or unconditional jump back or bpf_throw call */ if (code != (BPF_JMP | BPF_EXIT) && code != (BPF_JMP32 | BPF_JA) && @@ -3029,7 +3138,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, if (class == BPF_LDX) { if (t != SRC_OP) - return BPF_SIZE(code) == BPF_DW; + return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX; /* LDX source must be ptr. */ return true; } @@ -4999,6 +5108,8 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, perm_flags |= PTR_UNTRUSTED; } else { perm_flags = PTR_MAYBE_NULL | MEM_ALLOC; + if (kptr_field->type == BPF_KPTR_PERCPU) + perm_flags |= MEM_PERCPU; } if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags)) @@ -5042,7 +5153,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, */ if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, kptr_field->kptr.btf, kptr_field->kptr.btf_id, - kptr_field->type == BPF_KPTR_REF)) + kptr_field->type != BPF_KPTR_UNREF)) goto bad_type; return 0; bad_type: @@ -5086,7 +5197,18 @@ static bool rcu_safe_kptr(const struct btf_field *field) { const struct btf_field_kptr *kptr = &field->kptr; - return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id); + return field->type == BPF_KPTR_PERCPU || + (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id)); +} + +static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field) +{ + if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) { + if (kptr_field->type != BPF_KPTR_PERCPU) + return PTR_MAYBE_NULL | MEM_RCU; + return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU; + } + return PTR_MAYBE_NULL | PTR_UNTRUSTED; } static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, @@ -5112,7 +5234,8 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, /* We only allow loading referenced kptr, since it will be marked as * untrusted, similar to unreferenced kptr. */ - if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) { + if (class != BPF_LDX && + (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) { verbose(env, "store to referenced kptr disallowed\n"); return -EACCES; } @@ -5123,10 +5246,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, * value from map as PTR_TO_BTF_ID, with the correct type. */ mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf, - kptr_field->kptr.btf_id, - rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ? - PTR_MAYBE_NULL | MEM_RCU : - PTR_MAYBE_NULL | PTR_UNTRUSTED); + kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field)); /* For mark_ptr_or_null_reg */ val_reg->id = ++env->id_gen; } else if (class == BPF_STX) { @@ -5180,6 +5300,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, switch (field->type) { case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: if (src != ACCESS_DIRECT) { verbose(env, "kptr cannot be accessed indirectly by helper\n"); return -EACCES; @@ -5647,6 +5768,27 @@ continue_func: for (; i < subprog_end; i++) { int next_insn, sidx; + if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) { + bool err = false; + + if (!is_bpf_throw_kfunc(insn + i)) + continue; + if (subprog[idx].is_cb) + err = true; + for (int c = 0; c < frame && !err; c++) { + if (subprog[ret_prog[c]].is_cb) { + err = true; + break; + } + } + if (!err) + continue; + verbose(env, + "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n", + i, idx); + return -EINVAL; + } + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ @@ -5669,6 +5811,10 @@ continue_func: /* async callbacks don't increase bpf prog stack size unless called directly */ if (!bpf_pseudo_call(insn + i)) continue; + if (subprog[sidx].is_exception_cb) { + verbose(env, "insn %d cannot call exception cb directly\n", i); + return -EINVAL; + } } i = next_insn; idx = sidx; @@ -5690,8 +5836,13 @@ continue_func: * tail call counter throughout bpf2bpf calls combined with tailcalls */ if (tail_call_reachable) - for (j = 0; j < frame; j++) + for (j = 0; j < frame; j++) { + if (subprog[ret_prog[j]].is_exception_cb) { + verbose(env, "cannot tail call within exception cb\n"); + return -EINVAL; + } subprog[ret_prog[j]].tail_call_reachable = true; + } if (subprog[0].tail_call_reachable) env->prog->aux->tail_call_reachable = true; @@ -6207,7 +6358,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, } if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) && - !reg->ref_obj_id) { + !(reg->type & MEM_RCU) && !reg->ref_obj_id) { verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n"); return -EFAULT; } @@ -7318,7 +7469,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, verbose(env, "off=%d doesn't point to kptr\n", kptr_off); return -EACCES; } - if (kptr_field->type != BPF_KPTR_REF) { + if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) { verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off); return -EACCES; } @@ -7751,6 +7902,7 @@ static const struct bpf_reg_types btf_ptr_types = { static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU, + PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU, PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED, } }; @@ -7829,8 +7981,10 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, if (base_type(arg_type) == ARG_PTR_TO_MEM) type &= ~DYNPTR_TYPE_FLAG_MASK; - if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) + if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) { type &= ~MEM_ALLOC; + type &= ~MEM_PERCPU; + } for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { expected = compatible->types[i]; @@ -7913,6 +8067,7 @@ found: break; } case PTR_TO_BTF_ID | MEM_ALLOC: + case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC: if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock && meta->func_id != BPF_FUNC_kptr_xchg) { verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); @@ -7924,6 +8079,7 @@ found: } break; case PTR_TO_BTF_ID | MEM_PERCPU: + case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU: case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED: /* Handled by helper specific checks */ break; @@ -8900,6 +9056,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn * callbacks */ if (set_callee_state_cb != set_callee_state) { + env->subprog_info[subprog].is_cb = true; if (bpf_pseudo_kfunc_call(insn) && !is_callback_calling_kfunc(insn->imm)) { verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", @@ -9289,7 +9446,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) verbose(env, "to caller at %d:\n", *insn_idx); print_verifier_state(env, caller, true); } - /* clear everything in the callee */ + /* clear everything in the callee. In case of exceptional exits using + * bpf_throw, this will be done by copy_verifier_state for extra frames. */ free_func_state(callee); state->frame[state->curframe--] = NULL; return 0; @@ -9413,17 +9571,17 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, return 0; } -static int check_reference_leak(struct bpf_verifier_env *env) +static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit) { struct bpf_func_state *state = cur_func(env); bool refs_lingering = false; int i; - if (state->frameno && !state->in_callback_fn) + if (!exception_exit && state->frameno && !state->in_callback_fn) return 0; for (i = 0; i < state->acquired_refs; i++) { - if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno) + if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno) continue; verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", state->refs[i].id, state->refs[i].insn_idx); @@ -9530,6 +9688,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn int *insn_idx_p) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); + bool returns_cpu_specific_alloc_ptr = false; const struct bpf_func_proto *fn = NULL; enum bpf_return_type ret_type; enum bpf_type_flag ret_flag; @@ -9640,6 +9799,26 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EFAULT; } err = unmark_stack_slots_dynptr(env, ®s[meta.release_regno]); + } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) { + u32 ref_obj_id = meta.ref_obj_id; + bool in_rcu = in_rcu_cs(env); + struct bpf_func_state *state; + struct bpf_reg_state *reg; + + err = release_reference_state(cur_func(env), ref_obj_id); + if (!err) { + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg->ref_obj_id == ref_obj_id) { + if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) { + reg->ref_obj_id = 0; + reg->type &= ~MEM_ALLOC; + reg->type |= MEM_RCU; + } else { + mark_reg_invalid(env, reg); + } + } + })); + } } else if (meta.ref_obj_id) { err = release_reference(env, meta.ref_obj_id); } else if (register_is_null(®s[meta.release_regno])) { @@ -9657,7 +9836,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn switch (func_id) { case BPF_FUNC_tail_call: - err = check_reference_leak(env); + err = check_reference_leak(env, false); if (err) { verbose(env, "tail_call would lead to reference leak\n"); return err; @@ -9768,6 +9947,23 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn break; } + case BPF_FUNC_per_cpu_ptr: + case BPF_FUNC_this_cpu_ptr: + { + struct bpf_reg_state *reg = ®s[BPF_REG_1]; + const struct btf_type *type; + + if (reg->type & MEM_RCU) { + type = btf_type_by_id(reg->btf, reg->btf_id); + if (!type || !btf_type_is_struct(type)) { + verbose(env, "Helper has invalid btf/btf_id in R1\n"); + return -EFAULT; + } + returns_cpu_specific_alloc_ptr = true; + env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true; + } + break; + } case BPF_FUNC_user_ringbuf_drain: err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_user_ringbuf_callback_state); @@ -9857,14 +10053,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; regs[BPF_REG_0].mem_size = tsize; } else { - /* MEM_RDONLY may be carried from ret_flag, but it - * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise - * it will confuse the check of PTR_TO_BTF_ID in - * check_mem_access(). - */ - ret_flag &= ~MEM_RDONLY; + if (returns_cpu_specific_alloc_ptr) { + regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU; + } else { + /* MEM_RDONLY may be carried from ret_flag, but it + * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise + * it will confuse the check of PTR_TO_BTF_ID in + * check_mem_access(). + */ + ret_flag &= ~MEM_RDONLY; + regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; + } - regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; regs[BPF_REG_0].btf = meta.ret_btf; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } @@ -9880,8 +10080,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (func_id == BPF_FUNC_kptr_xchg) { ret_btf = meta.kptr_field->kptr.btf; ret_btf_id = meta.kptr_field->kptr.btf_id; - if (!btf_is_kernel(ret_btf)) + if (!btf_is_kernel(ret_btf)) { regs[BPF_REG_0].type |= MEM_ALLOC; + if (meta.kptr_field->type == BPF_KPTR_PERCPU) + regs[BPF_REG_0].type |= MEM_PERCPU; + } } else { if (fn->ret_btf_id == BPF_PTR_POISON) { verbose(env, "verifier internal error:"); @@ -10266,6 +10469,9 @@ enum special_kfunc_type { KF_bpf_dynptr_slice, KF_bpf_dynptr_slice_rdwr, KF_bpf_dynptr_clone, + KF_bpf_percpu_obj_new_impl, + KF_bpf_percpu_obj_drop_impl, + KF_bpf_throw, }; BTF_SET_START(special_kfunc_set) @@ -10286,6 +10492,9 @@ BTF_ID(func, bpf_dynptr_from_xdp) BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) +BTF_ID(func, bpf_percpu_obj_new_impl) +BTF_ID(func, bpf_percpu_obj_drop_impl) +BTF_ID(func, bpf_throw) BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -10308,6 +10517,9 @@ BTF_ID(func, bpf_dynptr_from_xdp) BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) +BTF_ID(func, bpf_percpu_obj_new_impl) +BTF_ID(func, bpf_percpu_obj_drop_impl) +BTF_ID(func, bpf_throw) static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -10625,6 +10837,12 @@ static bool is_callback_calling_kfunc(u32 btf_id) return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; } +static bool is_bpf_throw_kfunc(struct bpf_insn *insn) +{ + return bpf_pseudo_kfunc_call(insn) && insn->off == 0 && + insn->imm == special_kfunc_list[KF_bpf_throw]; +} + static bool is_rbtree_lock_required_kfunc(u32 btf_id) { return is_bpf_rbtree_api_kfunc(btf_id); @@ -11002,7 +11220,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } break; case KF_ARG_PTR_TO_ALLOC_BTF_ID: - if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) { + if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) { + verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i); + return -EINVAL; + } + } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) { + if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) { + verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i); + return -EINVAL; + } + } else { verbose(env, "arg#%d expected pointer to allocated object\n", i); return -EINVAL; } @@ -11010,8 +11238,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ verbose(env, "allocated object must be referenced\n"); return -EINVAL; } - if (meta->btf == btf_vmlinux && - meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + if (meta->btf == btf_vmlinux) { meta->arg_btf = reg->btf; meta->arg_btf_id = reg->btf_id; } @@ -11202,6 +11429,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; } case KF_ARG_PTR_TO_CALLBACK: + if (reg->type != PTR_TO_FUNC) { + verbose(env, "arg%d expected pointer to func\n", i); + return -EINVAL; + } meta->subprogno = reg->subprogno; break; case KF_ARG_PTR_TO_REFCOUNTED_KPTR: @@ -11280,6 +11511,8 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env, return 0; } +static int check_return_code(struct bpf_verifier_env *env, int regno); + static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -11401,6 +11634,24 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } + if (meta.func_id == special_kfunc_list[KF_bpf_throw]) { + if (!bpf_jit_supports_exceptions()) { + verbose(env, "JIT does not support calling kfunc %s#%d\n", + func_name, meta.func_id); + return -ENOTSUPP; + } + env->seen_exception = true; + + /* In the case of the default callback, the cookie value passed + * to bpf_throw becomes the return value of the program. + */ + if (!env->exception_callback_subprog) { + err = check_return_code(env, BPF_REG_1); + if (err < 0) + return err; + } + } + for (i = 0; i < CALLER_SAVED_REGS; i++) mark_reg_not_init(env, regs, caller_saved[i]); @@ -11411,6 +11662,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, /* Only exception is bpf_obj_new_impl */ if (meta.btf != btf_vmlinux || (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] && + meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] && meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) { verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); return -EINVAL; @@ -11424,11 +11676,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) { - if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { + if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] || + meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { + struct btf_struct_meta *struct_meta; struct btf *ret_btf; u32 ret_btf_id; - if (unlikely(!bpf_global_ma_set)) + if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set) + return -ENOMEM; + + if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set) return -ENOMEM; if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) { @@ -11441,24 +11698,38 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, /* This may be NULL due to user not supplying a BTF */ if (!ret_btf) { - verbose(env, "bpf_obj_new requires prog BTF\n"); + verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n"); return -EINVAL; } ret_t = btf_type_by_id(ret_btf, ret_btf_id); if (!ret_t || !__btf_type_is_struct(ret_t)) { - verbose(env, "bpf_obj_new type ID argument must be of a struct\n"); + verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n"); return -EINVAL; } + struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); + if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { + if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) { + verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n"); + return -EINVAL; + } + + if (struct_meta) { + verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n"); + return -EINVAL; + } + } + mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; regs[BPF_REG_0].btf = ret_btf; regs[BPF_REG_0].btf_id = ret_btf_id; + if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) + regs[BPF_REG_0].type |= MEM_PERCPU; insn_aux->obj_new_size = ret_t->size; - insn_aux->kptr_struct_meta = - btf_find_struct_meta(ret_btf, ret_btf_id); + insn_aux->kptr_struct_meta = struct_meta; } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; @@ -11595,7 +11866,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].id = ++env->id_gen; } else if (btf_type_is_void(t)) { if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) { - if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { + if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] || + meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) { insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id); @@ -14135,6 +14407,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, !sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx)) return -EFAULT; + if (env->log.level & BPF_LOG_LEVEL) + print_insn_state(env, this_branch->frame[this_branch->curframe]); *insn_idx += insn->off; return 0; } else if (pred == 0) { @@ -14147,6 +14421,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, *insn_idx + insn->off + 1, *insn_idx)) return -EFAULT; + if (env->log.level & BPF_LOG_LEVEL) + print_insn_state(env, this_branch->frame[this_branch->curframe]); return 0; } @@ -14425,7 +14701,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) * gen_ld_abs() may terminate the program at runtime, leading to * reference leak. */ - err = check_reference_leak(env); + err = check_reference_leak(env, false); if (err) { verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n"); return err; @@ -14474,7 +14750,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } -static int check_return_code(struct bpf_verifier_env *env) +static int check_return_code(struct bpf_verifier_env *env, int regno) { struct tnum enforce_attach_type_range = tnum_unknown; const struct bpf_prog *prog = env->prog; @@ -14486,7 +14762,7 @@ static int check_return_code(struct bpf_verifier_env *env) const bool is_subprog = frame->subprogno; /* LSM and struct_ops func-ptr's return type could be "void" */ - if (!is_subprog) { + if (!is_subprog || frame->in_exception_callback_fn) { switch (prog_type) { case BPF_PROG_TYPE_LSM: if (prog->expected_attach_type == BPF_LSM_CGROUP) @@ -14508,22 +14784,22 @@ static int check_return_code(struct bpf_verifier_env *env) * of bpf_exit, which means that program wrote * something into it earlier */ - err = check_reg_arg(env, BPF_REG_0, SRC_OP); + err = check_reg_arg(env, regno, SRC_OP); if (err) return err; - if (is_pointer_value(env, BPF_REG_0)) { - verbose(env, "R0 leaks addr as return value\n"); + if (is_pointer_value(env, regno)) { + verbose(env, "R%d leaks addr as return value\n", regno); return -EACCES; } - reg = cur_regs(env) + BPF_REG_0; + reg = cur_regs(env) + regno; if (frame->in_async_callback_fn) { /* enforce return zero from async callbacks like timer */ if (reg->type != SCALAR_VALUE) { - verbose(env, "In async callback the register R0 is not a known value (%s)\n", - reg_type_str(env, reg->type)); + verbose(env, "In async callback the register R%d is not a known value (%s)\n", + regno, reg_type_str(env, reg->type)); return -EINVAL; } @@ -14534,10 +14810,10 @@ static int check_return_code(struct bpf_verifier_env *env) return 0; } - if (is_subprog) { + if (is_subprog && !frame->in_exception_callback_fn) { if (reg->type != SCALAR_VALUE) { - verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", - reg_type_str(env, reg->type)); + verbose(env, "At subprogram exit the register R%d is not a scalar value (%s)\n", + regno, reg_type_str(env, reg->type)); return -EINVAL; } return 0; @@ -14547,10 +14823,13 @@ static int check_return_code(struct bpf_verifier_env *env) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME || env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME || env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || - env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) + env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME || + env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME) range = tnum_range(1, 1); if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND || env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND) @@ -14619,8 +14898,8 @@ static int check_return_code(struct bpf_verifier_env *env) } if (reg->type != SCALAR_VALUE) { - verbose(env, "At program exit the register R0 is not a known value (%s)\n", - reg_type_str(env, reg->type)); + verbose(env, "At program exit the register R%d is not a known value (%s)\n", + regno, reg_type_str(env, reg->type)); return -EINVAL; } @@ -14891,8 +15170,8 @@ static int check_cfg(struct bpf_verifier_env *env) { int insn_cnt = env->prog->len; int *insn_stack, *insn_state; - int ret = 0; - int i; + int ex_insn_beg, i, ret = 0; + bool ex_done = false; insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_state) @@ -14908,6 +15187,7 @@ static int check_cfg(struct bpf_verifier_env *env) insn_stack[0] = 0; /* 0 is the first instruction */ env->cfg.cur_stack = 1; +walk_cfg: while (env->cfg.cur_stack > 0) { int t = insn_stack[env->cfg.cur_stack - 1]; @@ -14934,6 +15214,16 @@ static int check_cfg(struct bpf_verifier_env *env) goto err_free; } + if (env->exception_callback_subprog && !ex_done) { + ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start; + + insn_state[ex_insn_beg] = DISCOVERED; + insn_stack[0] = ex_insn_beg; + env->cfg.cur_stack = 1; + ex_done = true; + goto walk_cfg; + } + for (i = 0; i < insn_cnt; i++) { if (insn_state[i] != EXPLORED) { verbose(env, "unreachable insn %d\n", i); @@ -14971,20 +15261,18 @@ static int check_abnormal_return(struct bpf_verifier_env *env) #define MIN_BPF_FUNCINFO_SIZE 8 #define MAX_FUNCINFO_REC_SIZE 252 -static int check_btf_func(struct bpf_verifier_env *env, - const union bpf_attr *attr, - bpfptr_t uattr) +static int check_btf_func_early(struct bpf_verifier_env *env, + const union bpf_attr *attr, + bpfptr_t uattr) { - const struct btf_type *type, *func_proto, *ret_type; - u32 i, nfuncs, urec_size, min_size; u32 krec_size = sizeof(struct bpf_func_info); + const struct btf_type *type, *func_proto; + u32 i, nfuncs, urec_size, min_size; struct bpf_func_info *krecord; - struct bpf_func_info_aux *info_aux = NULL; struct bpf_prog *prog; const struct btf *btf; - bpfptr_t urecord; u32 prev_offset = 0; - bool scalar_return; + bpfptr_t urecord; int ret = -ENOMEM; nfuncs = attr->func_info_cnt; @@ -14994,11 +15282,6 @@ static int check_btf_func(struct bpf_verifier_env *env, return 0; } - if (nfuncs != env->subprog_cnt) { - verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); - return -EINVAL; - } - urec_size = attr->func_info_rec_size; if (urec_size < MIN_BPF_FUNCINFO_SIZE || urec_size > MAX_FUNCINFO_REC_SIZE || @@ -15016,9 +15299,6 @@ static int check_btf_func(struct bpf_verifier_env *env, krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); if (!krecord) return -ENOMEM; - info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN); - if (!info_aux) - goto err_free; for (i = 0; i < nfuncs; i++) { ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); @@ -15057,11 +15337,6 @@ static int check_btf_func(struct bpf_verifier_env *env, goto err_free; } - if (env->subprog_info[i].start != krecord[i].insn_off) { - verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); - goto err_free; - } - /* check type_id */ type = btf_type_by_id(btf, krecord[i].type_id); if (!type || !btf_type_is_func(type)) { @@ -15069,12 +15344,77 @@ static int check_btf_func(struct bpf_verifier_env *env, krecord[i].type_id); goto err_free; } - info_aux[i].linkage = BTF_INFO_VLEN(type->info); func_proto = btf_type_by_id(btf, type->type); if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto))) /* btf_func_check() already verified it during BTF load */ goto err_free; + + prev_offset = krecord[i].insn_off; + bpfptr_add(&urecord, urec_size); + } + + prog->aux->func_info = krecord; + prog->aux->func_info_cnt = nfuncs; + return 0; + +err_free: + kvfree(krecord); + return ret; +} + +static int check_btf_func(struct bpf_verifier_env *env, + const union bpf_attr *attr, + bpfptr_t uattr) +{ + const struct btf_type *type, *func_proto, *ret_type; + u32 i, nfuncs, urec_size; + struct bpf_func_info *krecord; + struct bpf_func_info_aux *info_aux = NULL; + struct bpf_prog *prog; + const struct btf *btf; + bpfptr_t urecord; + bool scalar_return; + int ret = -ENOMEM; + + nfuncs = attr->func_info_cnt; + if (!nfuncs) { + if (check_abnormal_return(env)) + return -EINVAL; + return 0; + } + if (nfuncs != env->subprog_cnt) { + verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); + return -EINVAL; + } + + urec_size = attr->func_info_rec_size; + + prog = env->prog; + btf = prog->aux->btf; + + urecord = make_bpfptr(attr->func_info, uattr.is_kernel); + + krecord = prog->aux->func_info; + info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN); + if (!info_aux) + return -ENOMEM; + + for (i = 0; i < nfuncs; i++) { + /* check insn_off */ + ret = -EINVAL; + + if (env->subprog_info[i].start != krecord[i].insn_off) { + verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); + goto err_free; + } + + /* Already checked type_id */ + type = btf_type_by_id(btf, krecord[i].type_id); + info_aux[i].linkage = BTF_INFO_VLEN(type->info); + /* Already checked func_proto */ + func_proto = btf_type_by_id(btf, type->type); + ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL); scalar_return = btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type); @@ -15087,17 +15427,13 @@ static int check_btf_func(struct bpf_verifier_env *env, goto err_free; } - prev_offset = krecord[i].insn_off; bpfptr_add(&urecord, urec_size); } - prog->aux->func_info = krecord; - prog->aux->func_info_cnt = nfuncs; prog->aux->func_info_aux = info_aux; return 0; err_free: - kvfree(krecord); kfree(info_aux); return ret; } @@ -15110,7 +15446,8 @@ static void adjust_btf_func(struct bpf_verifier_env *env) if (!aux->func_info) return; - for (i = 0; i < env->subprog_cnt; i++) + /* func_info is not available for hidden subprogs */ + for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++) aux->func_info[i].insn_off = env->subprog_info[i].start; } @@ -15314,9 +15651,9 @@ static int check_core_relo(struct bpf_verifier_env *env, return err; } -static int check_btf_info(struct bpf_verifier_env *env, - const union bpf_attr *attr, - bpfptr_t uattr) +static int check_btf_info_early(struct bpf_verifier_env *env, + const union bpf_attr *attr, + bpfptr_t uattr) { struct btf *btf; int err; @@ -15336,6 +15673,24 @@ static int check_btf_info(struct bpf_verifier_env *env, } env->prog->aux->btf = btf; + err = check_btf_func_early(env, attr, uattr); + if (err) + return err; + return 0; +} + +static int check_btf_info(struct bpf_verifier_env *env, + const union bpf_attr *attr, + bpfptr_t uattr) +{ + int err; + + if (!attr->func_info_cnt && !attr->line_info_cnt) { + if (check_abnormal_return(env)) + return -EINVAL; + return 0; + } + err = check_btf_func(env, attr, uattr); if (err) return err; @@ -16438,6 +16793,7 @@ static int do_check(struct bpf_verifier_env *env) int prev_insn_idx = -1; for (;;) { + bool exception_exit = false; struct bpf_insn *insn; u8 class; int err; @@ -16652,12 +17008,17 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } } - if (insn->src_reg == BPF_PSEUDO_CALL) + if (insn->src_reg == BPF_PSEUDO_CALL) { err = check_func_call(env, insn, &env->insn_idx); - else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) + } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { err = check_kfunc_call(env, insn, &env->insn_idx); - else + if (!err && is_bpf_throw_kfunc(insn)) { + exception_exit = true; + goto process_bpf_exit_full; + } + } else { err = check_helper_call(env, insn, &env->insn_idx); + } if (err) return err; @@ -16687,7 +17048,7 @@ static int do_check(struct bpf_verifier_env *env) verbose(env, "BPF_EXIT uses reserved fields\n"); return -EINVAL; } - +process_bpf_exit_full: if (env->cur_state->active_lock.ptr && !in_rbtree_lock_required_cb(env)) { verbose(env, "bpf_spin_unlock is missing\n"); @@ -16706,10 +17067,23 @@ static int do_check(struct bpf_verifier_env *env) * function, for which reference_state must * match caller reference state when it exits. */ - err = check_reference_leak(env); + err = check_reference_leak(env, exception_exit); if (err) return err; + /* The side effect of the prepare_func_exit + * which is being skipped is that it frees + * bpf_func_state. Typically, process_bpf_exit + * will only be hit with outermost exit. + * copy_verifier_state in pop_stack will handle + * freeing of any extra bpf_func_state left over + * from not processing all nested function + * exits. We also skip return code checks as + * they are not needed for exceptional exits. + */ + if (exception_exit) + goto process_bpf_exit; + if (state->curframe) { /* exit from nested function */ err = prepare_func_exit(env, &env->insn_idx); @@ -16719,7 +17093,7 @@ static int do_check(struct bpf_verifier_env *env) continue; } - err = check_return_code(env); + err = check_return_code(env, BPF_REG_0); if (err) return err; process_bpf_exit: @@ -18012,6 +18386,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) } func[i]->aux->num_exentries = num_exentries; func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable; + func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb; + if (!i) + func[i]->aux->exception_boundary = env->seen_exception; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { err = -ENOTSUPP; @@ -18051,7 +18428,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) * the call instruction, as an index for this list */ func[i]->aux->func = func; - func[i]->aux->func_cnt = env->subprog_cnt; + func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt; + func[i]->aux->real_func_cnt = env->subprog_cnt; } for (i = 0; i < env->subprog_cnt; i++) { old_bpf_func = func[i]->bpf_func; @@ -18097,7 +18475,10 @@ static int jit_subprogs(struct bpf_verifier_env *env) prog->aux->extable = func[0]->aux->extable; prog->aux->num_exentries = func[0]->aux->num_exentries; prog->aux->func = func; - prog->aux->func_cnt = env->subprog_cnt; + prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt; + prog->aux->real_func_cnt = env->subprog_cnt; + prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func; + prog->aux->exception_boundary = func[0]->aux->exception_boundary; bpf_prog_jit_attempt_done(prog); return 0; out_free: @@ -18264,21 +18645,35 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn->imm = BPF_CALL_IMM(desc->addr); if (insn->off) return 0; - if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) { + if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] || + desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) { struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) }; u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size; + if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) { + verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n", + insn_idx); + return -EFAULT; + } + insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size); insn_buf[1] = addr[0]; insn_buf[2] = addr[1]; insn_buf[3] = *insn; *cnt = 4; } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] || + desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] || desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) { struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) }; + if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) { + verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n", + insn_idx); + return -EFAULT; + } + if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] && !kptr_struct_meta) { verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n", @@ -18319,6 +18714,33 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return 0; } +/* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */ +static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len) +{ + struct bpf_subprog_info *info = env->subprog_info; + int cnt = env->subprog_cnt; + struct bpf_prog *prog; + + /* We only reserve one slot for hidden subprogs in subprog_info. */ + if (env->hidden_subprog_cnt) { + verbose(env, "verifier internal error: only one hidden subprog supported\n"); + return -EFAULT; + } + /* We're not patching any existing instruction, just appending the new + * ones for the hidden subprog. Hence all of the adjustment operations + * in bpf_patch_insn_data are no-ops. + */ + prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len); + if (!prog) + return -ENOMEM; + env->prog = prog; + info[cnt + 1].start = info[cnt].start; + info[cnt].start = prog->len - len + 1; + env->subprog_cnt++; + env->hidden_subprog_cnt++; + return 0; +} + /* Do various post-verification rewrites in a single program pass. * These rewrites simplify JIT and interpreter implementations. */ @@ -18337,6 +18759,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env) struct bpf_map *map_ptr; int i, ret, cnt, delta = 0; + if (env->seen_exception && !env->exception_callback_subprog) { + struct bpf_insn patch[] = { + env->prog->insnsi[insn_cnt - 1], + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }; + + ret = add_hidden_subprog(env, patch, ARRAY_SIZE(patch)); + if (ret < 0) + return ret; + prog = env->prog; + insn = prog->insnsi; + + env->exception_callback_subprog = env->subprog_cnt - 1; + /* Don't update insn_cnt, as add_hidden_subprog always appends insns */ + env->subprog_info[env->exception_callback_subprog].is_cb = true; + env->subprog_info[env->exception_callback_subprog].is_async_cb = true; + env->subprog_info[env->exception_callback_subprog].is_exception_cb = true; + } + for (i = 0; i < insn_cnt; i++, insn++) { /* Make divide-by-zero exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || @@ -18606,6 +19048,25 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto patch_call_imm; } + /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */ + if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) { + /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data, + * bpf_mem_alloc() returns a ptr to the percpu data ptr. + */ + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); + insn_buf[1] = *insn; + cnt = 2; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto patch_call_imm; + } + /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup * and other inlining handlers are currently limited to 64 bit * only. @@ -19015,7 +19476,7 @@ static void free_states(struct bpf_verifier_env *env) } } -static int do_check_common(struct bpf_verifier_env *env, int subprog) +static int do_check_common(struct bpf_verifier_env *env, int subprog, bool is_ex_cb) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); struct bpf_verifier_state *state; @@ -19046,7 +19507,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) regs = state->frame[state->curframe]->regs; if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { - ret = btf_prepare_func_args(env, subprog, regs); + ret = btf_prepare_func_args(env, subprog, regs, is_ex_cb); if (ret) goto out; for (i = BPF_REG_1; i <= BPF_REG_5; i++) { @@ -19062,6 +19523,12 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) regs[i].id = ++env->id_gen; } } + if (is_ex_cb) { + state->frame[0]->in_exception_callback_fn = true; + env->subprog_info[subprog].is_cb = true; + env->subprog_info[subprog].is_async_cb = true; + env->subprog_info[subprog].is_exception_cb = true; + } } else { /* 1st arg to a function */ regs[BPF_REG_1].type = PTR_TO_CTX; @@ -19126,7 +19593,7 @@ static int do_check_subprogs(struct bpf_verifier_env *env) continue; env->insn_idx = env->subprog_info[i].start; WARN_ON_ONCE(env->insn_idx == 0); - ret = do_check_common(env, i); + ret = do_check_common(env, i, env->exception_callback_subprog == i); if (ret) { return ret; } else if (env->log.level & BPF_LOG_LEVEL) { @@ -19143,7 +19610,7 @@ static int do_check_main(struct bpf_verifier_env *env) int ret; env->insn_idx = 0; - ret = do_check_common(env, 0); + ret = do_check_common(env, 0, false); if (!ret) env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; return ret; @@ -19312,6 +19779,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, bpf_log(log, "Subprog %s doesn't exist\n", tname); return -EINVAL; } + if (aux->func && aux->func[subprog]->aux->exception_cb) { + bpf_log(log, + "%s programs cannot attach to exception callback\n", + prog_extension ? "Extension" : "FENTRY/FEXIT"); + return -EINVAL; + } conservative = aux->func_info_aux[subprog].unreliable; if (prog_extension) { if (conservative) { @@ -19641,6 +20114,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) if (!tr) return -ENOMEM; + if (tgt_prog && tgt_prog->aux->tail_call_reachable) + tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX; + prog->aux->dst_trampoline = tr; return 0; } @@ -19736,6 +20212,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (!env->explored_states) goto skip_full_check; + ret = check_btf_info_early(env, attr, uattr); + if (ret < 0) + goto skip_full_check; + ret = add_subprog_and_kfunc(env); if (ret < 0) goto skip_full_check; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 868008f56fec..df697c74d519 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -117,6 +117,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) * and don't send kprobe event into ring-buffer, * so return zero here */ + rcu_read_lock(); + bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array)); + rcu_read_unlock(); ret = 0; goto out; } @@ -2384,7 +2387,8 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr) + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed) { bool is_tracepoint, is_syscall_tp; struct bpf_prog *prog; @@ -2419,7 +2423,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, #ifdef CONFIG_KPROBE_EVENTS if (flags & TRACE_EVENT_FL_KPROBE) err = bpf_get_kprobe_info(event, fd_type, buf, - probe_offset, probe_addr, + probe_offset, probe_addr, missed, event->attr.type == PERF_TYPE_TRACEPOINT); #endif #ifdef CONFIG_UPROBE_EVENTS @@ -2614,6 +2618,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); info->kprobe_multi.count = kmulti_link->cnt; info->kprobe_multi.flags = kmulti_link->flags; + info->kprobe_multi.missed = kmulti_link->fp.nmissed; if (!uaddrs) return 0; @@ -2710,6 +2715,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, int err; if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { + bpf_prog_inc_misses_counter(link->link.prog); err = 0; goto out; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index e834f149695b..a3442db35670 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1249,6 +1249,12 @@ static const struct file_operations kprobe_events_ops = { .write = probes_write, }; +static unsigned long trace_kprobe_missed(struct trace_kprobe *tk) +{ + return trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; +} + /* Probes profiling interfaces */ static int probes_profile_seq_show(struct seq_file *m, void *v) { @@ -1260,8 +1266,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) return 0; tk = to_trace_kprobe(ev); - nmissed = trace_kprobe_is_return(tk) ? - tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; + nmissed = trace_kprobe_missed(tk); seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), @@ -1607,7 +1612,8 @@ NOKPROBE_SYMBOL(kretprobe_perf_func); int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, - u64 *probe_addr, bool perf_type_tracepoint) + u64 *probe_addr, unsigned long *missed, + bool perf_type_tracepoint) { const char *pevent = trace_event_name(event->tp_event); const char *group = event->tp_event->class->system; @@ -1626,6 +1632,8 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, *probe_addr = kallsyms_show_value(current_cred()) ? (unsigned long)tk->rp.kp.addr : 0; *symbol = tk->symbol; + if (missed) + *missed = trace_kprobe_missed(tk); return 0; } #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index de753403cdaf..9c581d6da843 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -556,7 +556,7 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re { struct syscall_tp_t { struct trace_entry ent; - unsigned long syscall_nr; + int syscall_nr; unsigned long args[SYSCALL_DEFINE_MAXARGS]; } __aligned(8) param; int i; @@ -661,7 +661,7 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg { struct syscall_tp_t { struct trace_entry ent; - unsigned long syscall_nr; + int syscall_nr; unsigned long ret; } __aligned(8) param; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index ecde4216201e..7916503e6a6a 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5111,6 +5111,104 @@ static struct bpf_test tests[] = { { }, { { 0, 0xffffffff } } }, + /* MOVSX32 */ + { + "ALU_MOVSX | BPF_B", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x00000000ffffffefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX32_REG(R1, R3, 8), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU_MOVSX | BPF_H", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x00000000ffffbeefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX32_REG(R1, R3, 16), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU_MOVSX | BPF_W", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x00000000deadbeefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX32_REG(R1, R3, 32), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* MOVSX64 REG */ + { + "ALU64_MOVSX | BPF_B", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX64_REG(R1, R3, 8), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOVSX | BPF_H", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffbeefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX64_REG(R1, R3, 16), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOVSX | BPF_W", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffdeadbeefLL), + BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), + BPF_MOVSX64_REG(R1, R3, 32), + BPF_JMP_REG(BPF_JEQ, R2, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, /* BPF_ALU | BPF_ADD | BPF_X */ { "ALU_ADD_X: 1 + 2 = 3", @@ -6105,6 +6203,106 @@ static struct bpf_test tests[] = { { }, { { 0, 2 } }, }, + /* BPF_ALU | BPF_DIV | BPF_X off=1 (SDIV) */ + { + "ALU_SDIV_X: -6 / 2 = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, -6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG_OFF(BPF_DIV, R0, R1, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + /* BPF_ALU | BPF_DIV | BPF_K off=1 (SDIV) */ + { + "ALU_SDIV_K: -6 / 2 = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, -6), + BPF_ALU32_IMM_OFF(BPF_DIV, R0, 2, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + /* BPF_ALU64 | BPF_DIV | BPF_X off=1 (SDIV64) */ + { + "ALU64_SDIV_X: -6 / 2 = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, -6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG_OFF(BPF_DIV, R0, R1, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + /* BPF_ALU64 | BPF_DIV | BPF_K off=1 (SDIV64) */ + { + "ALU64_SDIV_K: -6 / 2 = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, -6), + BPF_ALU64_IMM_OFF(BPF_DIV, R0, 2, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_X off=1 (SMOD) */ + { + "ALU_SMOD_X: -7 % 2 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, -7), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG_OFF(BPF_MOD, R0, R1, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_K off=1 (SMOD) */ + { + "ALU_SMOD_K: -7 % 2 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, -7), + BPF_ALU32_IMM_OFF(BPF_MOD, R0, 2, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + /* BPF_ALU64 | BPF_MOD | BPF_X off=1 (SMOD64) */ + { + "ALU64_SMOD_X: -7 % 2 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, -7), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG_OFF(BPF_MOD, R0, R1, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + /* BPF_ALU64 | BPF_MOD | BPF_K off=1 (SMOD64) */ + { + "ALU64_SMOD_X: -7 % 2 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, -7), + BPF_ALU64_IMM_OFF(BPF_MOD, R0, 2, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, /* BPF_ALU | BPF_AND | BPF_X */ { "ALU_AND_X: 3 & 2 = 2", @@ -7837,6 +8035,104 @@ static struct bpf_test tests[] = { { }, { { 0, (u32) (cpu_to_le64(0xfedcba9876543210ULL) >> 32) } }, }, + /* BSWAP */ + { + "BSWAP 16: 0x0123456789abcdef -> 0xefcd", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_BSWAP(R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xefcd } }, + }, + { + "BSWAP 32: 0x0123456789abcdef -> 0xefcdab89", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_BSWAP(R0, 32), + BPF_ALU64_REG(BPF_MOV, R1, R0), + BPF_ALU64_IMM(BPF_RSH, R1, 32), + BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xefcdab89 } }, + }, + { + "BSWAP 64: 0x0123456789abcdef -> 0x67452301", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_BSWAP(R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x67452301 } }, + }, + { + "BSWAP 64: 0x0123456789abcdef >> 32 -> 0xefcdab89", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_BSWAP(R0, 64), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xefcdab89 } }, + }, + /* BSWAP, reversed */ + { + "BSWAP 16: 0xfedcba9876543210 -> 0x1032", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), + BPF_BSWAP(R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1032 } }, + }, + { + "BSWAP 32: 0xfedcba9876543210 -> 0x10325476", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), + BPF_BSWAP(R0, 32), + BPF_ALU64_REG(BPF_MOV, R1, R0), + BPF_ALU64_IMM(BPF_RSH, R1, 32), + BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x10325476 } }, + }, + { + "BSWAP 64: 0xfedcba9876543210 -> 0x98badcfe", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), + BPF_BSWAP(R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x98badcfe } }, + }, + { + "BSWAP 64: 0xfedcba9876543210 >> 32 -> 0x10325476", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xfedcba9876543210ULL), + BPF_BSWAP(R0, 64), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x10325476 } }, + }, /* BPF_LDX_MEM B/H/W/DW */ { "BPF_LDX_MEM | BPF_B, base", @@ -8228,6 +8524,67 @@ static struct bpf_test tests[] = { { { 32, 0 } }, .stack_depth = 0, }, + /* BPF_LDX_MEMSX B/H/W */ + { + "BPF_LDX_MEMSX | BPF_B", + .u.insns_int = { + BPF_LD_IMM64(R1, 0xdead0000000000f0ULL), + BPF_LD_IMM64(R2, 0xfffffffffffffff0ULL), + BPF_STX_MEM(BPF_DW, R10, R1, -8), +#ifdef __BIG_ENDIAN + BPF_LDX_MEMSX(BPF_B, R0, R10, -1), +#else + BPF_LDX_MEMSX(BPF_B, R0, R10, -8), +#endif + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 8, + }, + { + "BPF_LDX_MEMSX | BPF_H", + .u.insns_int = { + BPF_LD_IMM64(R1, 0xdead00000000f123ULL), + BPF_LD_IMM64(R2, 0xfffffffffffff123ULL), + BPF_STX_MEM(BPF_DW, R10, R1, -8), +#ifdef __BIG_ENDIAN + BPF_LDX_MEMSX(BPF_H, R0, R10, -2), +#else + BPF_LDX_MEMSX(BPF_H, R0, R10, -8), +#endif + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 8, + }, + { + "BPF_LDX_MEMSX | BPF_W", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x00000000deadbeefULL), + BPF_LD_IMM64(R2, 0xffffffffdeadbeefULL), + BPF_STX_MEM(BPF_DW, R10, R1, -8), +#ifdef __BIG_ENDIAN + BPF_LDX_MEMSX(BPF_W, R0, R10, -4), +#else + BPF_LDX_MEMSX(BPF_W, R0, R10, -8), +#endif + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 8, + }, /* BPF_STX_MEM B/H/W/DW */ { "BPF_STX_MEM | BPF_B", @@ -9474,6 +9831,20 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + /* BPF_JMP32 | BPF_JA */ + { + "JMP32_JA: Unconditional jump: if (true) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_JMP32_IMM(BPF_JA, 0, 1, 0), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JSLT | BPF_K */ { "JMP_JSLT_K: Signed jump: if (-2 < -1) return 1", diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index d37831b8511c..8b06bab5c406 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -562,7 +562,6 @@ void kasan_restore_multi_shot(bool enabled); * code. Declared here to avoid warnings about missing declarations. */ -asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); void __asan_register_globals(void *globals, ssize_t size); void __asan_unregister_globals(void *globals, ssize_t size); void __asan_handle_no_return(void); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index faabad6603db..f263f7e91a21 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1136,6 +1136,7 @@ static int build_initial_monmap(struct ceph_mon_client *monc) GFP_KERNEL); if (!monc->monmap) return -ENOMEM; + monc->monmap->num_mon = num_mon; for (i = 0; i < num_mon; i++) { struct ceph_entity_inst *inst = &monc->monmap->mon_inst[i]; @@ -1147,7 +1148,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) inst->name.type = CEPH_ENTITY_TYPE_MON; inst->name.num = cpu_to_le64(i); } - monc->monmap->num_mon = num_mon; return 0; } diff --git a/net/core/dev.c b/net/core/dev.c index 9f3f8930c691..17ba4c5be97e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9052,6 +9052,28 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) } EXPORT_SYMBOL(netdev_port_same_parent_id); +static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) +{ +#if IS_ENABLED(CONFIG_DPLL) + rtnl_lock(); + dev->dpll_pin = dpll_pin; + rtnl_unlock(); +#endif +} + +void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + WARN_ON(!dpll_pin); + netdev_dpll_pin_assign(dev, dpll_pin); +} +EXPORT_SYMBOL(netdev_dpll_pin_set); + +void netdev_dpll_pin_clear(struct net_device *dev) +{ + netdev_dpll_pin_assign(dev, NULL); +} +EXPORT_SYMBOL(netdev_dpll_pin_clear); + /** * dev_change_proto_down - set carrier according to proto_down. * diff --git a/net/core/dst.c b/net/core/dst.c index 980e2fd2f013..6838d3212c37 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -45,7 +45,7 @@ const struct dst_metrics dst_default_metrics = { EXPORT_SYMBOL(dst_default_metrics); void dst_init(struct dst_entry *dst, struct dst_ops *ops, - struct net_device *dev, int initial_ref, int initial_obsolete, + struct net_device *dev, int initial_obsolete, unsigned short flags) { dst->dev = dev; @@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->tclassid = 0; #endif dst->lwtstate = NULL; - rcuref_init(&dst->__rcuref, initial_ref); + rcuref_init(&dst->__rcuref, 1); INIT_LIST_HEAD(&dst->rt_uncached); dst->__use = 0; dst->lastuse = jiffies; @@ -77,7 +77,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, EXPORT_SYMBOL(dst_init); void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, unsigned short flags) + int initial_obsolete, unsigned short flags) { struct dst_entry *dst; @@ -90,7 +90,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, if (!dst) return NULL; - dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags); + dst_init(dst, ops, dev, initial_obsolete, flags); return dst; } @@ -270,7 +270,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst, struct dst_entry *dst; dst = &md_dst->dst; - dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, + dst_init(dst, &dst_blackhole_ops, NULL, DST_OBSOLETE_NONE, DST_METADATA | DST_NOCOUNT); memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); md_dst->type = type; diff --git a/net/core/filter.c b/net/core/filter.c index a094694899c9..cc2e4babc85f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -81,6 +81,7 @@ #include <net/xdp.h> #include <net/mptcp.h> #include <net/netfilter/nf_conntrack_bpf.h> +#include <linux/un.h> static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -5850,6 +5851,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -5992,6 +5996,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -6010,7 +6026,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) @@ -7858,14 +7875,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_setsockopt_proto; default: return NULL; @@ -7876,14 +7898,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_getsockopt_proto; default: return NULL; @@ -8931,8 +8958,8 @@ static bool sock_addr_is_valid_access(int off, int size, if (off % size != 0) return false; - /* Disallow access to IPv6 fields from IPv4 contex and vise - * versa. + /* Disallow access to fields not belonging to the attach type's address + * family. */ switch (off) { case bpf_ctx_range(struct bpf_sock_addr, user_ip4): @@ -11752,6 +11779,27 @@ __bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, return 0; } + +__bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, + const u8 *sun_path, u32 sun_path__sz) +{ + struct sockaddr_un *un; + + if (sa_kern->sk->sk_family != AF_UNIX) + return -EINVAL; + + /* We do not allow changing the address to unnamed or larger than the + * maximum allowed address size for a unix sockaddr. + */ + if (sun_path__sz == 0 || sun_path__sz > UNIX_PATH_MAX) + return -EINVAL; + + un = (struct sockaddr_un *)sa_kern->uaddr; + memcpy(un->sun_path, sun_path, sun_path__sz); + sa_kern->uaddrlen = offsetof(struct sockaddr_un, sun_path) + sun_path__sz; + + return 0; +} __diag_pop(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, @@ -11776,6 +11824,10 @@ BTF_SET8_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) BTF_SET8_END(bpf_kfunc_check_set_xdp) +BTF_SET8_START(bpf_kfunc_check_set_sock_addr) +BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) +BTF_SET8_END(bpf_kfunc_check_set_sock_addr) + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -11786,6 +11838,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = { .set = &bpf_kfunc_check_set_xdp, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_sock_addr, +}; + static int __init bpf_kfunc_init(void) { int ret; @@ -11800,7 +11857,9 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + &bpf_kfunc_set_sock_addr); } late_initcall(bpf_kfunc_init); diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index c1aea8b756b6..fe61f85bcf33 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -5,6 +5,7 @@ #include <linux/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <net/xdp.h> #include "netdev-genl-gen.h" @@ -12,15 +13,24 @@ static int netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { + u64 xdp_rx_meta = 0; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; +#define XDP_METADATA_KFUNC(_, flag, __, xmo) \ + if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \ + xdp_rx_meta |= flag; +XDP_METADATA_KFUNC_xxx +#undef XDP_METADATA_KFUNC + if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, - netdev->xdp_features, NETDEV_A_DEV_PAD)) { + netdev->xdp_features, NETDEV_A_DEV_PAD) || + nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + xdp_rx_meta, NETDEV_A_DEV_PAD)) { genlmsg_cancel(rsp, hdr); return -EINVAL; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 53c377d054f0..4363f913ce7c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -57,6 +57,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/addrconf.h> #endif +#include <linux/dpll.h> #include "dev.h" @@ -1055,6 +1056,15 @@ static size_t rtnl_devlink_port_size(const struct net_device *dev) return size; } +static size_t rtnl_dpll_pin_size(const struct net_device *dev) +{ + size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */ + + size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev)); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1111,6 +1121,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_prop_list_size(dev) + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + rtnl_devlink_port_size(dev) + + rtnl_dpll_pin_size(dev) + 0; } @@ -1774,6 +1785,28 @@ nest_cancel: return ret; } +static int rtnl_fill_dpll_pin(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *dpll_pin_nest; + int ret; + + dpll_pin_nest = nla_nest_start(skb, IFLA_DPLL_PIN); + if (!dpll_pin_nest) + return -EMSGSIZE; + + ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev)); + if (ret < 0) + goto nest_cancel; + + nla_nest_end(skb, dpll_pin_nest); + return 0; + +nest_cancel: + nla_nest_cancel(skb, dpll_pin_nest); + return ret; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, @@ -1916,6 +1949,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (rtnl_fill_devlink_port(skb, dev)) goto nla_put_failure; + if (rtnl_fill_dpll_pin(skb, dev)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4eaf7ed0d1f4..2198979470ec 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -847,6 +847,8 @@ EXPORT_SYMBOL(__napi_alloc_skb); void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, unsigned int truesize) { + DEBUG_NET_WARN_ON_ONCE(size > truesize); + skb_fill_page_desc(skb, i, page, off, size); skb->len += size; skb->data_len += size; @@ -859,6 +861,8 @@ void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + DEBUG_NET_WARN_ON_ONCE(size > truesize); + skb_frag_size_add(frag, size); skb->len += size; skb->data_len += size; diff --git a/net/core/sock.c b/net/core/sock.c index 16584e2dd648..a5995750c5c5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -759,7 +759,7 @@ out: return ret; } -bool sk_mc_loop(struct sock *sk) +bool sk_mc_loop(const struct sock *sk) { if (dev_recursion_level()) return false; @@ -771,7 +771,7 @@ bool sk_mc_loop(struct sock *sk) return inet_test_bit(MC_LOOP, sk); #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - return inet6_sk(sk)->mc_loop; + return inet6_test_bit(MC6_LOOP, sk); #endif } WARN_ON_ONCE(1); @@ -3001,6 +3001,11 @@ void __sk_flush_backlog(struct sock *sk) { spin_lock_bh(&sk->sk_lock.slock); __release_sock(sk); + + if (sk->sk_prot->release_cb) + INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, + tcp_release_cb, sk); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(__sk_flush_backlog); @@ -3519,11 +3524,9 @@ void release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); - /* Warning : release_cb() might need to release sk ownership, - * ie call sock_release_ownership(sk) before us. - */ if (sk->sk_prot->release_cb) - sk->sk_prot->release_cb(sk); + INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, + tcp_release_cb, sk); sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) diff --git a/net/core/xdp.c b/net/core/xdp.c index a70670fe9a2d..df4789ab512d 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -741,7 +741,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, __diag_pop(); BTF_SET8_START(xdp_metadata_kfunc_ids) -#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) +#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC BTF_SET8_END(xdp_metadata_kfunc_ids) @@ -752,7 +752,7 @@ static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = { }; BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted) -#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str) +#define XDP_METADATA_KFUNC(name, _, str, __) BTF_ID(func, str) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c693a570682f..80b956b39252 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -180,7 +180,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { @@ -671,10 +671,10 @@ ipv6_pktoptions: if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) - np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &DCCP_SKB_CB(opt_skb)->header.h6)) { @@ -839,7 +839,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { diff --git a/net/devlink/core.c b/net/devlink/core.c index 6cec4afb01fb..bcbbb952569f 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -16,6 +16,219 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report); DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); +static struct devlink *devlinks_xa_get(unsigned long index) +{ + struct devlink *devlink; + + rcu_read_lock(); + devlink = xa_find(&devlinks, &index, index, DEVLINK_REGISTERED); + if (!devlink || !devlink_try_get(devlink)) + devlink = NULL; + rcu_read_unlock(); + return devlink; +} + +/* devlink_rels xarray contains 1:1 relationships between + * devlink object and related nested devlink instance. + * The xarray index is used to get the nested object from + * the nested-in object code. + */ +static DEFINE_XARRAY_FLAGS(devlink_rels, XA_FLAGS_ALLOC1); + +#define DEVLINK_REL_IN_USE XA_MARK_0 + +struct devlink_rel { + u32 index; + refcount_t refcount; + u32 devlink_index; + struct { + u32 devlink_index; + u32 obj_index; + devlink_rel_notify_cb_t *notify_cb; + devlink_rel_cleanup_cb_t *cleanup_cb; + struct work_struct notify_work; + } nested_in; +}; + +static void devlink_rel_free(struct devlink_rel *rel) +{ + xa_erase(&devlink_rels, rel->index); + kfree(rel); +} + +static void __devlink_rel_get(struct devlink_rel *rel) +{ + refcount_inc(&rel->refcount); +} + +static void __devlink_rel_put(struct devlink_rel *rel) +{ + if (refcount_dec_and_test(&rel->refcount)) + devlink_rel_free(rel); +} + +static void devlink_rel_nested_in_notify_work(struct work_struct *work) +{ + struct devlink_rel *rel = container_of(work, struct devlink_rel, + nested_in.notify_work); + struct devlink *devlink; + + devlink = devlinks_xa_get(rel->nested_in.devlink_index); + if (!devlink) + goto rel_put; + if (!devl_trylock(devlink)) { + devlink_put(devlink); + goto reschedule_work; + } + if (!devl_is_registered(devlink)) { + devl_unlock(devlink); + devlink_put(devlink); + goto rel_put; + } + if (!xa_get_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE)) + rel->nested_in.cleanup_cb(devlink, rel->nested_in.obj_index, rel->index); + rel->nested_in.notify_cb(devlink, rel->nested_in.obj_index); + devl_unlock(devlink); + devlink_put(devlink); + +rel_put: + __devlink_rel_put(rel); + return; + +reschedule_work: + schedule_work(&rel->nested_in.notify_work); +} + +static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel) +{ + __devlink_rel_get(rel); + schedule_work(&rel->nested_in.notify_work); +} + +static struct devlink_rel *devlink_rel_alloc(void) +{ + struct devlink_rel *rel; + static u32 next; + int err; + + rel = kzalloc(sizeof(*rel), GFP_KERNEL); + if (!rel) + return ERR_PTR(-ENOMEM); + + err = xa_alloc_cyclic(&devlink_rels, &rel->index, rel, + xa_limit_32b, &next, GFP_KERNEL); + if (err) { + kfree(rel); + return ERR_PTR(err); + } + + refcount_set(&rel->refcount, 1); + INIT_WORK(&rel->nested_in.notify_work, + &devlink_rel_nested_in_notify_work); + return rel; +} + +static void devlink_rel_put(struct devlink *devlink) +{ + struct devlink_rel *rel = devlink->rel; + + if (!rel) + return; + xa_clear_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE); + devlink_rel_nested_in_notify_work_schedule(rel); + __devlink_rel_put(rel); + devlink->rel = NULL; +} + +void devlink_rel_nested_in_clear(u32 rel_index) +{ + xa_clear_mark(&devlink_rels, rel_index, DEVLINK_REL_IN_USE); +} + +int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index, + u32 obj_index, devlink_rel_notify_cb_t *notify_cb, + devlink_rel_cleanup_cb_t *cleanup_cb, + struct devlink *devlink) +{ + struct devlink_rel *rel = devlink_rel_alloc(); + + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + + if (IS_ERR(rel)) + return PTR_ERR(rel); + + rel->devlink_index = devlink->index; + rel->nested_in.devlink_index = devlink_index; + rel->nested_in.obj_index = obj_index; + rel->nested_in.notify_cb = notify_cb; + rel->nested_in.cleanup_cb = cleanup_cb; + *rel_index = rel->index; + xa_set_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE); + devlink->rel = rel; + return 0; +} + +void devlink_rel_nested_in_notify(struct devlink *devlink) +{ + struct devlink_rel *rel = devlink->rel; + + if (!rel) + return; + devlink_rel_nested_in_notify_work_schedule(rel); +} + +static struct devlink_rel *devlink_rel_find(unsigned long rel_index) +{ + return xa_find(&devlink_rels, &rel_index, rel_index, + DEVLINK_REL_IN_USE); +} + +static struct devlink *devlink_rel_devlink_get_lock(u32 rel_index) +{ + struct devlink *devlink; + struct devlink_rel *rel; + u32 devlink_index; + + if (!rel_index) + return NULL; + xa_lock(&devlink_rels); + rel = devlink_rel_find(rel_index); + if (rel) + devlink_index = rel->devlink_index; + xa_unlock(&devlink_rels); + if (!rel) + return NULL; + devlink = devlinks_xa_get(devlink_index); + if (!devlink) + return NULL; + devl_lock(devlink); + if (!devl_is_registered(devlink)) { + devl_unlock(devlink); + devlink_put(devlink); + return NULL; + } + return devlink; +} + +int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, + u32 rel_index, int attrtype, + bool *msg_updated) +{ + struct net *net = devlink_net(devlink); + struct devlink *rel_devlink; + int err; + + rel_devlink = devlink_rel_devlink_get_lock(rel_index); + if (!rel_devlink) + return 0; + err = devlink_nl_put_nested_handle(msg, net, rel_devlink, attrtype); + devl_unlock(rel_devlink); + devlink_put(rel_devlink); + if (!err && msg_updated) + *msg_updated = true; + return err; +} + void *devlink_priv(struct devlink *devlink) { return &devlink->priv; @@ -142,6 +355,7 @@ int devl_register(struct devlink *devlink) xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify_register(devlink); + devlink_rel_nested_in_notify(devlink); return 0; } @@ -166,6 +380,7 @@ void devl_unregister(struct devlink *devlink) devlink_notify_unregister(devlink); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); + devlink_rel_put(devlink); } EXPORT_SYMBOL_GPL(devl_unregister); @@ -215,6 +430,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); xa_init_flags(&devlink->params, XA_FLAGS_ALLOC); xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); + xa_init_flags(&devlink->nested_rels, XA_FLAGS_ALLOC); write_pnet(&devlink->_net, net); INIT_LIST_HEAD(&devlink->rate_list); INIT_LIST_HEAD(&devlink->linecard_list); @@ -261,6 +477,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->linecard_list)); WARN_ON(!xa_empty(&devlink->ports)); + xa_destroy(&devlink->nested_rels); xa_destroy(&devlink->snapshot_ids); xa_destroy(&devlink->params); xa_destroy(&devlink->ports); diff --git a/net/devlink/dev.c b/net/devlink/dev.c index bba4ace7d22b..dc8039ca2b38 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -138,6 +138,23 @@ nla_put_failure: return -EMSGSIZE; } +static int devlink_nl_nested_fill(struct sk_buff *msg, struct devlink *devlink) +{ + unsigned long rel_index; + void *unused; + int err; + + xa_for_each(&devlink->nested_rels, rel_index, unused) { + err = devlink_rel_devlink_handle_put(msg, devlink, + rel_index, + DEVLINK_ATTR_NESTED_DEVLINK, + NULL); + if (err) + return err; + } + return 0; +} + static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags) @@ -164,6 +181,10 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, goto dev_stats_nest_cancel; nla_nest_end(msg, dev_stats); + + if (devlink_nl_nested_fill(msg, devlink)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -230,6 +251,34 @@ int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one); } +static void devlink_rel_notify_cb(struct devlink *devlink, u32 obj_index) +{ + devlink_notify(devlink, DEVLINK_CMD_NEW); +} + +static void devlink_rel_cleanup_cb(struct devlink *devlink, u32 obj_index, + u32 rel_index) +{ + xa_erase(&devlink->nested_rels, rel_index); +} + +int devl_nested_devlink_set(struct devlink *devlink, + struct devlink *nested_devlink) +{ + u32 rel_index; + int err; + + err = devlink_rel_nested_in_add(&rel_index, devlink->index, 0, + devlink_rel_notify_cb, + devlink_rel_cleanup_cb, + nested_devlink); + if (err) + return err; + return xa_insert(&devlink->nested_rels, rel_index, + xa_mk_value(0), GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(devl_nested_devlink_set); + void devlink_notify_register(struct devlink *devlink) { devlink_notify(devlink, DEVLINK_CMD_NEW); @@ -372,6 +421,7 @@ static void devlink_reload_netns_change(struct devlink *devlink, devlink_notify_unregister(devlink); write_pnet(&devlink->_net, dest_net); devlink_notify_register(devlink); + devlink_rel_nested_in_notify(devlink); } int devlink_reload(struct devlink *devlink, struct net *dest_net, diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index f6b5fea2e13c..741d1bf1bec8 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -17,6 +17,8 @@ #include "netlink_gen.h" +struct devlink_rel; + #define DEVLINK_REGISTERED XA_MARK_1 #define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ @@ -55,6 +57,8 @@ struct devlink { u8 reload_failed:1; refcount_t refcount; struct rcu_work rwork; + struct devlink_rel *rel; + struct xarray nested_rels; char priv[] __aligned(NETDEV_ALIGN); }; @@ -92,6 +96,20 @@ static inline bool devl_is_registered(struct devlink *devlink) return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); } +typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index); +typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index, + u32 rel_index); + +void devlink_rel_nested_in_clear(u32 rel_index); +int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index, + u32 obj_index, devlink_rel_notify_cb_t *notify_cb, + devlink_rel_cleanup_cb_t *cleanup_cb, + struct devlink *devlink); +void devlink_rel_nested_in_notify(struct devlink *devlink); +int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, + u32 rel_index, int attrtype, + bool *msg_updated); + /* Netlink */ #define DEVLINK_NL_FLAG_NEED_PORT BIT(0) #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) @@ -145,6 +163,8 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) return 0; } +int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, + struct devlink *devlink, int attrtype); int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info); /* Notify */ @@ -206,19 +226,7 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack); /* Linecards */ -struct devlink_linecard { - struct list_head list; - struct devlink *devlink; - unsigned int index; - const struct devlink_linecard_ops *ops; - void *priv; - enum devlink_linecard_state state; - struct mutex state_lock; /* Protects state */ - const char *type; - struct devlink_linecard_type *types; - unsigned int types_count; - struct devlink *nested_devlink; -}; +unsigned int devlink_linecard_index(struct devlink_linecard *linecard); /* Devlink nl cmds */ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info); diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index 85c32c314b0f..9ff1813f88c5 100644 --- a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -6,6 +6,25 @@ #include "devl_internal.h" +struct devlink_linecard { + struct list_head list; + struct devlink *devlink; + unsigned int index; + const struct devlink_linecard_ops *ops; + void *priv; + enum devlink_linecard_state state; + struct mutex state_lock; /* Protects state */ + const char *type; + struct devlink_linecard_type *types; + unsigned int types_count; + u32 rel_index; +}; + +unsigned int devlink_linecard_index(struct devlink_linecard *linecard) +{ + return linecard->index; +} + static struct devlink_linecard * devlink_linecard_get_by_index(struct devlink *devlink, unsigned int linecard_index) @@ -46,24 +65,6 @@ devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info) return devlink_linecard_get_from_attrs(devlink, info->attrs); } -static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink) -{ - struct nlattr *nested_attr; - - nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK); - if (!nested_attr) - return -EMSGSIZE; - if (devlink_nl_put_handle(msg, devlink)) - goto nla_put_failure; - - nla_nest_end(msg, nested_attr); - return 0; - -nla_put_failure: - nla_nest_cancel(msg, nested_attr); - return -EMSGSIZE; -} - struct devlink_linecard_type { const char *type; const void *priv; @@ -111,8 +112,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, nla_nest_end(msg, attr); } - if (linecard->nested_devlink && - devlink_nl_put_nested_handle(msg, linecard->nested_devlink)) + if (devlink_rel_devlink_handle_put(msg, devlink, + linecard->rel_index, + DEVLINK_ATTR_NESTED_DEVLINK, + NULL)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -521,7 +524,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); void devlink_linecard_provision_clear(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); - WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; linecard->type = NULL; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); @@ -540,7 +542,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear); void devlink_linecard_provision_fail(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); - WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); mutex_unlock(&linecard->state_lock); @@ -588,6 +589,27 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard) } EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); +static void devlink_linecard_rel_notify_cb(struct devlink *devlink, + u32 linecard_index) +{ + struct devlink_linecard *linecard; + + linecard = devlink_linecard_get_by_index(devlink, linecard_index); + if (!linecard) + return; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); +} + +static void devlink_linecard_rel_cleanup_cb(struct devlink *devlink, + u32 linecard_index, u32 rel_index) +{ + struct devlink_linecard *linecard; + + linecard = devlink_linecard_get_by_index(devlink, linecard_index); + if (linecard && linecard->rel_index == rel_index) + linecard->rel_index = 0; +} + /** * devlink_linecard_nested_dl_set - Attach/detach nested devlink * instance to linecard. @@ -595,12 +617,14 @@ EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); * @linecard: devlink linecard * @nested_devlink: devlink instance to attach or NULL to detach */ -void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, - struct devlink *nested_devlink) +int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink) { - mutex_lock(&linecard->state_lock); - linecard->nested_devlink = nested_devlink; - devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); - mutex_unlock(&linecard->state_lock); + return devlink_rel_nested_in_add(&linecard->rel_index, + linecard->devlink->index, + linecard->index, + devlink_linecard_rel_notify_cb, + devlink_linecard_rel_cleanup_cb, + nested_devlink); } EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set); diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index fc3e7c029a3b..499304d9de49 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -82,6 +82,32 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG }, }; +int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, + struct devlink *devlink, int attrtype) +{ + struct nlattr *nested_attr; + + nested_attr = nla_nest_start(msg, attrtype); + if (!nested_attr) + return -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (!net_eq(net, devlink_net(devlink))) { + int id = peernet2id_alloc(net, devlink_net(devlink), + GFP_KERNEL); + + if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id)) + return -EMSGSIZE; + } + + nla_nest_end(msg, nested_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(msg, nested_attr); + return -EMSGSIZE; +} + int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info) { int err; diff --git a/net/devlink/port.c b/net/devlink/port.c index 4763b42885fb..4e9003242448 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -428,6 +428,13 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por if (err) goto out; err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated); + if (err) + goto out; + err = devlink_rel_devlink_handle_put(msg, port->devlink, + port->rel_index, + DEVLINK_PORT_FN_ATTR_DEVLINK, + &msg_updated); + out: if (err || !msg_updated) nla_nest_cancel(msg, function_attr); @@ -483,7 +490,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, goto nla_put_failure; if (devlink_port->linecard && nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, - devlink_port->linecard->index)) + devlink_linecard_index(devlink_port->linecard))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -1392,6 +1399,50 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro } EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set); +static void devlink_port_rel_notify_cb(struct devlink *devlink, u32 port_index) +{ + struct devlink_port *devlink_port; + + devlink_port = devlink_port_get_by_index(devlink, port_index); + if (!devlink_port) + return; + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); +} + +static void devlink_port_rel_cleanup_cb(struct devlink *devlink, u32 port_index, + u32 rel_index) +{ + struct devlink_port *devlink_port; + + devlink_port = devlink_port_get_by_index(devlink, port_index); + if (devlink_port && devlink_port->rel_index == rel_index) + devlink_port->rel_index = 0; +} + +/** + * devl_port_fn_devlink_set - Attach peer devlink + * instance to port function. + * @devlink_port: devlink port + * @fn_devlink: devlink instance to attach + */ +int devl_port_fn_devlink_set(struct devlink_port *devlink_port, + struct devlink *fn_devlink) +{ + ASSERT_DEVLINK_PORT_REGISTERED(devlink_port); + + if (WARN_ON(devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_SF || + devlink_port->attrs.pci_sf.external)) + return -EINVAL; + + return devlink_rel_nested_in_add(&devlink_port->rel_index, + devlink_port->devlink->index, + devlink_port->index, + devlink_port_rel_notify_cb, + devlink_port_rel_cleanup_cb, + fn_devlink); +} +EXPORT_SYMBOL_GPL(devl_port_fn_devlink_set); + /** * devlink_port_linecard_set - Link port with a linecard * @@ -1420,7 +1471,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, case DEVLINK_PORT_FLAVOUR_PHYSICAL: if (devlink_port->linecard) n = snprintf(name, len, "l%u", - devlink_port->linecard->index); + devlink_linecard_index(devlink_port->linecard)); if (n < len) n += snprintf(name + n, len - n, "p%u", attrs->phys.port_number); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2713c9b06c4c..fb81de10d332 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -452,7 +452,7 @@ int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET4_BIND, &flags); if (err) return err; @@ -794,6 +794,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); + int sin_addr_len = sizeof(*sin); sin->sin_family = AF_INET; lock_sock(sk); @@ -806,7 +807,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin_port = inet->inet_dport; sin->sin_addr.s_addr = inet->inet_daddr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETPEERNAME); } else { __be32 addr = inet->inet_rcv_saddr; @@ -814,12 +815,12 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, addr = inet->inet_saddr; sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET4_GETSOCKNAME); } release_sock(sk); memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet_getname); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 418e5fb58fd3..76c3ea75b8dd 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2944,8 +2944,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l continue; state->im = rcu_dereference(state->idev->mc_list); } - if (!state->im) - break; spin_lock_bh(&state->im->lock); psf = state->im->sources; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 75e0aee35eb7..2887177822c9 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -301,7 +301,7 @@ static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ @@ -581,7 +581,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) * 4.1.3.3. */ if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) || - (family == AF_INET6 && !inet6_sk(sk)->recverr)) { + (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { @@ -899,7 +899,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *ip6 = ipv6_hdr(skb); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -908,7 +907,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, sin6->sin6_port = 0; sin6->sin6_addr = ip6->saddr; sin6->sin6_flowinfo = 0; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin6->sin6_flowinfo = ip6_flowinfo(ip6); sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b214b5a2e045..e2bf4602b559 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1632,7 +1632,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, { struct rtable *rt; - rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, (noxfrm ? DST_NOXFRM : 0)); if (rt) { @@ -1660,7 +1660,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) { struct rtable *new_rt; - new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + new_rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, rt->dst.flags); if (new_rt) { @@ -2834,7 +2834,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or struct rtable *ort = (struct rtable *) dst_orig; struct rtable *rt; - rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6ac890b4073f..e7f024d93572 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1367,6 +1367,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, }, { + .procname = "tcp_backlog_ack_defer", + .data = &init_net.ipv4.sysctl_tcp_backlog_ack_defer, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { .procname = "tcp_reflect_tos", .data = &init_net.ipv4.sysctl_tcp_reflect_tos, .maxlen = sizeof(u8), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3d3a24f79573..30e8e5d35983 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3814,6 +3814,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_wnd = tp->rcv_wnd; info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash; info->tcpi_fastopen_client_fail = tp->fastopen_client_fail; + + info->tcpi_total_rto = tp->total_rto; + info->tcpi_total_rto_recoveries = tp->total_rto_recoveries; + info->tcpi_total_rto_time = tp->total_rto_time; + if (tp->rto_stamp) { + info->tcpi_total_rto_time += tcp_time_stamp_raw() - + tp->rto_stamp; + } + unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 804821d6bd4d..2ce14ff2b00c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2101,6 +2101,10 @@ void tcp_clear_retrans(struct tcp_sock *tp) tp->undo_marker = 0; tp->undo_retrans = -1; tp->sacked_out = 0; + tp->rto_stamp = 0; + tp->total_rto = 0; + tp->total_rto_recoveries = 0; + tp->total_rto_time = 0; } static inline void tcp_init_undo(struct tcp_sock *tp) @@ -2839,6 +2843,14 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack) tcp_set_ca_state(sk, TCP_CA_Recovery); } +static void tcp_update_rto_time(struct tcp_sock *tp) +{ + if (tp->rto_stamp) { + tp->total_rto_time += tcp_time_stamp(tp) - tp->rto_stamp; + tp->rto_stamp = 0; + } +} + /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are * recovered or spurious. Otherwise retransmits more on partial ACKs. */ @@ -3043,6 +3055,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, break; case TCP_CA_Loss: tcp_process_loss(sk, flag, num_dupack, rexmit); + if (icsk->icsk_ca_state != TCP_CA_Loss) + tcp_update_rto_time(tp); tcp_identify_packet_loss(sk, ack_flag); if (!(icsk->icsk_ca_state == TCP_CA_Open || (*ack_flag & FLAG_LOST_RETRANS))) @@ -5567,6 +5581,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) { + /* If we are running from __release_sock() in user context, + * Defer the ack until tcp_release_cb(). + */ + if (sock_owned_by_user_nocheck(sk) && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_backlog_ack_defer)) { + set_bit(TCP_ACK_DEFERRED, &sk->sk_tsq_flags); + return; + } send_now: tcp_send_ack(sk); return; @@ -6450,22 +6472,24 @@ reset_and_undo: static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); struct request_sock *req; /* If we are still handling the SYNACK RTO, see if timestamp ECR allows * undo. If peer SACKs triggered fast recovery, we can't undo here. */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) - tcp_try_undo_loss(sk, false); + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out) + tcp_try_undo_recovery(sk); /* Reset rtx states to prevent spurious retransmits_timed_out() */ - tcp_sk(sk)->retrans_stamp = 0; + tcp_update_rto_time(tp); + tp->retrans_stamp = 0; inet_csk(sk)->icsk_retransmits = 0; /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1, * we no longer need req so release it. */ - req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, + req = rcu_dereference_protected(tp->fastopen_rsk, lockdep_sock_is_held(sk)); reqsk_fastopen_remove(sk, req, false); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4167e8a48b60..38892ae83e17 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -194,7 +194,7 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr, &addr_len); } /* This will initiate an outgoing connection. */ @@ -3264,6 +3264,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC; net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; net->ipv4.sysctl_tcp_comp_sack_nr = 44; + net->ipv4.sysctl_tcp_backlog_ack_defer = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b98d476f1594..eee8ab1bfa0e 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -565,6 +565,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->undo_marker = treq->snt_isn; newtp->retrans_stamp = div_u64(treq->snt_synack, USEC_PER_SEC / TCP_TS_HZ); + newtp->total_rto = req->num_timeout; + newtp->total_rto_recoveries = 1; + newtp->total_rto_time = tcp_time_stamp_raw() - + newtp->retrans_stamp; } newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f0723460753c..3b833fd76b24 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1076,7 +1076,8 @@ static void tcp_tasklet_func(struct tasklet_struct *t) #define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ TCPF_WRITE_TIMER_DEFERRED | \ TCPF_DELACK_TIMER_DEFERRED | \ - TCPF_MTU_REDUCED_DEFERRED) + TCPF_MTU_REDUCED_DEFERRED | \ + TCPF_ACK_DEFERRED) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -1100,16 +1101,6 @@ void tcp_release_cb(struct sock *sk) tcp_tsq_write(sk); __sock_put(sk); } - /* Here begins the tricky part : - * We are called from release_sock() with : - * 1) BH disabled - * 2) sk_lock.slock spinlock held - * 3) socket owned by us (sk->sk_lock.owned == 1) - * - * But following code is meant to be called from BH handlers, - * so we should keep BH disabled, but early release socket ownership - */ - sock_release_ownership(sk); if (flags & TCPF_WRITE_TIMER_DEFERRED) { tcp_write_timer_handler(sk); @@ -1123,6 +1114,8 @@ void tcp_release_cb(struct sock *sk) inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } + if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk)) + tcp_send_ack(sk); } EXPORT_SYMBOL(tcp_release_cb); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 984ab4a0421e..3f61c6a70a1f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -394,7 +394,7 @@ static void tcp_probe_timer(struct sock *sk) if (user_timeout && (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >= msecs_to_jiffies(user_timeout)) - goto abort; + goto abort; } max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { @@ -415,6 +415,19 @@ abort: tcp_write_err(sk); } } +static void tcp_update_rto_stats(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + + if (!icsk->icsk_retransmits) { + tp->total_rto_recoveries++; + tp->rto_stamp = tcp_time_stamp(tp); + } + icsk->icsk_retransmits++; + tp->total_rto++; +} + /* * Timer for Fast Open socket to retransmit SYNACK. Note that the * sk here is the child socket, not the parent (listener) socket. @@ -447,7 +460,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) */ inet_rtx_syn_ack(sk, req); req->num_timeout++; - icsk->icsk_retransmits++; + tcp_update_rto_stats(sk); if (!tp->retrans_stamp) tp->retrans_stamp = tcp_time_stamp(tp); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, @@ -575,7 +588,7 @@ void tcp_retransmit_timer(struct sock *sk) tcp_enter_loss(sk); - icsk->icsk_retransmits++; + tcp_update_rto_stats(sk); if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, * Let senders fight for local resources conservatively. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f39b9c844580..7b21a51dd25a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -714,7 +714,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) iph->saddr, uh->source, skb->dev->ifindex, inet_sdif(skb), udptable, NULL); - if (!sk || udp_sk(sk)->encap_type) { + if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udp_encap_needed_key)) { sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, @@ -1051,7 +1051,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) u8 tos, scope; __be16 dport; int err, is_udplite = IS_UDPLITE(sk); - int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; struct ip_options_data opt_copy; @@ -1143,7 +1143,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, - (struct sockaddr *)usin, &ipc.addr); + (struct sockaddr *)usin, + &msg->msg_namelen, + &ipc.addr); if (err) goto out_free; if (usin) { @@ -1315,11 +1317,11 @@ void udp_splice_eof(struct socket *sock) struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - if (!up->pending || READ_ONCE(up->corkflag)) + if (!up->pending || udp_test_bit(CORK, sk)) return; lock_sock(sk); - if (up->pending && !READ_ONCE(up->corkflag)) + if (up->pending && !udp_test_bit(CORK, sk)) udp_push_pending_frames(sk); release_sock(sk); } @@ -1865,10 +1867,11 @@ try_again: *addr_len = sizeof(*sin); BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, - (struct sockaddr *)sin); + (struct sockaddr *)sin, + addr_len); } - if (udp_sk(sk)->gro_enabled) + if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (inet_cmsg_flags(inet)) @@ -1904,7 +1907,7 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } EXPORT_SYMBOL(udp_pre_connect); @@ -2081,7 +2084,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) } nf_reset_ct(skb); - if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { + if (static_branch_unlikely(&udp_encap_needed_key) && + READ_ONCE(up->encap_type)) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* @@ -2119,7 +2123,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets */ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { + u16 pcrlen = READ_ONCE(up->pcrlen); /* * MIB statistics other than incrementing the error count are @@ -2132,7 +2137,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) * delivery of packets with coverage values less than a value * provided by the application." */ - if (up->pcrlen == 0) { /* full coverage was set */ + if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; @@ -2143,9 +2148,9 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) * that it wants x while sender emits packets of smaller size y. * Therefore the above ...()->partial_cov statement is essential. */ - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } @@ -2618,7 +2623,7 @@ void udp_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) + if (udp_test_bit(ENCAP_ENABLED, sk)) static_branch_dec(&udp_encap_needed_key); } } @@ -2658,9 +2663,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: if (val != 0) { - WRITE_ONCE(up->corkflag, 1); + udp_set_bit(CORK, sk); } else { - WRITE_ONCE(up->corkflag, 0); + udp_clear_bit(CORK, sk); lock_sock(sk); push_pending_frames(sk); release_sock(sk); @@ -2675,17 +2680,17 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case UDP_ENCAP_ESPINUDP_NON_IKE: #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; + WRITE_ONCE(up->encap_rcv, + ipv6_stub->xfrm6_udp_encap_rcv); else #endif - up->encap_rcv = xfrm4_udp_encap_rcv; + WRITE_ONCE(up->encap_rcv, + xfrm4_udp_encap_rcv); #endif fallthrough; case UDP_ENCAP_L2TPINUDP: - up->encap_type = val; - lock_sock(sk); - udp_tunnel_encap_enable(sk->sk_socket); - release_sock(sk); + WRITE_ONCE(up->encap_type, val); + udp_tunnel_encap_enable(sk); break; default: err = -ENOPROTOOPT; @@ -2694,11 +2699,11 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_NO_CHECK6_TX: - up->no_check6_tx = valbool; + udp_set_no_check6_tx(sk, valbool); break; case UDP_NO_CHECK6_RX: - up->no_check6_rx = valbool; + udp_set_no_check6_rx(sk, valbool); break; case UDP_SEGMENT: @@ -2708,14 +2713,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, break; case UDP_GRO: - lock_sock(sk); /* when enabling GRO, accept the related GSO packet type */ if (valbool) - udp_tunnel_encap_enable(sk->sk_socket); - up->gro_enabled = valbool; - up->accept_udp_l4 = valbool; - release_sock(sk); + udp_tunnel_encap_enable(sk); + udp_assign_bit(GRO_ENABLED, sk, valbool); + udp_assign_bit(ACCEPT_L4, sk, valbool); break; /* @@ -2730,8 +2733,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, val = 8; else if (val > USHRT_MAX) val = USHRT_MAX; - up->pcslen = val; - up->pcflag |= UDPLITE_SEND_CC; + WRITE_ONCE(up->pcslen, val); + udp_set_bit(UDPLITE_SEND_CC, sk); break; /* The receiver specifies a minimum checksum coverage value. To make @@ -2744,8 +2747,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, val = 8; else if (val > USHRT_MAX) val = USHRT_MAX; - up->pcrlen = val; - up->pcflag |= UDPLITE_RECV_CC; + WRITE_ONCE(up->pcrlen, val); + udp_set_bit(UDPLITE_RECV_CC, sk); break; default: @@ -2783,19 +2786,19 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: - val = READ_ONCE(up->corkflag); + val = udp_test_bit(CORK, sk); break; case UDP_ENCAP: - val = up->encap_type; + val = READ_ONCE(up->encap_type); break; case UDP_NO_CHECK6_TX: - val = up->no_check6_tx; + val = udp_get_no_check6_tx(sk); break; case UDP_NO_CHECK6_RX: - val = up->no_check6_rx; + val = udp_get_no_check6_rx(sk); break; case UDP_SEGMENT: @@ -2803,17 +2806,17 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, break; case UDP_GRO: - val = up->gro_enabled; + val = udp_test_bit(GRO_ENABLED, sk); break; /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: - val = up->pcslen; + val = READ_ONCE(up->pcslen); break; case UDPLITE_RECV_CSCOV: - val = up->pcrlen; + val = READ_ONCE(up->pcrlen); break; default: diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0f46b3c2e4ac..6c95d28d0c4a 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -557,10 +557,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, NAPI_GRO_CB(skb)->is_flist = 0; if (!sk || !udp_sk(sk)->gro_receive) { if (skb->dev->features & NETIF_F_GRO_FRAGLIST) - NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1; + NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1; if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || - (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) + (sk && udp_test_bit(GRO_ENABLED, sk)) || NAPI_GRO_CB(skb)->is_flist) return call_gro_receive(udp_gro_receive_segment, head, skb); /* no GRO, be sure flush the current packet */ diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 9b18f371af0d..1e7e4aecdc48 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -78,7 +78,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->gro_receive = cfg->gro_receive; udp_sk(sk)->gro_complete = cfg->gro_complete; - udp_tunnel_encap_enable(sock); + udp_tunnel_encap_enable(sk); } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 39ecdad1b50c..af37af3ab727 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -21,7 +21,6 @@ EXPORT_SYMBOL(udplite_table); static int udplite_sk_init(struct sock *sk) { udp_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index eac206a290d0..183f6dc37242 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -85,11 +85,11 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct udphdr *uh; struct iphdr *iph; int iphlen, len; - __u8 *udpdata; __be32 *udpdata32; - __u16 encap_type = up->encap_type; + u16 encap_type; + encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 368824fe9719..c35d302a3da9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -217,10 +217,11 @@ lookup_protocol: inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; - np->mc_loop = 1; - np->mc_all = 1; + inet6_set_bit(MC6_LOOP, sk); + inet6_set_bit(MC6_ALL, sk); np->pmtudisc = IPV6_PMTUDISC_WANT; - np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; + inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect & + FLOWLABEL_REFLECT_ESTABLISHED); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); @@ -453,7 +454,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. */ - err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, + err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET6_BIND, &flags); if (err) return err; @@ -519,6 +520,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; + int sin_addr_len = sizeof(*sin); struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -536,9 +538,9 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin6_port = inet->inet_dport; sin->sin6_addr = sk->sk_v6_daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -546,13 +548,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, else sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; - BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, + BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETSOCKNAME); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); release_sock(sk); - return sizeof(*sin); + return sin_addr_len; } EXPORT_SYMBOL(inet6_getname); @@ -1060,6 +1062,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 41ebc4e57473..cc6a502db39d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -80,7 +80,8 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) struct flowi6 fl6; int err = 0; - if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) { + if (inet6_test_bit(SNDFLOW, sk) && + (np->flow_label & IPV6_FLOWLABEL_MASK)) { flowlabel = fl6_sock_lookup(sk, np->flow_label); if (IS_ERR(flowlabel)) return -EINVAL; @@ -163,7 +164,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (ipv6_addr_any(&usin->sin6_addr)) { @@ -305,11 +306,10 @@ static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb, void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { - struct ipv6_pinfo *np = inet6_sk(sk); struct icmp6hdr *icmph = icmp6_hdr(skb); struct sock_exterr_skb *serr; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = skb_clone(skb, GFP_ATOMIC); @@ -332,7 +332,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __skb_pull(skb, payload - skb->data); - if (inet6_sk(sk)->recverr_rfc4884) + if (inet6_test_bit(RECVERR6_RFC4884, sk)) ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); skb_reset_transport_header(skb); @@ -344,12 +344,11 @@ EXPORT_SYMBOL_GPL(ipv6_icmp_error); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { - const struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; struct ipv6hdr *iph; struct sk_buff *skb; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); @@ -493,7 +492,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), struct ipv6hdr, daddr); sin->sin6_addr = ip6h->daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = ip6_flowinfo(ip6h); sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 93a594a901d1..8fb4a791881a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -588,7 +588,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.mark = mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); @@ -791,7 +791,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) msg.offset = 0; msg.type = type; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ipc6.sockc.mark = mark; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b3ca4beb4405..eca07e10e21f 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -513,7 +513,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, return 0; } - if (np->repflow) { + if (inet6_test_bit(REPFLOW, sk)) { freq->flr_label = np->flow_label; return 0; } @@ -551,10 +551,10 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) if (freq->flr_flags & IPV6_FL_F_REFLECT) { if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; - if (!np->repflow) + if (!inet6_test_bit(REPFLOW, sk)) return -ESRCH; np->flow_label = 0; - np->repflow = 0; + inet6_clear_bit(REPFLOW, sk); return 0; } @@ -626,7 +626,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; - np->repflow = 1; + inet6_set_bit(REPFLOW, sk); return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 54fc4c711f2c..951ba8089b5b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -232,12 +232,11 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(ip6_output); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct sock *sk) { - if (!np->autoflowlabel_set) + if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk)) return ip6_default_np_autolabel(net); - else - return np->autoflowlabel; + return inet6_test_bit(AUTOFLOWLABEL, sk); } /* @@ -309,12 +308,12 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * Fill in the IPv6 header */ if (np) - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -369,9 +368,8 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) if (sk && ra->sel == sel && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { - struct ipv6_pinfo *np = inet6_sk(sk); - if (np && np->rtalert_isolate && + if (inet6_test_bit(RTALERT_ISOLATE, sk) && !net_eq(sock_net(sk), dev_net(skb->dev))) { continue; } @@ -881,9 +879,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, mtu = IPV6_MIN_MTU; } - if (np && np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; + if (np) { + u32 frag_size = READ_ONCE(np->frag_size); + + if (frag_size && frag_size < mtu) + mtu = frag_size; } if (mtu < hlen + sizeof(struct frag_hdr) + 8) goto fail_toobig; @@ -1113,7 +1113,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, rcu_read_lock(); from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, + sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, &fl6->saddr); rcu_read_unlock(); @@ -1392,7 +1392,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, struct rt6_info *rt) { struct ipv6_pinfo *np = inet6_sk(sk); - unsigned int mtu; + unsigned int mtu, frag_size; struct ipv6_txoptions *nopt, *opt = ipc6->opt; /* callers pass dst together with a reference, set it first so @@ -1436,15 +1436,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } + + frag_size = READ_ONCE(np->frag_size); + if (frag_size && frag_size < mtu) + mtu = frag_size; + cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; @@ -1935,7 +1936,6 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr *final_dst; - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = v6_cork->opt; @@ -1978,7 +1978,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; @@ -2091,7 +2091,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, return ERR_PTR(err); } if (ipc6->dontfrag < 0) - ipc6->dontfrag = inet6_sk(sk)->dontfrag; + ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0e2a0847b387..7d661735cb9d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -415,6 +415,101 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); + /* Handle options that can be set without locking the socket. */ + switch (optname) { + case IPV6_UNICAST_HOPS: + if (optlen < sizeof(int)) + return -EINVAL; + if (val > 255 || val < -1) + return -EINVAL; + WRITE_ONCE(np->hop_limit, val); + return 0; + case IPV6_MULTICAST_LOOP: + if (optlen < sizeof(int)) + return -EINVAL; + if (val != valbool) + return -EINVAL; + inet6_assign_bit(MC6_LOOP, sk, valbool); + return 0; + case IPV6_MULTICAST_HOPS: + if (sk->sk_type == SOCK_STREAM) + return retv; + if (optlen < sizeof(int)) + return -EINVAL; + if (val > 255 || val < -1) + return -EINVAL; + WRITE_ONCE(np->mcast_hops, + val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); + return 0; + case IPV6_MTU: + if (optlen < sizeof(int)) + return -EINVAL; + if (val && val < IPV6_MIN_MTU) + return -EINVAL; + WRITE_ONCE(np->frag_size, val); + return 0; + case IPV6_MINHOPCOUNT: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < 0 || val > 255) + return -EINVAL; + + if (val) + static_branch_enable(&ip6_min_hopcount); + + /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount + * while we are changing it. + */ + WRITE_ONCE(np->min_hopcount, val); + return 0; + case IPV6_RECVERR_RFC4884: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < 0 || val > 1) + return -EINVAL; + inet6_assign_bit(RECVERR6_RFC4884, sk, valbool); + return 0; + case IPV6_MULTICAST_ALL: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(MC6_ALL, sk, valbool); + return 0; + case IPV6_AUTOFLOWLABEL: + inet6_assign_bit(AUTOFLOWLABEL, sk, valbool); + inet6_set_bit(AUTOFLOWLABEL_SET, sk); + return 0; + case IPV6_DONTFRAG: + inet6_assign_bit(DONTFRAG, sk, valbool); + return 0; + case IPV6_RECVERR: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(RECVERR6, sk, valbool); + if (!val) + skb_errqueue_purge(&sk->sk_error_queue); + return 0; + case IPV6_ROUTER_ALERT_ISOLATE: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); + return 0; + case IPV6_MTU_DISCOVER: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) + return -EINVAL; + WRITE_ONCE(np->pmtudisc, val); + return 0; + case IPV6_FLOWINFO_SEND: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(SNDFLOW, sk, valbool); + return 0; + case IPV6_ADDR_PREFERENCES: + if (optlen < sizeof(int)) + return -EINVAL; + return ip6_sock_set_addr_preferences(sk, val); + } if (needs_rtnl) rtnl_lock(); sockopt_lock_sock(sk); @@ -733,34 +828,7 @@ done: } break; } - case IPV6_UNICAST_HOPS: - if (optlen < sizeof(int)) - goto e_inval; - if (val > 255 || val < -1) - goto e_inval; - np->hop_limit = val; - retv = 0; - break; - case IPV6_MULTICAST_HOPS: - if (sk->sk_type == SOCK_STREAM) - break; - if (optlen < sizeof(int)) - goto e_inval; - if (val > 255 || val < -1) - goto e_inval; - np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); - retv = 0; - break; - - case IPV6_MULTICAST_LOOP: - if (optlen < sizeof(int)) - goto e_inval; - if (val != valbool) - goto e_inval; - np->mc_loop = valbool; - retv = 0; - break; case IPV6_UNICAST_IF: { @@ -862,13 +930,6 @@ done: retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); break; } - case IPV6_MULTICAST_ALL: - if (optlen < sizeof(int)) - goto e_inval; - np->mc_all = valbool; - retv = 0; - break; - case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: if (in_compat_syscall()) @@ -896,42 +957,6 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; - case IPV6_ROUTER_ALERT_ISOLATE: - if (optlen < sizeof(int)) - goto e_inval; - np->rtalert_isolate = valbool; - retv = 0; - break; - case IPV6_MTU_DISCOVER: - if (optlen < sizeof(int)) - goto e_inval; - if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) - goto e_inval; - np->pmtudisc = val; - retv = 0; - break; - case IPV6_MTU: - if (optlen < sizeof(int)) - goto e_inval; - if (val && val < IPV6_MIN_MTU) - goto e_inval; - np->frag_size = val; - retv = 0; - break; - case IPV6_RECVERR: - if (optlen < sizeof(int)) - goto e_inval; - np->recverr = valbool; - if (!val) - skb_errqueue_purge(&sk->sk_error_queue); - retv = 0; - break; - case IPV6_FLOWINFO_SEND: - if (optlen < sizeof(int)) - goto e_inval; - np->sndflow = valbool; - retv = 0; - break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); break; @@ -943,47 +968,10 @@ done: retv = xfrm_user_policy(sk, optname, optval, optlen); break; - case IPV6_ADDR_PREFERENCES: - if (optlen < sizeof(int)) - goto e_inval; - retv = __ip6_sock_set_addr_preferences(sk, val); - break; - case IPV6_MINHOPCOUNT: - if (optlen < sizeof(int)) - goto e_inval; - if (val < 0 || val > 255) - goto e_inval; - - if (val) - static_branch_enable(&ip6_min_hopcount); - - /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount - * while we are changing it. - */ - WRITE_ONCE(np->min_hopcount, val); - retv = 0; - break; - case IPV6_DONTFRAG: - np->dontfrag = valbool; - retv = 0; - break; - case IPV6_AUTOFLOWLABEL: - np->autoflowlabel = valbool; - np->autoflowlabel_set = 1; - retv = 0; - break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; break; - case IPV6_RECVERR_RFC4884: - if (optlen < sizeof(int)) - goto e_inval; - if (val < 0 || val > 1) - goto e_inval; - np->recverr_rfc4884 = valbool; - retv = 0; - break; } unlock: @@ -1180,7 +1168,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { - int hlim = np->mcast_hops; + int hlim = READ_ONCE(np->mcast_hops); + put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxtclass) { @@ -1197,7 +1186,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { - int hlim = np->mcast_hops; + int hlim = READ_ONCE(np->mcast_hops); + put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxflow) { @@ -1347,9 +1337,9 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct dst_entry *dst; if (optname == IPV6_UNICAST_HOPS) - val = np->hop_limit; + val = READ_ONCE(np->hop_limit); else - val = np->mcast_hops; + val = READ_ONCE(np->mcast_hops); if (val < 0) { rcu_read_lock(); @@ -1365,7 +1355,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_MULTICAST_LOOP: - val = np->mc_loop; + val = inet6_test_bit(MC6_LOOP, sk); break; case IPV6_MULTICAST_IF: @@ -1373,7 +1363,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MULTICAST_ALL: - val = np->mc_all; + val = inet6_test_bit(MC6_ALL, sk); break; case IPV6_UNICAST_IF: @@ -1381,15 +1371,15 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MTU_DISCOVER: - val = np->pmtudisc; + val = READ_ONCE(np->pmtudisc); break; case IPV6_RECVERR: - val = np->recverr; + val = inet6_test_bit(RECVERR6, sk); break; case IPV6_FLOWINFO_SEND: - val = np->sndflow; + val = inet6_test_bit(SNDFLOW, sk); break; case IPV6_FLOWLABEL_MGR: @@ -1424,33 +1414,35 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_ADDR_PREFERENCES: + { + u8 srcprefs = READ_ONCE(np->srcprefs); val = 0; - if (np->srcprefs & IPV6_PREFER_SRC_TMP) + if (srcprefs & IPV6_PREFER_SRC_TMP) val |= IPV6_PREFER_SRC_TMP; - else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC) + else if (srcprefs & IPV6_PREFER_SRC_PUBLIC) val |= IPV6_PREFER_SRC_PUBLIC; else { /* XXX: should we return system default? */ val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; } - if (np->srcprefs & IPV6_PREFER_SRC_COA) + if (srcprefs & IPV6_PREFER_SRC_COA) val |= IPV6_PREFER_SRC_COA; else val |= IPV6_PREFER_SRC_HOME; break; - + } case IPV6_MINHOPCOUNT: - val = np->min_hopcount; + val = READ_ONCE(np->min_hopcount); break; case IPV6_DONTFRAG: - val = np->dontfrag; + val = inet6_test_bit(DONTFRAG, sk); break; case IPV6_AUTOFLOWLABEL: - val = ip6_autoflowlabel(sock_net(sk), np); + val = ip6_autoflowlabel(sock_net(sk), sk); break; case IPV6_RECVFRAGSIZE: @@ -1458,11 +1450,11 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_ROUTER_ALERT_ISOLATE: - val = np->rtalert_isolate; + val = inet6_test_bit(RTALERT_ISOLATE, sk); break; case IPV6_RECVERR_RFC4884: - val = np->recverr_rfc4884; + val = inet6_test_bit(RECVERR6_RFC4884, sk); break; default: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ce25bcb9974..99e28b444a4c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -642,7 +642,7 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, } if (!mc) { rcu_read_unlock(); - return np->mc_all; + return inet6_test_bit(MC6_ALL, sk); } psl = rcu_dereference(mc->sflist); if (!psl) { @@ -1716,7 +1716,7 @@ static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb, hdr->payload_len = htons(len); hdr->nexthdr = proto; - hdr->hop_limit = inet6_sk(sk)->hop_limit; + hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit); hdr->saddr = *saddr; hdr->daddr = *daddr; @@ -3011,8 +3011,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s continue; state->im = rcu_dereference(state->idev->mc_list); } - if (!state->im) - break; psf = rcu_dereference(state->im->mca_sources); } out: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 553c8664e0a7..679443d7ecb5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -500,7 +500,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, csum_partial(icmp6h, skb->len, 0)); - ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); + ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); @@ -1996,7 +1996,7 @@ static int __net_init ndisc_net_init(struct net *net) np = inet6_sk(sk); np->hop_limit = 255; /* Do not loopback ndisc messages */ - np->mc_loop = 0; + inet6_clear_bit(MC6_LOOP, sk); return 0; } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 5831aaa53d75..d2098dd4ceae 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -56,7 +56,7 @@ static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) @@ -89,7 +89,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; } daddr = &(u->sin6_addr); - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr))) oif = u->sin6_scope_id; @@ -118,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if)) return -EINVAL; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 42fcec3ecf5e..a2aa54a2baae 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -291,6 +291,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; @@ -300,26 +301,26 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, 2. Socket is connected (otherwise the error indication is useless without recverr and error is hard. */ - if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) + if (!recverr && sk->sk_state != TCP_ESTABLISHED) return; harderr = icmpv6_err_convert(type, code, &err); if (type == ICMPV6_PKT_TOOBIG) { ip6_sk_update_pmtu(skb, sk, info); - harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); + harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO); } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); return; } - if (np->recverr) { + if (recverr) { u8 *payload = skb->data; if (!inet_test_bit(HDRINCL, sk)) payload += offset; ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); } - if (np->recverr || harderr) { + if (recverr || harderr) { sk->sk_err = err; sk_error_report(sk); } @@ -587,7 +588,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags, const struct sockcm_cookie *sockc) { - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; @@ -668,7 +668,7 @@ out: error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); error_check: - if (err == -ENOBUFS && !np->recverr) + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) err = 0; return err; } @@ -795,7 +795,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EINVAL; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); @@ -898,7 +898,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9c687b357e6a..b132feae3393 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -341,7 +341,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 1, DST_OBSOLETE_FORCE_CHK, flags); + DST_OBSOLETE_FORCE_CHK, flags); if (rt) { rt6_info_init(rt); @@ -2622,7 +2622,7 @@ static struct dst_entry *ip6_route_output_flags_noref(struct net *net, if (!any_src) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) - flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); + flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs)); return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); } @@ -2655,7 +2655,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct net_device *loopback_dev = net->loopback_dev; struct dst_entry *new = NULL; - rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, + rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, DST_OBSOLETE_DEAD, 0); if (rt) { rt6_info_init(rt); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 44b6949d72b2..4eb0c6386aca 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -135,7 +135,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, sock_owned_by_me(sk); - return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, @@ -163,7 +163,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { @@ -508,7 +508,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tcp_ld_RTO_revert(sk, seq); } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { @@ -548,7 +548,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; - if (np->repflow && ireq->pktopts) + if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? @@ -797,7 +797,7 @@ static void tcp_v6_init_req(struct request_sock *req, (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || - np->rxopt.bits.rxohlim || np->repflow)) { + np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { refcount_inc(&skb->users); ireq->pktopts = skb; } @@ -1055,10 +1055,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { - const struct ipv6_pinfo *np = tcp_inet6_sk(sk); - trace_tcp_send_reset(sk, skb); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); priority = sk->sk_priority; txhash = sk->sk_txhash; @@ -1247,7 +1245,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->mcast_oif = inet_iif(skb); newnp->mcast_hops = ip_hdr(skb)->ttl; newnp->rcv_flowinfo = 0; - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = 0; /* @@ -1320,7 +1318,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* Set ToS of the new socket based upon the value of incoming SYN. @@ -1542,10 +1540,11 @@ ipv6_pktoptions: if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = tcp_v6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) - np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + WRITE_ONCE(np->mcast_hops, + ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { tcp_v6_restore_cb(opt_skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 86b5d509a468..622b10a549f7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -410,10 +410,11 @@ try_again: *addr_len = sizeof(*sin6); BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, - (struct sockaddr *)sin6); + (struct sockaddr *)sin6, + addr_len); } - if (udp_sk(sk)->gro_enabled) + if (udp_test_bit(GRO_ENABLED, sk)) udp_cmsg_recv(msg, sk, skb); if (np->rxopt.all) @@ -571,7 +572,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, NULL); - if (!sk || udp_sk(sk)->encap_type) { + if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udpv6_encap_needed_key)) { sk = __udp6_lib_err_encap(net, hdr, offset, uh, @@ -598,7 +599,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!ip6_sk_accept_pmtu(sk)) goto out; ip6_sk_update_pmtu(skb, sk, info); - if (np->pmtudisc != IPV6_PMTUDISC_DONT) + if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT) harderr = 1; } if (type == NDISC_REDIRECT) { @@ -619,7 +620,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!np->recverr) { + if (!inet6_test_bit(RECVERR6, sk)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { @@ -688,7 +689,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) } nf_reset_ct(skb); - if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { + if (static_branch_unlikely(&udpv6_encap_needed_key) && + READ_ONCE(up->encap_type)) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); /* @@ -726,16 +728,17 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) { + u16 pcrlen = READ_ONCE(up->pcrlen); - if (up->pcrlen == 0) { /* full coverage was set */ + if (pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + if (UDP_SKB_CB(skb)->cscov < pcrlen) { net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + UDP_SKB_CB(skb)->cscov, pcrlen); goto drop; } } @@ -858,7 +861,7 @@ start_lookup: /* If zero checksum and no_check is not on for * the socket then skip it. */ - if (!uh->check && !udp_sk(sk)->no_check6_rx) + if (!uh->check && !udp_get_no_check6_rx(sk)) continue; if (!first) { first = sk; @@ -980,7 +983,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); - if (!uh->check && !udp_sk(sk)->no_check6_rx) { + if (!uh->check && !udp_get_no_check6_rx(sk)) { if (refcounted) sock_put(sk); goto report_csum_error; @@ -1002,7 +1005,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* Unicast */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { - if (!uh->check && !udp_sk(sk)->no_check6_rx) + if (!uh->check && !udp_get_no_check6_rx(sk)) goto report_csum_error; return udp6_unicast_rcv_skb(sk, skb, uh); } @@ -1155,7 +1158,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; - return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len); } /** @@ -1241,7 +1244,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (udp_sk(sk)->no_check6_tx) { + if (udp_get_no_check6_tx(sk)) { kfree_skb(skb); return -EINVAL; } @@ -1262,7 +1265,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, if (is_udplite) csum = udplite_csum(skb); - else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ + else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ @@ -1281,7 +1284,7 @@ csum_partial: send: err = ip6_send_skb(skb); if (err) { - if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; @@ -1332,7 +1335,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int addr_len = msg->msg_namelen; bool connected = false; int ulen = len; - int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int err; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); @@ -1427,7 +1430,7 @@ do_udp_sendmsg: fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); @@ -1508,6 +1511,7 @@ do_udp_sendmsg: if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, (struct sockaddr *)sin6, + &addr_len, &fl6->saddr); if (err) goto out_no_dst; @@ -1593,7 +1597,7 @@ back_from_confirm: do_append_data: if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, fl6, (struct rt6_info *)dst, @@ -1606,7 +1610,7 @@ do_append_data: up->pending = 0; if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; + err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0; release_sock(sk); out: @@ -1644,11 +1648,11 @@ static void udpv6_splice_eof(struct socket *sock) struct sock *sk = sock->sk; struct udp_sock *up = udp_sk(sk); - if (!up->pending || READ_ONCE(up->corkflag)) + if (!up->pending || udp_test_bit(CORK, sk)) return; lock_sock(sk); - if (up->pending && !READ_ONCE(up->corkflag)) + if (up->pending && !udp_test_bit(CORK, sk)) udp_v6_push_pending_frames(sk); release_sock(sk); } @@ -1670,7 +1674,7 @@ void udpv6_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) { + if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udpv6_encap_needed_key); udp_encap_disable(); } diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 267d491e9707..a60bec9b14f1 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -17,7 +17,6 @@ static int udplitev6_sk_init(struct sock *sk) { udpv6_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); return 0; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4907ab241d6b..4156387248e4 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -81,14 +81,14 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct ipv6hdr *ip6h; int len; int ip6hlen = sizeof(struct ipv6hdr); - __u8 *udpdata; __be32 *udpdata32; - __u16 encap_type = up->encap_type; + u16 encap_type; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_udp_encap_rcv(sk, skb); + encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 03608d3ded4b..8d21ff25f160 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1139,9 +1139,9 @@ static void l2tp_tunnel_destruct(struct sock *sk) switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: /* No longer an encapsulation socket. See net/ipv4/udp.c */ - (udp_sk(sk))->encap_type = 0; - (udp_sk(sk))->encap_rcv = NULL; - (udp_sk(sk))->encap_destroy = NULL; + WRITE_ONCE(udp_sk(sk)->encap_type, 0); + udp_sk(sk)->encap_rcv = NULL; + udp_sk(sk)->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 11f3d375cec0..bb373e249237 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -431,7 +431,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, return -ENOTCONN; lsa->l2tp_conn_id = lsk->peer_conn_id; lsa->l2tp_addr = sk->sk_v6_daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) lsa->l2tp_flowinfo = np->flow_label; } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -528,7 +528,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; daddr = &lsa->l2tp_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK; if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); @@ -620,7 +620,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index a4af3b7675ef..c3a60fd19c37 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -309,7 +309,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { /* IEEE802.11ac-2013 Table E-4 */ - u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; + static const u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; struct cfg80211_chan_def uc = sta->tdls_chandef; enum nl80211_chan_width max_width = ieee80211_sta_cap_chan_bw(&sta->deflink); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 4174076c66fa..05d47c0e5ec3 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1298,17 +1298,13 @@ static void set_sock_size(struct sock *sk, int mode, int val) static void set_mcast_loop(struct sock *sk, u_char loop) { /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ - lock_sock(sk); inet_assign_bit(MC_LOOP, sk, loop); #ifdef CONFIG_IP_VS_IPV6 - if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - + if (READ_ONCE(sk->sk_family) == AF_INET6) { /* IPV6_MULTICAST_LOOP */ - np->mc_loop = loop ? 1 : 0; + inet6_assign_bit(MC6_LOOP, sk, loop); } #endif - release_sock(sk); } /* @@ -1326,7 +1322,7 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl) struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MULTICAST_HOPS */ - np->mcast_hops = ttl; + WRITE_ONCE(np->mcast_hops, ttl); } #endif release_sock(sk); @@ -1345,7 +1341,7 @@ static void set_mcast_pmtudisc(struct sock *sk, int val) struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MTU_DISCOVER */ - np->pmtudisc = val; + WRITE_ONCE(np->pmtudisc, val); } #endif release_sock(sk); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index fd66014d8a76..5f8094acd056 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -873,7 +873,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); - dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_rt.dst.dev = vport->dev; @@ -890,7 +890,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); memset(&ovs_rt, 0, sizeof(ovs_rt)); - dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_rt.dst.dev = vport->dev; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1e20bbd687f1..1424bfeaca73 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -375,9 +375,9 @@ out: static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { [TCA_ROUTE4_CLASSID] = { .type = NLA_U32 }, - [TCA_ROUTE4_TO] = { .type = NLA_U32 }, - [TCA_ROUTE4_FROM] = { .type = NLA_U32 }, - [TCA_ROUTE4_IIF] = { .type = NLA_U32 }, + [TCA_ROUTE4_TO] = NLA_POLICY_MAX(NLA_U32, 0xFF), + [TCA_ROUTE4_FROM] = NLA_POLICY_MAX(NLA_U32, 0xFF), + [TCA_ROUTE4_IIF] = NLA_POLICY_MAX(NLA_U32, 0x7FFF), }; static int route4_set_parms(struct net *net, struct tcf_proto *tp, @@ -397,33 +397,37 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, return err; if (tb[TCA_ROUTE4_TO]) { - if (new && handle & 0x8000) + if (new && handle & 0x8000) { + NL_SET_ERR_MSG(extack, "Invalid handle"); return -EINVAL; + } to = nla_get_u32(tb[TCA_ROUTE4_TO]); - if (to > 0xFF) - return -EINVAL; nhandle = to; } + if (tb[TCA_ROUTE4_FROM] && tb[TCA_ROUTE4_IIF]) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_ROUTE4_FROM], + "'from' and 'fromif' are mutually exclusive"); + return -EINVAL; + } + if (tb[TCA_ROUTE4_FROM]) { - if (tb[TCA_ROUTE4_IIF]) - return -EINVAL; id = nla_get_u32(tb[TCA_ROUTE4_FROM]); - if (id > 0xFF) - return -EINVAL; nhandle |= id << 16; } else if (tb[TCA_ROUTE4_IIF]) { id = nla_get_u32(tb[TCA_ROUTE4_IIF]); - if (id > 0x7FFF) - return -EINVAL; nhandle |= (id | 0x8000) << 16; } else nhandle |= 0xFFFF << 16; if (handle && new) { nhandle |= handle & 0x7F00; - if (nhandle != handle) + if (nhandle != handle) { + NL_SET_ERR_MSG_FMT(extack, + "Handle mismatch constructed: %x (expected: %x)", + handle, nhandle); return -EINVAL; + } } if (!nhandle) { @@ -478,7 +482,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, struct route4_filter __rcu **fp; struct route4_filter *fold, *f1, *pfp, *f = NULL; struct route4_bucket *b; - struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_ROUTE4_MAX + 1]; unsigned int h, th; int err; @@ -489,10 +492,12 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - if (opt == NULL) + if (NL_REQ_ATTR_CHECK(extack, NULL, tca, TCA_OPTIONS)) { + NL_SET_ERR_MSG_MOD(extack, "Missing options"); return -EINVAL; + } - err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt, + err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, tca[TCA_OPTIONS], route4_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index a9bd0a235890..ce63414185fd 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -96,7 +96,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, unsigned long orig_dst; sch_frag_prepare_frag(skb, xmit); - dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); sch_frag_rt.dst.dev = skb->dev; @@ -112,7 +112,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, sch_frag_prepare_frag(skb, xmit); memset(&sch_frag_rt, 0, sizeof(sch_frag_rt)); - dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); sch_frag_rt.dst.dev = skb->dev; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 43f2731bf590..5c0ed5909d85 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -128,7 +128,6 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, { struct sctp_association *asoc = t->asoc; struct sock *sk = asoc->base.sk; - struct ipv6_pinfo *np; int err = 0; switch (type) { @@ -149,9 +148,8 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, break; } - np = inet6_sk(sk); icmpv6_err_convert(type, code, &err); - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { @@ -298,7 +296,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK) fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK); - if (np->sndflow && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) { + if (inet6_test_bit(SNDFLOW, sk) && + (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); diff --git a/net/tipc/link.c b/net/tipc/link.c index e33b4f29f77c..d0143823658d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1446,7 +1446,7 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, p = (struct tipc_gap_ack_blks *)msg_data(hdr); sz = ntohs(p->len); /* Sanity check */ - if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) { + if (sz == struct_size(p, gacks, size_add(p->ugack_cnt, p->bgack_cnt))) { /* Good, check if the desired type exists */ if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt)) goto ok; @@ -1533,7 +1533,7 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr) __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0; /* Total len */ - len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt); + len = struct_size(ga, gacks, size_add(ga->bgack_cnt, ga->ugack_cnt)); ga->len = htons(len); return len; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e9d1e83a859d..9634dfd636fd 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1491,7 +1491,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, */ aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); aead_size = ALIGN(aead_size, __alignof__(*dctx)); - mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout), + mem = kmalloc(aead_size + struct_size(dctx, sg, size_add(n_sgin, n_sgout)), sk->sk_allocation); if (!mem) { err = -ENOMEM; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3e8a04a13668..e10d07c76044 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -116,6 +116,7 @@ #include <linux/freezer.h> #include <linux/file.h> #include <linux/btf_ids.h> +#include <linux/bpf-cgroup.h> #include "scm.h" @@ -1381,6 +1382,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !unix_sk(sk)->addr) { @@ -1490,6 +1495,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { err = unix_autobind(sk); @@ -1770,6 +1779,13 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); + + if (peer) + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETPEERNAME); + else + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETSOCKNAME); } sock_put(sk); out: @@ -1922,6 +1938,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_validate_addr(sunaddr, msg->msg_namelen); if (err) goto out; + + err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen, + NULL); + if (err) + goto out; } else { sunaddr = NULL; err = -ENOTCONN; @@ -2390,9 +2413,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); - if (msg->msg_name) + if (msg->msg_name) { unix_copy_addr(msg, skb->sk); + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen); + } + if (size > skb->len - skip) size = skb->len - skip; else if (size < skb->len - skip) @@ -2744,6 +2772,11 @@ unlock: DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, state->msg->msg_name); unix_copy_addr(state->msg, skb->sk); + + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + state->msg->msg_name, + &state->msg->msg_namelen); + sunaddr = NULL; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 020cf17ab7e4..013b65241b65 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1921,6 +1921,9 @@ out_err: err = total_written; } out: + if (sk->sk_type == SOCK_STREAM) + err = sk_stream_error(sk, msg->msg_flags, err); + release_sock(sk); return err; } diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index b80bf681327b..fdb95da12029 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -63,6 +63,17 @@ struct virtio_vsock { u32 guest_cid; bool seqpacket_allow; + + /* These fields are used only in tx path in function + * 'virtio_transport_send_pkt_work()', so to save + * stack space in it, place both of them here. Each + * pointer from 'out_sgs' points to the corresponding + * element in 'out_bufs' - this is initialized in + * 'virtio_vsock_probe()'. Both fields are protected + * by 'tx_lock'. +1 is needed for packet header. + */ + struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1]; + struct scatterlist out_bufs[MAX_SKB_FRAGS + 1]; }; static u32 virtio_transport_get_local_cid(void) @@ -100,8 +111,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) vq = vsock->vqs[VSOCK_VQ_TX]; for (;;) { - struct scatterlist hdr, buf, *sgs[2]; int ret, in_sg = 0, out_sg = 0; + struct scatterlist **sgs; struct sk_buff *skb; bool reply; @@ -111,12 +122,43 @@ virtio_transport_send_pkt_work(struct work_struct *work) virtio_transport_deliver_tap_pkt(skb); reply = virtio_vsock_skb_reply(skb); - - sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb))); - sgs[out_sg++] = &hdr; - if (skb->len > 0) { - sg_init_one(&buf, skb->data, skb->len); - sgs[out_sg++] = &buf; + sgs = vsock->out_sgs; + sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb), + sizeof(*virtio_vsock_hdr(skb))); + out_sg++; + + if (!skb_is_nonlinear(skb)) { + if (skb->len > 0) { + sg_init_one(sgs[out_sg], skb->data, skb->len); + out_sg++; + } + } else { + struct skb_shared_info *si; + int i; + + /* If skb is nonlinear, then its buffer must contain + * only header and nothing more. Data is stored in + * the fragged part. + */ + WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb))); + + si = skb_shinfo(skb); + + for (i = 0; i < si->nr_frags; i++) { + skb_frag_t *skb_frag = &si->frags[i]; + void *va; + + /* We will use 'page_to_virt()' for the userspace page + * here, because virtio or dma-mapping layers will call + * 'virt_to_phys()' later to fill the buffer descriptor. + * We don't touch memory at "virtual" address of this page. + */ + va = page_to_virt(skb_frag->bv_page); + sg_init_one(sgs[out_sg], + va + skb_frag->bv_offset, + skb_frag->bv_len); + out_sg++; + } } ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL); @@ -413,6 +455,37 @@ static void virtio_vsock_rx_done(struct virtqueue *vq) queue_work(virtio_vsock_workqueue, &vsock->rx_work); } +static bool virtio_transport_can_msgzerocopy(int bufs_num) +{ + struct virtio_vsock *vsock; + bool res = false; + + rcu_read_lock(); + + vsock = rcu_dereference(the_virtio_vsock); + if (vsock) { + struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX]; + + /* Check that tx queue is large enough to keep whole + * data to send. This is needed, because when there is + * not enough free space in the queue, current skb to + * send will be reinserted to the head of tx list of + * the socket to retry transmission later, so if skb + * is bigger than whole queue, it will be reinserted + * again and again, thus blocking other skbs to be sent. + * Each page of the user provided buffer will be added + * as a single buffer to the tx virtqueue, so compare + * number of pages against maximum capacity of the queue. + */ + if (bufs_num <= vq->num_max) + res = true; + } + + rcu_read_unlock(); + + return res; +} + static bool virtio_transport_seqpacket_allow(u32 remote_cid); static struct virtio_transport virtio_transport = { @@ -462,6 +535,7 @@ static struct virtio_transport virtio_transport = { }, .send_pkt = virtio_transport_send_pkt, + .can_msgzerocopy = virtio_transport_can_msgzerocopy, }; static bool virtio_transport_seqpacket_allow(u32 remote_cid) @@ -635,6 +709,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev) { struct virtio_vsock *vsock = NULL; int ret; + int i; ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); if (ret) @@ -677,6 +752,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev) if (ret < 0) goto out; + for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++) + vsock->out_sgs[i] = &vsock->out_bufs[i]; + rcu_assign_pointer(the_virtio_vsock, vsock); virtio_vsock_vqs_start(vsock); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 352d042b130b..e22c81435ef7 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -37,27 +37,89 @@ virtio_transport_get_ops(struct vsock_sock *vsk) return container_of(t, struct virtio_transport, transport); } -/* Returns a new packet on success, otherwise returns NULL. - * - * If NULL is returned, errp is set to a negative errno. - */ -static struct sk_buff * -virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port) -{ - const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len; - struct virtio_vsock_hdr *hdr; - struct sk_buff *skb; - void *payload; - int err; +static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, + struct virtio_vsock_pkt_info *info, + size_t pkt_len) +{ + struct iov_iter *iov_iter; - skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); - if (!skb) - return NULL; + if (!info->msg) + return false; + + iov_iter = &info->msg->msg_iter; + + if (iov_iter->iov_offset) + return false; + + /* We can't send whole iov. */ + if (iov_iter->count > pkt_len) + return false; + + /* Check that transport can send data in zerocopy mode. */ + t_ops = virtio_transport_get_ops(info->vsk); + + if (t_ops->can_msgzerocopy) { + int pages_in_iov = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); + int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS); + + /* +1 is for packet header. */ + return t_ops->can_msgzerocopy(pages_to_send + 1); + } + + return true; +} + +static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, + struct sk_buff *skb, + struct msghdr *msg, + bool zerocopy) +{ + struct ubuf_info *uarg; + + if (msg->msg_ubuf) { + uarg = msg->msg_ubuf; + net_zcopy_get(uarg); + } else { + struct iov_iter *iter = &msg->msg_iter; + struct ubuf_info_msgzc *uarg_zc; + + uarg = msg_zerocopy_realloc(sk_vsock(vsk), + iter->count, + NULL); + if (!uarg) + return -1; + + uarg_zc = uarg_to_msgzc(uarg); + uarg_zc->zerocopy = zerocopy ? 1 : 0; + } + + skb_zcopy_init(skb, uarg); + + return 0; +} + +static int virtio_transport_fill_skb(struct sk_buff *skb, + struct virtio_vsock_pkt_info *info, + size_t len, + bool zcopy) +{ + if (zcopy) + return __zerocopy_sg_from_iter(info->msg, NULL, skb, + &info->msg->msg_iter, + len); + + return memcpy_from_msg(skb_put(skb, len), info->msg, len); +} + +static void virtio_transport_init_hdr(struct sk_buff *skb, + struct virtio_vsock_pkt_info *info, + size_t payload_len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct virtio_vsock_hdr *hdr; hdr = virtio_vsock_hdr(skb); hdr->type = cpu_to_le16(info->type); @@ -67,43 +129,28 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, hdr->src_port = cpu_to_le32(src_port); hdr->dst_port = cpu_to_le32(dst_port); hdr->flags = cpu_to_le32(info->flags); - hdr->len = cpu_to_le32(len); - - if (info->msg && len > 0) { - payload = skb_put(skb, len); - err = memcpy_from_msg(payload, info->msg, len); - if (err) - goto out; - - if (msg_data_left(info->msg) == 0 && - info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { - hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); - - if (info->msg->msg_flags & MSG_EOR) - hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); - } - } + hdr->len = cpu_to_le32(payload_len); +} - if (info->reply) - virtio_vsock_skb_set_reply(skb); +static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, + void *dst, + size_t len) +{ + struct iov_iter iov_iter = { 0 }; + struct kvec kvec; + size_t to_copy; - trace_virtio_transport_alloc_pkt(src_cid, src_port, - dst_cid, dst_port, - len, - info->type, - info->op, - info->flags); + kvec.iov_base = dst; + kvec.iov_len = len; - if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) { - WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n"); - goto out; - } + iov_iter.iter_type = ITER_KVEC; + iov_iter.kvec = &kvec; + iov_iter.nr_segs = 1; - return skb; + to_copy = min_t(size_t, len, skb->len); -out: - kfree_skb(skb); - return NULL; + skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &iov_iter, to_copy); } /* Packet capture */ @@ -114,7 +161,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct af_vsockmon_hdr *hdr; struct sk_buff *skb; size_t payload_len; - void *payload_buf; /* A packet could be split to fit the RX buffer, so we can retrieve * the payload length from the header and the buffer pointer taking @@ -122,7 +168,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) */ pkt_hdr = virtio_vsock_hdr(pkt); payload_len = pkt->len; - payload_buf = pkt->data; skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len, GFP_ATOMIC); @@ -165,7 +210,13 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr)); if (payload_len) { - skb_put_data(skb, payload_buf, payload_len); + if (skb_is_nonlinear(pkt)) { + void *data = skb_put(skb, payload_len); + + virtio_transport_copy_nonlinear_skb(pkt, data, payload_len); + } else { + skb_put_data(skb, pkt->data, payload_len); + } } return skb; @@ -189,6 +240,82 @@ static u16 virtio_transport_get_type(struct sock *sk) return VIRTIO_VSOCK_TYPE_SEQPACKET; } +/* Returns new sk_buff on success, otherwise returns NULL. */ +static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, + size_t payload_len, + bool zcopy, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct vsock_sock *vsk; + struct sk_buff *skb; + size_t skb_len; + + skb_len = VIRTIO_VSOCK_SKB_HEADROOM; + + if (!zcopy) + skb_len += payload_len; + + skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); + if (!skb) + return NULL; + + virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port, + dst_cid, dst_port); + + vsk = info->vsk; + + /* If 'vsk' != NULL then payload is always present, so we + * will never call '__zerocopy_sg_from_iter()' below without + * setting skb owner in 'skb_set_owner_w()'. The only case + * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message + * without payload. + */ + WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy); + + /* Set owner here, because '__zerocopy_sg_from_iter()' uses + * owner of skb without check to update 'sk_wmem_alloc'. + */ + if (vsk) + skb_set_owner_w(skb, sk_vsock(vsk)); + + if (info->msg && payload_len > 0) { + int err; + + err = virtio_transport_fill_skb(skb, info, payload_len, zcopy); + if (err) + goto out; + + if (msg_data_left(info->msg) == 0 && + info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { + struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + + hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + + if (info->msg->msg_flags & MSG_EOR) + hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + } + } + + if (info->reply) + virtio_vsock_skb_set_reply(skb); + + trace_virtio_transport_alloc_pkt(src_cid, src_port, + dst_cid, dst_port, + payload_len, + info->type, + info->op, + info->flags, + zcopy); + + return skb; +out: + kfree_skb(skb); + return NULL; +} + /* This function can only be used on connecting/connected sockets, * since a socket assigned to a transport is required. * @@ -197,10 +324,12 @@ static u16 virtio_transport_get_type(struct sock *sk) static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, struct virtio_vsock_pkt_info *info) { + u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; u32 src_cid, src_port, dst_cid, dst_port; const struct virtio_transport *t_ops; struct virtio_vsock_sock *vvs; u32 pkt_len = info->pkt_len; + bool can_zcopy = false; u32 rest_len; int ret; @@ -229,15 +358,30 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) return pkt_len; + if (info->msg) { + /* If zerocopy is not enabled by 'setsockopt()', we behave as + * there is no MSG_ZEROCOPY flag set. + */ + if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) + info->msg->msg_flags &= ~MSG_ZEROCOPY; + + if (info->msg->msg_flags & MSG_ZEROCOPY) + can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len); + + if (can_zcopy) + max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, + (MAX_SKB_FRAGS * PAGE_SIZE)); + } + rest_len = pkt_len; do { struct sk_buff *skb; size_t skb_len; - skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, rest_len); + skb_len = min(max_skb_len, rest_len); - skb = virtio_transport_alloc_skb(info, skb_len, + skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy, src_cid, src_port, dst_cid, dst_port); if (!skb) { @@ -245,6 +389,21 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, break; } + /* We process buffer part by part, allocating skb on + * each iteration. If this is last skb for this buffer + * and MSG_ZEROCOPY mode is in use - we must allocate + * completion for the current syscall. + */ + if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && + skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { + if (virtio_transport_init_zcopy_skb(vsk, skb, + info->msg, + can_zcopy)) { + ret = -ENOMEM; + break; + } + } + virtio_transport_inc_tx_pkt(vvs, skb); ret = t_ops->send_pkt(skb); @@ -364,9 +523,10 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk, spin_unlock_bh(&vvs->rx_lock); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) goto out; @@ -410,25 +570,27 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, while (total < len && !skb_queue_empty(&vvs->rx_queue)) { skb = skb_peek(&vvs->rx_queue); - bytes = len - total; - if (bytes > skb->len) - bytes = skb->len; + bytes = min_t(size_t, len - total, + skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ spin_unlock_bh(&vvs->rx_lock); - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, + VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) goto out; spin_lock_bh(&vvs->rx_lock); total += bytes; - skb_pull(skb, bytes); - if (skb->len == 0) { + VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes; + + if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) { u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); virtio_transport_dec_rx_pkt(vvs, pkt_len); @@ -492,9 +654,10 @@ virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk, spin_unlock_bh(&vvs->rx_lock); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) return err; @@ -553,11 +716,13 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, int err; /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ spin_unlock_bh(&vvs->rx_lock); - err = memcpy_to_msg(msg, skb->data, bytes_to_copy); + err = skb_copy_datagram_iter(skb, 0, + &msg->msg_iter, + bytes_to_copy); if (err) { /* Copy of message failed. Rest of * fragments will be freed without copy. @@ -954,7 +1119,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (!t) return -ENOTCONN; - reply = virtio_transport_alloc_skb(&info, 0, + reply = virtio_transport_alloc_skb(&info, 0, false, le64_to_cpu(hdr->dst_cid), le32_to_cpu(hdr->dst_port), le64_to_cpu(hdr->src_cid), diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 55f8b9b0e06d..7482d0aca504 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1228,7 +1228,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->dev = dev; xs->zc = xs->umem->zc; - xs->sg = !!(flags & XDP_USE_SG); + xs->sg = !!(xs->umem->flags & XDP_UMEM_SG_FLAG); xs->queue_id = qid; xp_add_xsk(xs->pool, xs); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index b3f7b310811e..49cb9f9a09be 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -170,6 +170,9 @@ int xp_assign_dev(struct xsk_buff_pool *pool, if (err) return err; + if (flags & XDP_USE_SG) + pool->umem->flags |= XDP_UMEM_SG_FLAG; + if (flags & XDP_USE_NEED_WAKEUP) pool->uses_need_wakeup = true; /* Tx needs to be explicitly woken up the first time. Also diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d24b4d4f620e..5cdd3bca3637 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2566,7 +2566,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0); + xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0); if (likely(xdst)) { memset_after(xdst, 0, u.dst); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4ccf4236031c..90af76fa9dd8 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -169,12 +169,16 @@ endif TPROGS_CFLAGS += -Wall -O2 TPROGS_CFLAGS += -Wmissing-prototypes TPROGS_CFLAGS += -Wstrict-prototypes +TPROGS_CFLAGS += $(call try-run,\ + printf "int main() { return 0; }" |\ + $(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,) TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) TPROGS_CFLAGS += -I$(srctree)/tools/include TPROGS_CFLAGS += -I$(srctree)/tools/perf +TPROGS_CFLAGS += -I$(srctree)/tools/lib TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0 ifdef SYSROOT @@ -314,6 +318,9 @@ XDP_SAMPLE_CFLAGS += -Wall -O2 \ $(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS) $(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h +# Override includes for trace_helpers.o because __must_check won't be defined +# in our include path. +$(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check= -include $(BPF_SAMPLES_PATH)/Makefile.target diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 7a788bb837fc..7a09ac74fac0 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -17,9 +17,9 @@ static void usage(const char *cmd) { - printf("USAGE: %s [-i num_progs] [-h]\n", cmd); - printf(" -i num_progs # number of progs of the test\n"); - printf(" -h # help\n"); + printf("USAGE: %s [-i nr_tests] [-h]\n", cmd); + printf(" -i nr_tests # rounds of test to run\n"); + printf(" -h # help\n"); } static void verify_map(int map_id) @@ -45,14 +45,14 @@ static void verify_map(int map_id) } } -static int test(char *filename, int num_progs) +static int test(char *filename, int nr_tests) { - int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0; - struct bpf_link *links[num_progs * 4]; - struct bpf_object *objs[num_progs]; + int map0_fds[nr_tests], map1_fds[nr_tests], fd, i, j = 0; + struct bpf_link **links = NULL; + struct bpf_object *objs[nr_tests]; struct bpf_program *prog; - for (i = 0; i < num_progs; i++) { + for (i = 0; i < nr_tests; i++) { objs[i] = bpf_object__open_file(filename, NULL); if (libbpf_get_error(objs[i])) { fprintf(stderr, "opening BPF object file failed\n"); @@ -60,6 +60,19 @@ static int test(char *filename, int num_progs) goto cleanup; } + /* One-time initialization */ + if (!links) { + int nr_progs = 0; + + bpf_object__for_each_program(prog, objs[i]) + nr_progs += 1; + + links = calloc(nr_progs * nr_tests, sizeof(struct bpf_link *)); + + if (!links) + goto cleanup; + } + /* load BPF program */ if (bpf_object__load(objs[i])) { fprintf(stderr, "loading BPF object file failed\n"); @@ -101,14 +114,18 @@ static int test(char *filename, int num_progs) close(fd); /* verify the map */ - for (i = 0; i < num_progs; i++) { + for (i = 0; i < nr_tests; i++) { verify_map(map0_fds[i]); verify_map(map1_fds[i]); } cleanup: - for (j--; j >= 0; j--) - bpf_link__destroy(links[j]); + if (links) { + for (j--; j >= 0; j--) + bpf_link__destroy(links[j]); + + free(links); + } for (i--; i >= 0; i--) bpf_object__close(objs[i]); @@ -117,13 +134,13 @@ cleanup: int main(int argc, char **argv) { - int opt, num_progs = 1; + int opt, nr_tests = 1; char filename[256]; while ((opt = getopt(argc, argv, "i:h")) != -1) { switch (opt) { case 'i': - num_progs = atoi(optarg); + nr_tests = atoi(optarg); break; case 'h': default: @@ -134,5 +151,5 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - return test(filename, num_progs); + return test(filename, nr_tests); } diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index bd015ec9847b..2ce900f66d6e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -36,11 +36,14 @@ CGROUP COMMANDS | **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** | | **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** | | **cgroup_inet4_connect** | **cgroup_inet6_connect** | -| **cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** | +| **cgroup_unix_connect** | **cgroup_inet4_getpeername** | +| **cgroup_inet6_getpeername** | **cgroup_unix_getpeername** | | **cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** | -| **cgroup_udp4_sendmsg** | **cgroup_udp6_sendmsg** | +| **cgroup_unix_getsockname** | **cgroup_udp4_sendmsg** | +| **cgroup_udp6_sendmsg** | **cgroup_unix_sendmsg** | | **cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** | -| **cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** | +| **cgroup_unix_recvmsg** | **cgroup_sysctl** | +| **cgroup_getsockopt** | **cgroup_setsockopt** | | **cgroup_inet_sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } @@ -102,21 +105,28 @@ DESCRIPTION **post_bind6** return from bind(2) for an inet6 socket (since 4.17); **connect4** call to connect(2) for an inet4 socket (since 4.17); **connect6** call to connect(2) for an inet6 socket (since 4.17); + **connect_unix** call to connect(2) for a unix socket (since 6.7); **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp4 socket (since 4.18); **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp6 socket (since 4.18); + **sendmsg_unix** call to sendto(2), sendmsg(2), sendmmsg(2) for + an unconnected unix socket (since 6.7); **recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for an unconnected udp4 socket (since 5.2); **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for an unconnected udp6 socket (since 5.2); + **recvmsg_unix** call to recvfrom(2), recvmsg(2), recvmmsg(2) for + an unconnected unix socket (since 6.7); **sysctl** sysctl access (since 5.2); **getsockopt** call to getsockopt (since 5.3); **setsockopt** call to setsockopt (since 5.3); **getpeername4** call to getpeername(2) for an inet4 socket (since 5.8); **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8); + **getpeername_unix** call to getpeername(2) for a unix socket (since 6.7); **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8); **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8). + **getsockname_unix** call to getsockname(2) for a unix socket (since 6.7); **sock_release** closing an userspace inet socket (since 5.9). **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index dcae81bd27ed..58e6a5b10ef7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -47,9 +47,11 @@ PROG COMMANDS | **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** | | **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** | | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | -| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** | -| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | -| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | +| **cgroup/connect4** | **cgroup/connect6** | **cgroup/connect_unix** | +| **cgroup/getpeername4** | **cgroup/getpeername6** | **cgroup/getpeername_unix** | +| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/getsockname_unix** | +| **cgroup/sendmsg4** | **cgroup/sendmsg6** | **cgroup/sendmsg_unix** | +| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/recvmsg_unix** | **cgroup/sysctl** | | **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 085bf18f3659..6e4f7ce6bc01 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -480,13 +480,13 @@ _bpftool() action tracepoint raw_tracepoint \ xdp perf_event cgroup/skb cgroup/sock \ cgroup/dev lwt_in lwt_out lwt_xmit \ - lwt_seg6local sockops sk_skb sk_msg \ - lirc_mode2 cgroup/bind4 cgroup/bind6 \ - cgroup/connect4 cgroup/connect6 \ - cgroup/getpeername4 cgroup/getpeername6 \ - cgroup/getsockname4 cgroup/getsockname6 \ - cgroup/sendmsg4 cgroup/sendmsg6 \ - cgroup/recvmsg4 cgroup/recvmsg6 \ + lwt_seg6local sockops sk_skb sk_msg lirc_mode2 \ + cgroup/bind4 cgroup/bind6 \ + cgroup/connect4 cgroup/connect6 cgroup/connect_unix \ + cgroup/getpeername4 cgroup/getpeername6 cgroup/getpeername_unix \ + cgroup/getsockname4 cgroup/getsockname6 cgroup/getsockname_unix \ + cgroup/sendmsg4 cgroup/sendmsg6 cgroup/sendmsg_unix \ + cgroup/recvmsg4 cgroup/recvmsg6 cgroup/recvmsg_unix \ cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ cgroup/setsockopt cgroup/sock_release struct_ops \ diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index ac846b0805b4..af6898c0f388 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -28,13 +28,15 @@ " cgroup_device | cgroup_inet4_bind |\n" \ " cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \ " cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \ - " cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \ - " cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \ - " cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \ - " cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \ - " cgroup_udp6_recvmsg | cgroup_sysctl |\n" \ - " cgroup_getsockopt | cgroup_setsockopt |\n" \ - " cgroup_inet_sock_release }" + " cgroup_inet6_connect | cgroup_unix_connect |\n" \ + " cgroup_inet4_getpeername | cgroup_inet6_getpeername |\n" \ + " cgroup_unix_getpeername | cgroup_inet4_getsockname |\n" \ + " cgroup_inet6_getsockname | cgroup_unix_getsockname |\n" \ + " cgroup_udp4_sendmsg | cgroup_udp6_sendmsg |\n" \ + " cgroup_unix_sendmsg | cgroup_udp4_recvmsg |\n" \ + " cgroup_udp6_recvmsg | cgroup_unix_recvmsg |\n" \ + " cgroup_sysctl | cgroup_getsockopt |\n" \ + " cgroup_setsockopt | cgroup_inet_sock_release }" static unsigned int query_flags; static struct btf *btf_vmlinux; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 2883660d6b67..ee3ce2b8000d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -708,17 +708,22 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h codegen("\ \n\ - skel->%1$s = skel_prep_map_data((void *)\"\\ \n\ - ", ident); + { \n\ + static const char data[] __attribute__((__aligned__(8))) = \"\\\n\ + "); mmap_data = bpf_map__initial_value(map, &mmap_size); print_hex(mmap_data, mmap_size); codegen("\ \n\ - \", %1$zd, %2$zd); \n\ - if (!skel->%3$s) \n\ - goto cleanup; \n\ - skel->maps.%3$s.initial_value = (__u64) (long) skel->%3$s;\n\ - ", bpf_map_mmap_sz(map), mmap_size, ident); + \"; \n\ + \n\ + skel->%1$s = skel_prep_map_data((void *)data, %2$zd,\n\ + sizeof(data) - 1);\n\ + if (!skel->%1$s) \n\ + goto cleanup; \n\ + skel->maps.%1$s.initial_value = (__u64) (long) skel->%1$s;\n\ + } \n\ + ", ident, bpf_map_mmap_sz(map)); } codegen("\ \n\ @@ -733,32 +738,30 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h { \n\ struct bpf_load_and_run_opts opts = {}; \n\ int err; \n\ - \n\ - opts.ctx = (struct bpf_loader_ctx *)skel; \n\ - opts.data_sz = %2$d; \n\ - opts.data = (void *)\"\\ \n\ + static const char opts_data[] __attribute__((__aligned__(8))) = \"\\\n\ ", - obj_name, opts.data_sz); + obj_name); print_hex(opts.data, opts.data_sz); codegen("\ \n\ \"; \n\ + static const char opts_insn[] __attribute__((__aligned__(8))) = \"\\\n\ "); - - codegen("\ - \n\ - opts.insns_sz = %d; \n\ - opts.insns = (void *)\"\\ \n\ - ", - opts.insns_sz); print_hex(opts.insns, opts.insns_sz); codegen("\ \n\ \"; \n\ + \n\ + opts.ctx = (struct bpf_loader_ctx *)skel; \n\ + opts.data_sz = sizeof(opts_data) - 1; \n\ + opts.data = (void *)opts_data; \n\ + opts.insns_sz = sizeof(opts_insn) - 1; \n\ + opts.insns = (void *)opts_insn; \n\ + \n\ err = bpf_load_and_run(&opts); \n\ if (err < 0) \n\ return err; \n\ - ", obj_name); + "); bpf_object__for_each_map(map, obj) { const char *mmap_flags; @@ -1209,7 +1212,7 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \n\ - s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\ + s->data = %1$s__elf_bytes(&s->data_sz); \n\ \n\ obj->skeleton = s; \n\ return 0; \n\ @@ -1218,12 +1221,12 @@ static int do_skeleton(int argc, char **argv) return err; \n\ } \n\ \n\ - static inline const void *%2$s__elf_bytes(size_t *sz) \n\ + static inline const void *%1$s__elf_bytes(size_t *sz) \n\ { \n\ - *sz = %1$d; \n\ - return (const void *)\"\\ \n\ - " - , file_sz, obj_name); + static const char data[] __attribute__((__aligned__(8))) = \"\\\n\ + ", + obj_name + ); /* embed contents of BPF object file */ print_hex(obj_data, file_sz); @@ -1231,6 +1234,9 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ \"; \n\ + \n\ + *sz = sizeof(data) - 1; \n\ + return (const void *)data; \n\ } \n\ \n\ #ifdef __cplusplus \n\ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 2e5c231e08ac..4b1407b05056 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -265,6 +265,7 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_bool_field(json_wtr, "retprobe", info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN); jsonw_uint_field(json_wtr, "func_cnt", info->kprobe_multi.count); + jsonw_uint_field(json_wtr, "missed", info->kprobe_multi.missed); jsonw_name(json_wtr, "funcs"); jsonw_start_array(json_wtr); addrs = u64_to_ptr(info->kprobe_multi.addrs); @@ -301,6 +302,7 @@ show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_string_field(wtr, "func", u64_to_ptr(info->perf_event.kprobe.func_name)); jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset); + jsonw_uint_field(wtr, "missed", info->perf_event.kprobe.missed); } static void @@ -641,6 +643,8 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info) else printf("\n\tkprobe.multi "); printf("func_cnt %u ", info->kprobe_multi.count); + if (info->kprobe_multi.missed) + printf("missed %llu ", info->kprobe_multi.missed); addrs = (__u64 *)u64_to_ptr(info->kprobe_multi.addrs); qsort(addrs, info->kprobe_multi.count, sizeof(__u64), cmp_u64); @@ -683,6 +687,8 @@ static void show_perf_event_kprobe_plain(struct bpf_link_info *info) printf("%s", buf); if (info->perf_event.kprobe.offset) printf("+%#x", info->perf_event.kprobe.offset); + if (info->perf_event.kprobe.missed) + printf(" missed %llu", info->perf_event.kprobe.missed); printf(" "); } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 8443a149dd17..7ec4f5671e7a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2475,9 +2475,10 @@ static int do_help(int argc, char **argv) " sk_reuseport | flow_dissector | cgroup/sysctl |\n" " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" - " cgroup/getpeername4 | cgroup/getpeername6 |\n" - " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n" - " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" + " cgroup/connect_unix | cgroup/getpeername4 | cgroup/getpeername6 |\n" + " cgroup/getpeername_unix | cgroup/getsockname4 | cgroup/getsockname6 |\n" + " cgroup/getsockname_unix | cgroup/sendmsg4 | cgroup/sendmsg6 |\n" + " cgroup/sendmsg°unix | cgroup/recvmsg4 | cgroup/recvmsg6 | cgroup/recvmsg_unix |\n" " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" " ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n" diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0448700890f7..7ba61b75bc0e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -932,7 +932,14 @@ enum bpf_map_type { */ BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, - BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs + * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE + + * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE + * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE + * deprecated. + */ + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, @@ -1040,6 +1047,11 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, __MAX_BPF_ATTACH_TYPE }; @@ -2697,8 +2709,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: @@ -2936,8 +2948,8 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. * It supports the same set of *optname*\ s that is supported by @@ -3257,6 +3269,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -5089,6 +5106,8 @@ union bpf_attr { * **BPF_F_TIMER_ABS** * Start the timer in absolute expire value instead of the * default relative one. + * **BPF_F_TIMER_CPU_PIN** + * Timer will be pinned to the CPU of the caller. * * Return * 0 on success. @@ -6525,6 +6544,7 @@ struct bpf_link_info { __aligned_u64 addrs; __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; + __u64 missed; } kprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ @@ -6540,6 +6560,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ @@ -6953,6 +6974,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6965,6 +6987,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -6999,6 +7022,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -7300,9 +7326,11 @@ struct bpf_core_relo { * Flags to control bpf_timer_start() behaviour. * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is * relative to current time. + * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller. */ enum { BPF_F_TIMER_ABS = (1ULL << 0), + BPF_F_TIMER_CPU_PIN = (1ULL << 1), }; /* BPF numbers iterator state */ diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index c1634b95c223..2943a151d4f1 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -38,11 +38,27 @@ enum netdev_xdp_act { NETDEV_XDP_ACT_MASK = 127, }; +/** + * enum netdev_xdp_rx_metadata + * @NETDEV_XDP_RX_METADATA_TIMESTAMP: Device is capable of exposing receive HW + * timestamp via bpf_xdp_metadata_rx_timestamp(). + * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet + * hash via bpf_xdp_metadata_rx_hash(). + */ +enum netdev_xdp_rx_metadata { + NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, + NETDEV_XDP_RX_METADATA_HASH = 2, + + /* private: */ + NETDEV_XDP_RX_METADATA_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, + NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index bbab9ad9dc5a..77ceea575dc7 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -181,6 +181,7 @@ enum libbpf_tristate { #define __ksym __attribute__((section(".ksyms"))) #define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted"))) #define __kptr __attribute__((btf_type_tag("kptr"))) +#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) #define bpf_ksym_exists(sym) ({ \ _Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 3803479dbe10..1c13f8e88833 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -362,8 +362,6 @@ struct pt_regs___arm64 { #define __PT_PARM7_REG a6 #define __PT_PARM8_REG a7 -/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */ -#define PT_REGS_SYSCALL_REGS(ctx) ctx #define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG #define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG #define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 8484b563b53d..ee95fd379d4d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -448,6 +448,165 @@ static int btf_parse_type_sec(struct btf *btf) return 0; } +static int btf_validate_str(const struct btf *btf, __u32 str_off, const char *what, __u32 type_id) +{ + const char *s; + + s = btf__str_by_offset(btf, str_off); + if (!s) { + pr_warn("btf: type [%u]: invalid %s (string offset %u)\n", type_id, what, str_off); + return -EINVAL; + } + + return 0; +} + +static int btf_validate_id(const struct btf *btf, __u32 id, __u32 ctx_id) +{ + const struct btf_type *t; + + t = btf__type_by_id(btf, id); + if (!t) { + pr_warn("btf: type [%u]: invalid referenced type ID %u\n", ctx_id, id); + return -EINVAL; + } + + return 0; +} + +static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __u32 id) +{ + __u32 kind = btf_kind(t); + int err, i, n; + + err = btf_validate_str(btf, t->name_off, "type name", id); + if (err) + return err; + + switch (kind) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FWD: + case BTF_KIND_FLOAT: + break; + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + err = btf_validate_id(btf, t->type, id); + if (err) + return err; + break; + case BTF_KIND_ARRAY: { + const struct btf_array *a = btf_array(t); + + err = btf_validate_id(btf, a->type, id); + err = err ?: btf_validate_id(btf, a->index_type, id); + if (err) + return err; + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "field name", id); + err = err ?: btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + case BTF_KIND_ENUM: { + const struct btf_enum *m = btf_enum(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "enum name", id); + if (err) + return err; + } + break; + } + case BTF_KIND_ENUM64: { + const struct btf_enum64 *m = btf_enum64(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "enum name", id); + if (err) + return err; + } + break; + } + case BTF_KIND_FUNC: { + const struct btf_type *ft; + + err = btf_validate_id(btf, t->type, id); + if (err) + return err; + ft = btf__type_by_id(btf, t->type); + if (btf_kind(ft) != BTF_KIND_FUNC_PROTO) { + pr_warn("btf: type [%u]: referenced type [%u] is not FUNC_PROTO\n", id, t->type); + return -EINVAL; + } + break; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *m = btf_params(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "param name", id); + err = err ?: btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + case BTF_KIND_DATASEC: { + const struct btf_var_secinfo *m = btf_var_secinfos(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + default: + pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind); + return -EINVAL; + } + return 0; +} + +/* Validate basic sanity of BTF. It's intentionally less thorough than + * kernel's validation and validates only properties of BTF that libbpf relies + * on to be correct (e.g., valid type IDs, valid string offsets, etc) + */ +static int btf_sanity_check(const struct btf *btf) +{ + const struct btf_type *t; + __u32 i, n = btf__type_cnt(btf); + int err; + + for (i = 1; i < n; i++) { + t = btf_type_by_id(btf, i); + err = btf_validate_type(btf, t, i); + if (err) + return err; + } + return 0; +} + __u32 btf__type_cnt(const struct btf *btf) { return btf->start_id + btf->nr_types; @@ -902,6 +1061,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) err = btf_parse_str_sec(btf); err = err ?: btf_parse_type_sec(btf); + err = err ?: btf_sanity_check(btf); if (err) goto done; diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index 9d0296c1726a..2a158e8a8b7c 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <libelf.h> #include <gelf.h> #include <fcntl.h> @@ -10,6 +13,17 @@ #define STRERR_BUFSIZE 128 +/* A SHT_GNU_versym section holds 16-bit words. This bit is set if + * the symbol is hidden and can only be seen when referenced using an + * explicit version number. This is a GNU extension. + */ +#define VERSYM_HIDDEN 0x8000 + +/* This is the mask for the rest of the data in a word read from a + * SHT_GNU_versym section. + */ +#define VERSYM_VERSION 0x7fff + int elf_open(const char *binary_path, struct elf_fd *elf_fd) { char errmsg[STRERR_BUFSIZE]; @@ -64,13 +78,18 @@ struct elf_sym { const char *name; GElf_Sym sym; GElf_Shdr sh; + int ver; + bool hidden; }; struct elf_sym_iter { Elf *elf; Elf_Data *syms; + Elf_Data *versyms; + Elf_Data *verdefs; size_t nr_syms; size_t strtabidx; + size_t verdef_strtabidx; size_t next_sym_idx; struct elf_sym sym; int st_type; @@ -111,6 +130,26 @@ static int elf_sym_iter_new(struct elf_sym_iter *iter, iter->nr_syms = iter->syms->d_size / sh.sh_entsize; iter->elf = elf; iter->st_type = st_type; + + /* Version symbol table is meaningful to dynsym only */ + if (sh_type != SHT_DYNSYM) + return 0; + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL); + if (!scn) + return 0; + iter->versyms = elf_getdata(scn, 0); + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL); + if (!scn) { + pr_debug("elf: failed to find verdef ELF sections in '%s'\n", binary_path); + return -ENOENT; + } + if (!gelf_getshdr(scn, &sh)) + return -EINVAL; + iter->verdef_strtabidx = sh.sh_link; + iter->verdefs = elf_getdata(scn, 0); + return 0; } @@ -119,6 +158,7 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) struct elf_sym *ret = &iter->sym; GElf_Sym *sym = &ret->sym; const char *name = NULL; + GElf_Versym versym; Elf_Scn *sym_scn; size_t idx; @@ -138,12 +178,80 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) iter->next_sym_idx = idx + 1; ret->name = name; + ret->ver = 0; + ret->hidden = false; + + if (iter->versyms) { + if (!gelf_getversym(iter->versyms, idx, &versym)) + continue; + ret->ver = versym & VERSYM_VERSION; + ret->hidden = versym & VERSYM_HIDDEN; + } return ret; } return NULL; } +static const char *elf_get_vername(struct elf_sym_iter *iter, int ver) +{ + GElf_Verdaux verdaux; + GElf_Verdef verdef; + int offset; + + offset = 0; + while (gelf_getverdef(iter->verdefs, offset, &verdef)) { + if (verdef.vd_ndx != ver) { + if (!verdef.vd_next) + break; + + offset += verdef.vd_next; + continue; + } + + if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux)) + break; + + return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name); + + } + return NULL; +} + +static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym, + const char *name, size_t name_len, const char *lib_ver) +{ + const char *ver_name; + + /* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER + * make sure the func part matches the user specified name + */ + if (strncmp(sym->name, name, name_len) != 0) + return false; + + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (sym->name[name_len] != '\0' && sym->name[name_len] != '@') + return false; + + /* If user does not specify symbol version, then we got a match */ + if (!lib_ver) + return true; + + /* If user specifies symbol version, for dynamic symbols, + * get version name from ELF verdef section for comparison. + */ + if (sh_type == SHT_DYNSYM) { + ver_name = elf_get_vername(iter, sym->ver); + if (!ver_name) + return false; + return strcmp(ver_name, lib_ver) == 0; + } + + /* For normal symbols, it is already in form of func@LIB_VER */ + return strcmp(sym->name, name) == 0; +} /* Transform symbol's virtual address (absolute for binaries and relative * for shared libs) into file offset, which is what kernel is expecting @@ -166,7 +274,8 @@ static unsigned long elf_sym_offset(struct elf_sym *sym) long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) { int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; - bool is_shared_lib, is_name_qualified; + const char *at_symbol, *lib_ver; + bool is_shared_lib; long ret = -ENOENT; size_t name_len; GElf_Ehdr ehdr; @@ -179,9 +288,18 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) /* for shared lib case, we do not need to calculate relative offset */ is_shared_lib = ehdr.e_type == ET_DYN; - name_len = strlen(name); - /* Does name specify "@@LIB"? */ - is_name_qualified = strstr(name, "@@") != NULL; + /* Does name specify "@@LIB_VER" or "@LIB_VER" ? */ + at_symbol = strchr(name, '@'); + if (at_symbol) { + name_len = at_symbol - name; + /* skip second @ if it's @@LIB_VER case */ + if (at_symbol[1] == '@') + at_symbol++; + lib_ver = at_symbol + 1; + } else { + name_len = strlen(name); + lib_ver = NULL; + } /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically @@ -201,20 +319,17 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) goto out; while ((sym = elf_sym_iter_next(&iter))) { - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ - if (strncmp(sym->name, name, name_len) != 0) - continue; - /* ...but we don't want a search for "foo" to match 'foo2" also, so any - * additional characters in sname should be of the form "@@LIB". - */ - if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') + if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver)) continue; cur_bind = GELF_ST_BIND(sym->sym.st_info); if (ret > 0) { /* handle multiple matches */ - if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { + if (elf_sym_offset(sym) == ret) { + /* same offset, no problem */ + continue; + } else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { /* Only accept one non-weak bind. */ pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", sym->name, name, binary_path); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 96ff1aa4bf6a..a295f6acf98f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -82,17 +82,22 @@ static const char * const attach_type_name[] = { [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", + [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect", [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", + [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername", [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", + [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname", [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", + [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg", [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", + [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg", [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", @@ -436,9 +441,11 @@ struct bpf_program { int fd; bool autoload; bool autoattach; + bool sym_global; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; + int exception_cb_idx; int prog_ifindex; __u32 attach_btf_obj_fd; @@ -765,6 +772,7 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->type = BPF_PROG_TYPE_UNSPEC; prog->fd = -1; + prog->exception_cb_idx = -1; /* libbpf's convention for SEC("?abc...") is that it's just like * SEC("abc...") but the corresponding bpf_program starts out with @@ -871,14 +879,16 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, if (err) return err; + if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL) + prog->sym_global = true; + /* if function is a global/weak symbol, but has restricted * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC * as static to enable more permissive BPF verification mode * with more outside context available to BPF verifier */ - if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL - && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN - || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) + if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -3142,6 +3152,86 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } } + if (!kernel_supports(obj, FEAT_BTF_DECL_TAG)) + goto skip_exception_cb; + for (i = 0; i < obj->nr_programs; i++) { + struct bpf_program *prog = &obj->programs[i]; + int j, k, n; + + if (prog_is_subprog(obj, prog)) + continue; + n = btf__type_cnt(obj->btf); + for (j = 1; j < n; j++) { + const char *str = "exception_callback:", *name; + size_t len = strlen(str); + struct btf_type *t; + + t = btf_type_by_id(obj->btf, j); + if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) + continue; + + name = btf__str_by_offset(obj->btf, t->name_off); + if (strncmp(name, str, len)) + continue; + + t = btf_type_by_id(obj->btf, t->type); + if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { + pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", + prog->name); + return -EINVAL; + } + if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off))) + continue; + /* Multiple callbacks are specified for the same prog, + * the verifier will eventually return an error for this + * case, hence simply skip appending a subprog. + */ + if (prog->exception_cb_idx >= 0) { + prog->exception_cb_idx = -1; + break; + } + + name += len; + if (str_is_empty(name)) { + pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", + prog->name); + return -EINVAL; + } + + for (k = 0; k < obj->nr_programs; k++) { + struct bpf_program *subprog = &obj->programs[k]; + + if (!prog_is_subprog(obj, subprog)) + continue; + if (strcmp(name, subprog->name)) + continue; + /* Enforce non-hidden, as from verifier point of + * view it expects global functions, whereas the + * mark_btf_static fixes up linkage as static. + */ + if (!subprog->sym_global || subprog->mark_btf_static) { + pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", + prog->name, subprog->name); + return -EINVAL; + } + /* Let's see if we already saw a static exception callback with the same name */ + if (prog->exception_cb_idx >= 0) { + pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", + prog->name, subprog->name); + return -EINVAL; + } + prog->exception_cb_idx = k; + break; + } + + if (prog->exception_cb_idx >= 0) + continue; + pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); + return -ENOENT; + } + } +skip_exception_cb: + sanitize = btf_needs_sanitization(obj); if (sanitize) { const void *raw_data; @@ -6235,13 +6325,45 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra } static int +bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, + struct bpf_program *subprog) +{ + struct bpf_insn *insns; + size_t new_cnt; + int err; + + subprog->sub_insn_off = main_prog->insns_cnt; + + new_cnt = main_prog->insns_cnt + subprog->insns_cnt; + insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); + return -ENOMEM; + } + main_prog->insns = insns; + main_prog->insns_cnt = new_cnt; + + memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, + subprog->insns_cnt * sizeof(*insns)); + + pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", + main_prog->name, subprog->insns_cnt, subprog->name); + + /* The subprog insns are now appended. Append its relos too. */ + err = append_subprog_relos(main_prog, subprog); + if (err) + return err; + return 0; +} + +static int bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, struct bpf_program *prog) { - size_t sub_insn_idx, insn_idx, new_cnt; + size_t sub_insn_idx, insn_idx; struct bpf_program *subprog; - struct bpf_insn *insns, *insn; struct reloc_desc *relo; + struct bpf_insn *insn; int err; err = reloc_prog_func_and_line_info(obj, main_prog, prog); @@ -6316,25 +6438,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * and relocate. */ if (subprog->sub_insn_off == 0) { - subprog->sub_insn_off = main_prog->insns_cnt; - - new_cnt = main_prog->insns_cnt + subprog->insns_cnt; - insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); - if (!insns) { - pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); - return -ENOMEM; - } - main_prog->insns = insns; - main_prog->insns_cnt = new_cnt; - - memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, - subprog->insns_cnt * sizeof(*insns)); - - pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", - main_prog->name, subprog->insns_cnt, subprog->name); - - /* The subprog insns are now appended. Append its relos too. */ - err = append_subprog_relos(main_prog, subprog); + err = bpf_object__append_subprog_code(obj, main_prog, subprog); if (err) return err; err = bpf_object__reloc_code(obj, main_prog, subprog); @@ -6568,6 +6672,25 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) prog->name, err); return err; } + + /* Now, also append exception callback if it has not been done already. */ + if (prog->exception_cb_idx >= 0) { + struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; + + /* Calling exception callback directly is disallowed, which the + * verifier will reject later. In case it was processed already, + * we can skip this step, otherwise for all other valid cases we + * have to append exception callback now. + */ + if (subprog->sub_insn_off == 0) { + err = bpf_object__append_subprog_code(obj, prog, subprog); + if (err) + return err; + err = bpf_object__reloc_code(obj, prog, subprog); + if (err) + return err; + } + } } /* Process data relos for main programs */ for (i = 0; i < obj->nr_programs; i++) { @@ -8842,14 +8965,19 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), @@ -10996,7 +11124,7 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%ms", + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", &probe_type, &binary_path, &func_name); switch (n) { case 1: @@ -11506,14 +11634,14 @@ err_out: static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); - char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; - int n, ret = -EINVAL; + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; + int n, c, ret = -EINVAL; long offset = 0; *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", - &probe_type, &binary_path, &func_name, &offset); + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", + &probe_type, &binary_path, &func_name); switch (n) { case 1: /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ @@ -11524,7 +11652,17 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf prog->name, prog->sec_name); break; case 3: - case 4: + /* check if user specifies `+offset`, if yes, this should be + * the last part of the string, make sure sscanf read to EOL + */ + func_off = strrchr(func_name, '+'); + if (func_off) { + n = sscanf(func_off, "+%li%n", &offset, &c); + if (n == 1 && *(func_off + c) == '\0') + func_off[0] = '\0'; + else + offset = 0; + } opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || strcmp(probe_type, "uretprobe.s") == 0; if (opts.retprobe && offset != 0) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0e52621cba43..475378438545 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1229,6 +1229,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook, /* Ring buffer APIs */ struct ring_buffer; +struct ring; struct user_ring_buffer; typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); @@ -1249,6 +1250,78 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); +/** + * @brief **ring_buffer__ring()** returns the ringbuffer object inside a given + * ringbuffer manager representing a single BPF_MAP_TYPE_RINGBUF map instance. + * + * @param rb A ringbuffer manager object. + * @param idx An index into the ringbuffers contained within the ringbuffer + * manager object. The index is 0-based and corresponds to the order in which + * ring_buffer__add was called. + * @return A ringbuffer object on success; NULL and errno set if the index is + * invalid. + */ +LIBBPF_API struct ring *ring_buffer__ring(struct ring_buffer *rb, + unsigned int idx); + +/** + * @brief **ring__consumer_pos()** returns the current consumer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current consumer position. + */ +LIBBPF_API unsigned long ring__consumer_pos(const struct ring *r); + +/** + * @brief **ring__producer_pos()** returns the current producer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current producer position. + */ +LIBBPF_API unsigned long ring__producer_pos(const struct ring *r); + +/** + * @brief **ring__avail_data_size()** returns the number of bytes in the + * ringbuffer not yet consumed. This has no locking associated with it, so it + * can be inaccurate if operations are ongoing while this is called. However, it + * should still show the correct trend over the long-term. + * + * @param r A ringbuffer object. + * @return The number of bytes not yet consumed. + */ +LIBBPF_API size_t ring__avail_data_size(const struct ring *r); + +/** + * @brief **ring__size()** returns the total size of the ringbuffer's map data + * area (excluding special producer/consumer pages). Effectively this gives the + * amount of usable bytes of data inside the ringbuffer. + * + * @param r A ringbuffer object. + * @return The total size of the ringbuffer map data area. + */ +LIBBPF_API size_t ring__size(const struct ring *r); + +/** + * @brief **ring__map_fd()** returns the file descriptor underlying the given + * ringbuffer. + * + * @param r A ringbuffer object. + * @return The underlying ringbuffer file descriptor + */ +LIBBPF_API int ring__map_fd(const struct ring *r); + +/** + * @brief **ring__consume()** consumes available ringbuffer data without event + * polling. + * + * @param r A ringbuffer object. + * @return The number of records consumed (or INT_MAX, whichever is less), or + * a negative number if any of the callbacks return an error. + */ +LIBBPF_API int ring__consume(struct ring *r); + struct user_ring_buffer_opts { size_t sz; /* size of this struct, for forward/backward compatibility */ }; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 57712321490f..cc973b678a39 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -400,4 +400,11 @@ LIBBPF_1.3.0 { bpf_program__attach_netfilter; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; + ring__avail_data_size; + ring__consume; + ring__consumer_pos; + ring__map_fd; + ring__producer_pos; + ring__size; + ring_buffer__ring; } LIBBPF_1.2.0; diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 02199364db13..aacb64278a01 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -34,7 +34,7 @@ struct ring { struct ring_buffer { struct epoll_event *events; - struct ring *rings; + struct ring **rings; size_t page_size; int epoll_fd; int ring_cnt; @@ -57,7 +57,7 @@ struct ringbuf_hdr { __u32 pad; }; -static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) +static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r) { if (r->consumer_pos) { munmap(r->consumer_pos, rb->page_size); @@ -67,6 +67,8 @@ static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1)); r->producer_pos = NULL; } + + free(r); } /* Add extra RINGBUF maps to this ring buffer manager */ @@ -107,8 +109,10 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, return libbpf_err(-ENOMEM); rb->events = tmp; - r = &rb->rings[rb->ring_cnt]; - memset(r, 0, sizeof(*r)); + r = calloc(1, sizeof(*r)); + if (!r) + return libbpf_err(-ENOMEM); + rb->rings[rb->ring_cnt] = r; r->map_fd = map_fd; r->sample_cb = sample_cb; @@ -121,7 +125,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } r->consumer_pos = tmp; @@ -131,16 +135,16 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, */ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; if (mmap_sz != (__u64)(size_t)mmap_sz) { + err = -E2BIG; pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); - return libbpf_err(-E2BIG); + goto err_out; } tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } r->producer_pos = tmp; r->data = tmp + rb->page_size; @@ -152,14 +156,17 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, e->data.fd = rb->ring_cnt; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { err = -errno; - ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } rb->ring_cnt++; return 0; + +err_out: + ringbuf_free_ring(rb, r); + return libbpf_err(err); } void ring_buffer__free(struct ring_buffer *rb) @@ -170,7 +177,7 @@ void ring_buffer__free(struct ring_buffer *rb) return; for (i = 0; i < rb->ring_cnt; ++i) - ringbuf_unmap_ring(rb, &rb->rings[i]); + ringbuf_free_ring(rb, rb->rings[i]); if (rb->epoll_fd >= 0) close(rb->epoll_fd); @@ -278,7 +285,7 @@ int ring_buffer__consume(struct ring_buffer *rb) int i; for (i = 0; i < rb->ring_cnt; i++) { - struct ring *ring = &rb->rings[i]; + struct ring *ring = rb->rings[i]; err = ringbuf_process_ring(ring); if (err < 0) @@ -305,7 +312,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) for (i = 0; i < cnt; i++) { __u32 ring_id = rb->events[i].data.fd; - struct ring *ring = &rb->rings[ring_id]; + struct ring *ring = rb->rings[ring_id]; err = ringbuf_process_ring(ring); if (err < 0) @@ -323,6 +330,58 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb) return rb->epoll_fd; } +struct ring *ring_buffer__ring(struct ring_buffer *rb, unsigned int idx) +{ + if (idx >= rb->ring_cnt) + return errno = ERANGE, NULL; + + return rb->rings[idx]; +} + +unsigned long ring__consumer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in ringbuf_process_ring(). */ + return smp_load_acquire(r->consumer_pos); +} + +unsigned long ring__producer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in __bpf_ringbuf_reserve() in + * the kernel. + */ + return smp_load_acquire(r->producer_pos); +} + +size_t ring__avail_data_size(const struct ring *r) +{ + unsigned long cons_pos, prod_pos; + + cons_pos = ring__consumer_pos(r); + prod_pos = ring__producer_pos(r); + return prod_pos - cons_pos; +} + +size_t ring__size(const struct ring *r) +{ + return r->mask + 1; +} + +int ring__map_fd(const struct ring *r) +{ + return r->map_fd; +} + +int ring__consume(struct ring *r) +{ + int64_t res; + + res = ringbuf_process_ring(r); + if (res < 0) + return libbpf_err(res); + + return res > INT_MAX ? INT_MAX : res; +} + static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb) { if (rb->consumer_pos) { diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 68b408ca0f7f..b5ffe8cd1144 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -45,12 +45,26 @@ const char *netdev_xdp_act_str(enum netdev_xdp_act value) return netdev_xdp_act_strmap[value]; } +static const char * const netdev_xdp_rx_metadata_strmap[] = { + [0] = "timestamp", + [1] = "hash", +}; + +const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) +{ + value = ffs(value) - 1; + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xdp_rx_metadata_strmap)) + return NULL; + return netdev_xdp_rx_metadata_strmap[value]; +} + /* Policies */ struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, [NETDEV_A_DEV_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, [NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, }, [NETDEV_A_DEV_XDP_ZC_MAX_SEGS] = { .name = "xdp-zc-max-segs", .type = YNL_PT_U32, }, + [NETDEV_A_DEV_XDP_RX_METADATA_FEATURES] = { .name = "xdp-rx-metadata-features", .type = YNL_PT_U64, }, }; struct ynl_policy_nest netdev_dev_nest = { @@ -97,6 +111,11 @@ int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data) return MNL_CB_ERROR; dst->_present.xdp_zc_max_segs = 1; dst->xdp_zc_max_segs = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_DEV_XDP_RX_METADATA_FEATURES) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.xdp_rx_metadata_features = 1; + dst->xdp_rx_metadata_features = mnl_attr_get_u64(attr); } } diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 0952d3261f4d..b4351ff34595 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -18,6 +18,7 @@ extern const struct ynl_family ynl_netdev_family; /* Enums */ const char *netdev_op_str(int op); const char *netdev_xdp_act_str(enum netdev_xdp_act value); +const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); /* Common nested types */ /* ============== NETDEV_CMD_DEV_GET ============== */ @@ -48,11 +49,13 @@ struct netdev_dev_get_rsp { __u32 ifindex:1; __u32 xdp_features:1; __u32 xdp_zc_max_segs:1; + __u32 xdp_rx_metadata_features:1; } _present; __u32 ifindex; __u64 xdp_features; __u32 xdp_zc_max_segs; + __u64 xdp_rx_metadata_features; }; void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp); diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index f2db8bb78309..32abbc0af39e 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -4,7 +4,7 @@ include ../Makefile.deps CC=gcc CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ - -I../lib/ -I../generated/ -idirafter $(UAPI_PATH) + -I../../../include/uapi -I../lib/ -I../generated/ -idirafter $(UAPI_PATH) ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan endif diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c index 06433400dddd..b828225daad0 100644 --- a/tools/net/ynl/samples/netdev.c +++ b/tools/net/ynl/samples/netdev.c @@ -32,12 +32,18 @@ static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op) if (!d->_present.xdp_features) return; - printf("%llx:", d->xdp_features); + printf("xdp-features (%llx):", d->xdp_features); for (int i = 0; d->xdp_features > 1U << i; i++) { if (d->xdp_features & (1U << i)) printf(" %s", netdev_xdp_act_str(1 << i)); } + printf(" xdp-rx-metadata-features (%llx):", d->xdp_rx_metadata_features); + for (int i = 0; d->xdp_rx_metadata_features > 1U << i; i++) { + if (d->xdp_rx_metadata_features & (1U << i)) + printf(" %s", netdev_xdp_rx_metadata_str(1 << i)); + } + printf(" xdp-zc-max-segs=%u", d->xdp_zc_max_segs); name = netdev_op_str(op); diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 3babaf3eee5c..5c2cc7e8c5d0 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,5 +1,6 @@ bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 +exceptions # JIT does not support calling kfunc bpf_throw: -524 fexit_sleep # The test never returns. The remaining tests cannot start. kprobe_multi_bench_attach # needs CONFIG_FPROBE kprobe_multi_test # needs CONFIG_FPROBE @@ -9,3 +10,4 @@ fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_ma fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95) diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 5061d9e24c16..1a63996c0304 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,29 +1,5 @@ # TEMPORARY # Alphabetical order -bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) -bpf_cookie # failed to open_and_load program: -524 (trampoline) -bpf_loop # attaches to __x64_sys_nanosleep -cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) -dynptr/test_dynptr_skb_data -dynptr/test_skb_readonly -fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) +exceptions # JIT does not support calling kfunc bpf_throw (exceptions) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) -iters/testmod_seq* # s390x doesn't support kfuncs in modules yet -kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test # relies on fentry -ksyms_btf/weak_ksyms* # test_ksyms_weak__open_and_load unexpected error: -22 (kfunc) -ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) -ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) -ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) -module_attach # skel_attach skeleton attach failed: -524 (trampoline) -ringbuf # skel_load skeleton load failed (?) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) -test_lsm # attach unexpected error: -524 (trampoline) -trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) -trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) -unpriv_bpf_disabled # fentry -user_ringbuf # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3 (?) -verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) -xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) -xdp_metadata # JIT does not support calling kernel function (kfunc) -test_task_under_cgroup # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index caede9b574cb..4225f975fce3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -27,7 +27,11 @@ endif BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= SAN_LDFLAGS ?= $(SAN_CFLAGS) -CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \ +RELEASE ?= +OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0) +CFLAGS += -g $(OPT_FLAGS) -rdynamic \ + -Wall -Werror \ + $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_LDFLAGS) @@ -104,7 +108,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ xdp_features -TEST_GEN_FILES += liburandom_read.so urandom_read sign-file +TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi # Emit succinct information message describing current building step # $1 - generic step name (e.g., CC, LINK, etc); @@ -188,7 +192,7 @@ $(OUTPUT)/%:%.c $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ # LLVM's ld.lld doesn't support all the architectures, so use it only on x86 -ifeq ($(SRCARCH),x86) +ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 riscv)) LLD := lld else LLD := ld @@ -196,17 +200,20 @@ endif # Filter out -static for liburandom_read.so and its dependent targets so that static builds # do not fail. Static builds leave urandom_read relying on system-wide shared libraries. -$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c +$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom_read.map $(call msg,LIB,,$@) - $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \ - $^ $(filter-out -static,$(LDLIBS)) \ + $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ + $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \ + $(filter %.c,$^) $(filter-out -static,$(LDLIBS)) \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ + -Wl,--version-script=liburandom_read.map \ -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ - -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \ + $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ + $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ -Wl,-rpath=. -o $@ @@ -238,7 +245,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \ - EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \ cp $(RUNQSLOWER_OUTPUT)runqslower $@ @@ -276,7 +283,7 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \ - EXTRA_CFLAGS='-g -O0' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS)' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ @@ -287,7 +294,7 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \ - EXTRA_CFLAGS='-g -O0' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS)' \ OUTPUT=$(BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \ @@ -310,7 +317,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ EXTRA_LDFLAGS='$(SAN_LDFLAGS)' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers @@ -319,7 +326,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ - EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \ + EXTRA_CFLAGS='-g $(OPT_FLAGS)' ARCH= CROSS_COMPILE= \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ CC="$(HOSTCC)" LD="$(HOSTLD)" \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers @@ -640,7 +647,9 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ -$(OUTPUT)/xskxceiver: xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) +# Include find_bit.c to compile xskxceiver. +EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c +$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 209811b1993a..2c8cb3f61529 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -131,4 +131,331 @@ extern int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *nod */ extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym; +/* Description + * Allocates a percpu object of the type represented by 'local_type_id' in + * program BTF. User may use the bpf_core_type_id_local macro to pass the + * type ID of a struct in program BTF. + * + * The 'local_type_id' parameter must be a known constant. + * The 'meta' parameter is rewritten by the verifier, no need for BPF + * program to set it. + * Returns + * A pointer to a percpu object of the type corresponding to the passed in + * 'local_type_id', or NULL on failure. + */ +extern void *bpf_percpu_obj_new_impl(__u64 local_type_id, void *meta) __ksym; + +/* Convenience macro to wrap over bpf_percpu_obj_new_impl */ +#define bpf_percpu_obj_new(type) ((type __percpu_kptr *)bpf_percpu_obj_new_impl(bpf_core_type_id_local(type), NULL)) + +/* Description + * Free an allocated percpu object. All fields of the object that require + * destruction will be destructed before the storage is freed. + * + * The 'meta' parameter is rewritten by the verifier, no need for BPF + * program to set it. + * Returns + * Void. + */ +extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym; + +struct bpf_iter_task_vma; + +extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, + struct task_struct *task, + unsigned long addr) __ksym; +extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym; +extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym; + +/* Convenience macro to wrap over bpf_obj_drop_impl */ +#define bpf_percpu_obj_drop(kptr) bpf_percpu_obj_drop_impl(kptr, NULL) + +/* Description + * Throw a BPF exception from the program, immediately terminating its + * execution and unwinding the stack. The supplied 'cookie' parameter + * will be the return value of the program when an exception is thrown, + * and the default exception callback is used. Otherwise, if an exception + * callback is set using the '__exception_cb(callback)' declaration tag + * on the main program, the 'cookie' parameter will be the callback's only + * input argument. + * + * Thus, in case of default exception callback, 'cookie' is subjected to + * constraints on the program's return value (as with R0 on exit). + * Otherwise, the return value of the marked exception callback will be + * subjected to the same checks. + * + * Note that throwing an exception with lingering resources (locks, + * references, etc.) will lead to a verification error. + * + * Note that callbacks *cannot* call this helper. + * Returns + * Never. + * Throws + * An exception with the specified 'cookie' value. + */ +extern void bpf_throw(u64 cookie) __ksym; + +/* This macro must be used to mark the exception callback corresponding to the + * main program. For example: + * + * int exception_cb(u64 cookie) { + * return cookie; + * } + * + * SEC("tc") + * __exception_cb(exception_cb) + * int main_prog(struct __sk_buff *ctx) { + * ... + * return TC_ACT_OK; + * } + * + * Here, exception callback for the main program will be 'exception_cb'. Note + * that this attribute can only be used once, and multiple exception callbacks + * specified for the main program will lead to verification error. + */ +#define __exception_cb(name) __attribute__((btf_decl_tag("exception_callback:" #name))) + +#define __bpf_assert_signed(x) _Generic((x), \ + unsigned long: 0, \ + unsigned long long: 0, \ + signed long: 1, \ + signed long long: 1 \ +) + +#define __bpf_assert_check(LHS, op, RHS) \ + _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \ + _Static_assert(sizeof(LHS) == 8, "Only 8-byte integers are supported\n"); \ + _Static_assert(__builtin_constant_p(__bpf_assert_signed(LHS)), "internal static assert"); \ + _Static_assert(__builtin_constant_p((RHS)), "2nd argument must be a constant expression") + +#define __bpf_assert(LHS, op, cons, RHS, VAL) \ + ({ \ + (void)bpf_throw; \ + asm volatile ("if %[lhs] " op " %[rhs] goto +2; r1 = %[value]; call bpf_throw" \ + : : [lhs] "r"(LHS), [rhs] cons(RHS), [value] "ri"(VAL) : ); \ + }) + +#define __bpf_assert_op_sign(LHS, op, cons, RHS, VAL, supp_sign) \ + ({ \ + __bpf_assert_check(LHS, op, RHS); \ + if (__bpf_assert_signed(LHS) && !(supp_sign)) \ + __bpf_assert(LHS, "s" #op, cons, RHS, VAL); \ + else \ + __bpf_assert(LHS, #op, cons, RHS, VAL); \ + }) + +#define __bpf_assert_op(LHS, op, RHS, VAL, supp_sign) \ + ({ \ + if (sizeof(typeof(RHS)) == 8) { \ + const typeof(RHS) rhs_var = (RHS); \ + __bpf_assert_op_sign(LHS, op, "r", rhs_var, VAL, supp_sign); \ + } else { \ + __bpf_assert_op_sign(LHS, op, "i", RHS, VAL, supp_sign); \ + } \ + }) + +/* Description + * Assert that a conditional expression is true. + * Returns + * Void. + * Throws + * An exception with the value zero when the assertion fails. + */ +#define bpf_assert(cond) if (!(cond)) bpf_throw(0); + +/* Description + * Assert that a conditional expression is true. + * Returns + * Void. + * Throws + * An exception with the specified value when the assertion fails. + */ +#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value); + +/* Description + * Assert that LHS is equal to RHS. This statement updates the known value + * of LHS during verification. Note that RHS must be a constant value, and + * must fit within the data type of LHS. + * Returns + * Void. + * Throws + * An exception with the value zero when the assertion fails. + */ +#define bpf_assert_eq(LHS, RHS) \ + ({ \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, ==, RHS, 0, true); \ + }) + +/* Description + * Assert that LHS is equal to RHS. This statement updates the known value + * of LHS during verification. Note that RHS must be a constant value, and + * must fit within the data type of LHS. + * Returns + * Void. + * Throws + * An exception with the specified value when the assertion fails. + */ +#define bpf_assert_eq_with(LHS, RHS, value) \ + ({ \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, ==, RHS, value, true); \ + }) + +/* Description + * Assert that LHS is less than RHS. This statement updates the known + * bounds of LHS during verification. Note that RHS must be a constant + * value, and must fit within the data type of LHS. + * Returns + * Void. + * Throws + * An exception with the value zero when the assertion fails. + */ +#define bpf_assert_lt(LHS, RHS) \ + ({ \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, <, RHS, 0, false); \ + }) + +/* Description + * Assert that LHS is less than RHS. This statement updates the known + * bounds of LHS during verification. Note that RHS must be a constant + * value, and must fit within the data type of LHS. + * Returns + * Void. + * Throws + * An exception with the specified value when the assertion fails. + */ +#define bpf_assert_lt_with(LHS, RHS, value) \ + ({ \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, <, RHS, value, false); \ + }) + +/* Description + * Assert that LHS is greater than RHS. This statement updates the known + * bounds of LHS during verification. Note that RHS must be a constant + * value, and must fit within the data type of LHS. + * Returns + * Void. + * Throws + * An exception with the value zero when the assertion fails. + */ +#define bpf_assert_gt(LHS, RHS) \ + ({ \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, >, RHS, 0, false); \ + }) + +/* Description + * Assert that LHS is greater than RHS. This statement updates the known + * bounds of LHS during verification. Note that RHS must be a constant + * value, and must fit within the data type of LHS. + * Returns + * Void. + * Throws + * An exception with the specified value when the assertion fails. + */ +#define bpf_assert_gt_with(LHS, RHS, value) \ + ({ \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, >, RHS, value, false); \ + }) + +/* Description + * Assert that LHS is less than or equal to RHS. This statement updates the + * known bounds of LHS during verification. Note that RHS must be a + * constant value, and must fit within the data type of LHS. + * Returns + * Void. + * Throws + * An exception with the value zero when the assertion fails. + */ +#define bpf_assert_le(LHS, RHS) \ + ({ \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, <=, RHS, 0, false); \ + }) + +/* Description + * Assert that LHS is less than or equal to RHS. This statement updates the + * known bounds of LHS during verification. Note that RHS must be a + * constant value, and must fit within the data type of LHS. + * Returns + * Void. + * Throws + * An exception with the specified value when the assertion fails. + */ +#define bpf_assert_le_with(LHS, RHS, value) \ + ({ \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, <=, RHS, value, false); \ + }) + +/* Description + * Assert that LHS is greater than or equal to RHS. This statement updates + * the known bounds of LHS during verification. Note that RHS must be a + * constant value, and must fit within the data type of LHS. + * Returns + * Void. + * Throws + * An exception with the value zero when the assertion fails. + */ +#define bpf_assert_ge(LHS, RHS) \ + ({ \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, >=, RHS, 0, false); \ + }) + +/* Description + * Assert that LHS is greater than or equal to RHS. This statement updates + * the known bounds of LHS during verification. Note that RHS must be a + * constant value, and must fit within the data type of LHS. + * Returns + * Void. + * Throws + * An exception with the specified value when the assertion fails. + */ +#define bpf_assert_ge_with(LHS, RHS, value) \ + ({ \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, >=, RHS, value, false); \ + }) + +/* Description + * Assert that LHS is in the range [BEG, END] (inclusive of both). This + * statement updates the known bounds of LHS during verification. Note + * that both BEG and END must be constant values, and must fit within the + * data type of LHS. + * Returns + * Void. + * Throws + * An exception with the value zero when the assertion fails. + */ +#define bpf_assert_range(LHS, BEG, END) \ + ({ \ + _Static_assert(BEG <= END, "BEG must be <= END"); \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, >=, BEG, 0, false); \ + __bpf_assert_op(LHS, <=, END, 0, false); \ + }) + +/* Description + * Assert that LHS is in the range [BEG, END] (inclusive of both). This + * statement updates the known bounds of LHS during verification. Note + * that both BEG and END must be constant values, and must fit within the + * data type of LHS. + * Returns + * Void. + * Throws + * An exception with the specified value when the assertion fails. + */ +#define bpf_assert_range_with(LHS, BEG, END, value) \ + ({ \ + _Static_assert(BEG <= END, "BEG must be <= END"); \ + barrier_var(LHS); \ + __bpf_assert_op(LHS, >=, BEG, value, false); \ + __bpf_assert_op(LHS, <=, END, value, false); \ + }) + #endif diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 642dda0e758a..5ca68ff0b59f 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -1,6 +1,8 @@ #ifndef __BPF_KFUNCS__ #define __BPF_KFUNCS__ +struct bpf_sock_addr_kern; + /* Description * Initializes an skb-type dynptr * Returns @@ -41,4 +43,16 @@ extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; +/* Description + * Modify the address of a AF_UNIX sockaddr. + * Returns__bpf_kfunc + * -EINVAL if the address size is too big or, 0 if the sockaddr was successfully modified. + */ +extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, + const __u8 *sun_path, __u32 sun_path__sz) __ksym; + +void *bpf_cast_to_kern_ctx(void *) __ksym; + +void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; + #endif diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index cefc5dd72573..a5e246f7b202 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -138,6 +138,10 @@ __bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) it->cnt = 0; } +__bpf_kfunc void bpf_kfunc_common_test(void) +{ +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -343,6 +347,7 @@ BTF_SET8_START(bpf_testmod_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_kfunc_common_test) BTF_SET8_END(bpf_testmod_common_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h index f5c5b1375c24..7c664dd61059 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h @@ -104,4 +104,6 @@ void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p); void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p); void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p); void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len); + +void bpf_kfunc_common_test(void) __ksym; #endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 2caee8423ee0..5b1da2a32ea7 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -49,6 +49,10 @@ snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ CGROUP_WORK_DIR) +static __thread bool cgroup_workdir_mounted; + +static void __cleanup_cgroup_environment(void); + static int __enable_controllers(const char *cgroup_path, const char *controllers) { char path[PATH_MAX + 1]; @@ -195,6 +199,11 @@ int setup_cgroup_environment(void) format_cgroup_path(cgroup_workdir, ""); + if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) { + log_err("mkdir mount"); + return 1; + } + if (unshare(CLONE_NEWNS)) { log_err("unshare"); return 1; @@ -209,9 +218,10 @@ int setup_cgroup_environment(void) log_err("mount cgroup2"); return 1; } + cgroup_workdir_mounted = true; /* Cleanup existing failed runs, now that the environment is setup */ - cleanup_cgroup_environment(); + __cleanup_cgroup_environment(); if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { log_err("mkdir cgroup work dir"); @@ -306,10 +316,25 @@ int join_parent_cgroup(const char *relative_path) } /** + * __cleanup_cgroup_environment() - Delete temporary cgroups + * + * This is a helper for cleanup_cgroup_environment() that is responsible for + * deletion of all temporary cgroups that have been created during the test. + */ +static void __cleanup_cgroup_environment(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_cgroup_path(cgroup_workdir, ""); + join_cgroup_from_top(CGROUP_MOUNT_PATH); + nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); +} + +/** * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment * * This is an idempotent function to delete all temporary cgroups that - * have been created during the test, including the cgroup testing work + * have been created during the test and unmount the cgroup testing work * directory. * * At call time, it moves the calling process to the root cgroup, and then @@ -320,11 +345,10 @@ int join_parent_cgroup(const char *relative_path) */ void cleanup_cgroup_environment(void) { - char cgroup_workdir[PATH_MAX + 1]; - - format_cgroup_path(cgroup_workdir, ""); - join_cgroup_from_top(CGROUP_MOUNT_PATH); - nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); + __cleanup_cgroup_environment(); + if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH)) + log_err("umount cgroup2"); + cgroup_workdir_mounted = false; } /** diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index e41eb33b2704..02dd4409200e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -84,3 +84,4 @@ CONFIG_USERFAULTFD=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y +CONFIG_VSOCKETS=y diff --git a/tools/testing/selftests/bpf/liburandom_read.map b/tools/testing/selftests/bpf/liburandom_read.map new file mode 100644 index 000000000000..38a97a419a04 --- /dev/null +++ b/tools/testing/selftests/bpf/liburandom_read.map @@ -0,0 +1,15 @@ +LIBURANDOM_READ_1.0.0 { + global: + urandlib_api; + urandlib_api_sameoffset; + urandlib_read_without_sema; + urandlib_read_with_sema; + urandlib_read_with_sema_semaphore; + local: + *; +}; + +LIBURANDOM_READ_2.0.0 { + global: + urandlib_api; +} LIBURANDOM_READ_1.0.0; diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c index 16f1671e4bde..66191ae9863c 100644 --- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c @@ -33,11 +33,11 @@ static void create_inner_maps(enum bpf_map_type map_type, { int map_fd, map_index, ret; __u32 map_key = 0, map_id; - char map_name[15]; + char map_name[16]; for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) { memset(map_name, 0, sizeof(map_name)); - sprintf(map_name, "inner_map_fd_%d", map_index); + snprintf(map_name, sizeof(map_name), "inner_map_fd_%d", map_index); map_fd = bpf_map_create(map_type, map_name, sizeof(__u32), sizeof(__u32), 1, NULL); CHECK(map_fd < 0, diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index da72a3a66230..6db27a9088e9 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -11,6 +11,7 @@ #include <arpa/inet.h> #include <sys/mount.h> #include <sys/stat.h> +#include <sys/un.h> #include <linux/err.h> #include <linux/in.h> @@ -257,6 +258,26 @@ static int connect_fd_to_addr(int fd, return 0; } +int connect_to_addr(const struct sockaddr_storage *addr, socklen_t addrlen, int type) +{ + int fd; + + fd = socket(addr->ss_family, type, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (connect_fd_to_addr(fd, addr, addrlen, false)) + goto error_close; + + return fd; + +error_close: + save_errno_close(fd); + return -1; +} + static const struct network_helper_opts default_opts; int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) @@ -380,6 +401,19 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, if (len) *len = sizeof(*sin6); return 0; + } else if (family == AF_UNIX) { + /* Note that we always use abstract unix sockets to avoid having + * to clean up leftover files. + */ + struct sockaddr_un *sun = (void *)addr; + + memset(addr, 0, sizeof(*sun)); + sun->sun_family = family; + sun->sun_path[0] = 0; + strcpy(sun->sun_path + 1, addr_str); + if (len) + *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str); + return 0; } return -1; } diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 5eccc67d1a99..34f1200a781b 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -51,6 +51,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str, __u16 port, int timeout_ms, unsigned int nr_listens); void free_fds(int *fds, unsigned int nr_close_fds); +int connect_to_addr(const struct sockaddr_storage *addr, socklen_t len, int type); int connect_to_fd(int server_fd, int timeout_ms); int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index b92770592563..465c1c3a3d3c 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -6,6 +6,7 @@ struct bpf_reg_match { unsigned int line; + const char *reg; const char *match; }; @@ -39,13 +40,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=2"}, - {1, "R3_w=4"}, - {2, "R3_w=8"}, - {3, "R3_w=16"}, - {4, "R3_w=32"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "2"}, + {1, "R3_w", "4"}, + {2, "R3_w", "8"}, + {3, "R3_w", "16"}, + {4, "R3_w", "32"}, }, }, { @@ -67,19 +68,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=1"}, - {1, "R3_w=2"}, - {2, "R3_w=4"}, - {3, "R3_w=8"}, - {4, "R3_w=16"}, - {5, "R3_w=1"}, - {6, "R4_w=32"}, - {7, "R4_w=16"}, - {8, "R4_w=8"}, - {9, "R4_w=4"}, - {10, "R4_w=2"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "1"}, + {1, "R3_w", "2"}, + {2, "R3_w", "4"}, + {3, "R3_w", "8"}, + {4, "R3_w", "16"}, + {5, "R3_w", "1"}, + {6, "R4_w", "32"}, + {7, "R4_w", "16"}, + {8, "R4_w", "8"}, + {9, "R4_w", "4"}, + {10, "R4_w", "2"}, }, }, { @@ -96,14 +97,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=4"}, - {1, "R3_w=8"}, - {2, "R3_w=10"}, - {3, "R4_w=8"}, - {4, "R4_w=12"}, - {5, "R4_w=14"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "4"}, + {1, "R3_w", "8"}, + {2, "R3_w", "10"}, + {3, "R4_w", "8"}, + {4, "R4_w", "12"}, + {5, "R4_w", "14"}, }, }, { @@ -118,12 +119,12 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1=ctx(off=0,imm=0)"}, - {0, "R10=fp0"}, - {0, "R3_w=7"}, - {1, "R3_w=7"}, - {2, "R3_w=14"}, - {3, "R3_w=56"}, + {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R10", "fp0"}, + {0, "R3_w", "7"}, + {1, "R3_w", "7"}, + {2, "R3_w", "14"}, + {3, "R3_w", "56"}, }, }, @@ -161,19 +162,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w=pkt(off=8,r=8,imm=0)"}, - {6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {7, "R3_w=scalar(umax=510,var_off=(0x0; 0x1fe))"}, - {8, "R3_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, - {9, "R3_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"}, - {10, "R3_w=scalar(umax=4080,var_off=(0x0; 0xff0))"}, - {12, "R3_w=pkt_end(off=0,imm=0)"}, - {17, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {18, "R4_w=scalar(umax=8160,var_off=(0x0; 0x1fe0))"}, - {19, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"}, - {20, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"}, - {21, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, - {22, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"}, + {6, "R0_w", "pkt(off=8,r=8,imm=0)"}, + {6, "R3_w", "var_off=(0x0; 0xff)"}, + {7, "R3_w", "var_off=(0x0; 0x1fe)"}, + {8, "R3_w", "var_off=(0x0; 0x3fc)"}, + {9, "R3_w", "var_off=(0x0; 0x7f8)"}, + {10, "R3_w", "var_off=(0x0; 0xff0)"}, + {12, "R3_w", "pkt_end(off=0,imm=0)"}, + {17, "R4_w", "var_off=(0x0; 0xff)"}, + {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, + {19, "R4_w", "var_off=(0x0; 0xff0)"}, + {20, "R4_w", "var_off=(0x0; 0x7f8)"}, + {21, "R4_w", "var_off=(0x0; 0x3fc)"}, + {22, "R4_w", "var_off=(0x0; 0x1fe)"}, }, }, { @@ -194,16 +195,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {7, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {8, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {9, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {10, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"}, - {11, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {12, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, - {13, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"}, - {14, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"}, - {15, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"}, + {6, "R3_w", "var_off=(0x0; 0xff)"}, + {7, "R4_w", "var_off=(0x0; 0xff)"}, + {8, "R4_w", "var_off=(0x0; 0xff)"}, + {9, "R4_w", "var_off=(0x0; 0xff)"}, + {10, "R4_w", "var_off=(0x0; 0x1fe)"}, + {11, "R4_w", "var_off=(0x0; 0xff)"}, + {12, "R4_w", "var_off=(0x0; 0x3fc)"}, + {13, "R4_w", "var_off=(0x0; 0xff)"}, + {14, "R4_w", "var_off=(0x0; 0x7f8)"}, + {15, "R4_w", "var_off=(0x0; 0xff0)"}, }, }, { @@ -234,14 +235,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w=pkt(off=0,r=0,imm=0)"}, - {4, "R5_w=pkt(off=14,r=0,imm=0)"}, - {5, "R4_w=pkt(off=14,r=0,imm=0)"}, - {9, "R2=pkt(off=0,r=18,imm=0)"}, - {10, "R5=pkt(off=14,r=18,imm=0)"}, - {10, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"}, - {13, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"}, - {14, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"}, + {2, "R5_w", "pkt(off=0,r=0,imm=0)"}, + {4, "R5_w", "pkt(off=14,r=0,imm=0)"}, + {5, "R4_w", "pkt(off=14,r=0,imm=0)"}, + {9, "R2", "pkt(off=0,r=18,imm=0)"}, + {10, "R5", "pkt(off=14,r=18,imm=0)"}, + {10, "R4_w", "var_off=(0x0; 0xff)"}, + {13, "R4_w", "var_off=(0x0; 0xffff)"}, + {14, "R4_w", "var_off=(0x0; 0xffff)"}, }, }, { @@ -298,20 +299,20 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5_w=pkt(id=1,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {11, "R5_w", "pkt(id=1,off=14,"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable * offset is considered using reg->aux_off_align which * is 4 and meets the load's requirements. */ - {15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, - {15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {15, "R4", "var_off=(0x0; 0x3fc)"}, + {15, "R5", "var_off=(0x0; 0x3fc)"}, /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. To avoid BPF * verifier's precision backtracking logging @@ -319,46 +320,46 @@ static struct bpf_align_test tests[] = { * instruction to validate R5 state. We also check * that R4 is what it should be in such case. */ - {18, "R4_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, - {18, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {18, "R4_w", "var_off=(0x0; 0x3fc)"}, + {18, "R5_w", "var_off=(0x0; 0x3fc)"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w", "pkt(id=2,off=14,"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte * aligned, so the total offset is 4-byte aligned and * meets the load's requirements. */ - {24, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, - {24, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"}, + {24, "R4", "var_off=(0x0; 0x3fc)"}, + {24, "R5", "var_off=(0x0; 0x3fc)"}, /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w=pkt(off=14,r=8"}, + {26, "R5_w", "pkt(off=14,r=8,"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. */ - {28, "R4_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, - {28, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {28, "R4_w", "var_off=(0x0; 0x3fc)"}, + {28, "R5_w", "var_off=(0x0; 0x3fc)"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {29, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"}, + {29, "R5_w", "pkt(id=3,off=18,"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {31, "R4_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, - {31, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"}, + {31, "R4_w", "var_off=(0x0; 0x7fc)"}, + {31, "R5_w", "var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {35, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, - {35, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"}, + {35, "R4", "var_off=(0x0; 0x7fc)"}, + {35, "R5", "var_off=(0x0; 0x7fc)"}, }, }, { @@ -396,36 +397,36 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ - {8, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"}, + {8, "R6_w", "var_off=(0x2; 0x7fc)"}, /* Packet pointer has (4n+2) offset */ - {11, "R5_w=pkt(id=1,off=0,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"}, - {12, "R4=pkt(id=1,off=4,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"}, + {11, "R5_w", "var_off=(0x2; 0x7fc)"}, + {12, "R4", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {15, "R5=pkt(id=1,off=0,r=4,umin=14,umax=1034,var_off=(0x2; 0x7fc)"}, + {15, "R5", "var_off=(0x2; 0x7fc)"}, /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {17, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {17, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5_w=pkt(id=2,off=0,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"}, - {20, "R4=pkt(id=2,off=4,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"}, + {19, "R5_w", "var_off=(0x2; 0xffc)"}, + {20, "R4", "var_off=(0x2; 0xffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {23, "R5=pkt(id=2,off=0,r=4,umin=14,umax=2054,var_off=(0x2; 0xffc)"}, + {23, "R5", "var_off=(0x2; 0xffc)"}, }, }, { @@ -458,18 +459,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w=pkt_end(off=0,imm=0)"}, + {3, "R5_w", "pkt_end(off=0,imm=0)"}, /* (ptr - ptr) << 2 == unknown, (4n) */ - {5, "R5_w=scalar(smax=9223372036854775804,umax=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, + {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ - {6, "R5_w=scalar(smin=-9223372036854775806,smax=9223372036854775806,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ - {9, "R5=scalar(umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, - {12, "R4_w=pkt(id=1,off=4,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, + {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -477,7 +478,7 @@ static struct bpf_align_test tests[] = { * So we did not get a 'range' on R6, and the access * attempt will fail. */ - {15, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, } }, { @@ -512,24 +513,23 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {8, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {8, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w", "var_off=(0x2; 0x7fc)"}, /* New unknown value in R7 is (4n) */ - {10, "R7_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"}, + {10, "R7_w", "var_off=(0x0; 0x3fc)"}, /* Subtracting it from R6 blows our unsigned bounds */ - {11, "R6=scalar(smin=-1006,smax=1034,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + {11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>= 0 */ - {14, "R6=scalar(umin=2,umax=1034,var_off=(0x2; 0x7fc))"}, + {14, "R6", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=2,off=0,r=4,umin=2,umax=1034,var_off=(0x2; 0x7fc)"}, - + {20, "R5", "var_off=(0x2; 0x7fc)"}, }, }, { @@ -566,23 +566,23 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w=pkt(off=0,r=8,imm=0)"}, - {9, "R6_w=scalar(umax=60,var_off=(0x0; 0x3c))"}, + {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {9, "R6_w", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6_w=scalar(umin=14,umax=74,var_off=(0x2; 0x7c))"}, + {10, "R6_w", "var_off=(0x2; 0x7c)"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w=pkt(id=2,off=0,r=8,umin=18446744073709551542,umax=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, + {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ - {14, "R7_w=scalar(umin=76,umax=1096,var_off=(0x0; 0x7fc))"}, + {14, "R7_w", "var_off=(0x0; 0x7fc)"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w=pkt(id=3,off=0,r=0,umin=2,umax=1082,var_off=(0x2; 0x7fc)"}, + {16, "R5_w", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=3,off=0,r=4,umin=2,umax=1082,var_off=(0x2; 0x7fc)"}, + {20, "R5", "var_off=(0x2; 0x7fc)"}, }, }, }; @@ -635,6 +635,7 @@ static int do_test_single(struct bpf_align_test *test) line_ptr = strtok(bpf_vlog_copy, "\n"); for (i = 0; i < MAX_MATCHES; i++) { struct bpf_reg_match m = test->matches[i]; + const char *p; int tmp; if (!m.match) @@ -649,8 +650,8 @@ static int do_test_single(struct bpf_align_test *test) line_ptr = strtok(NULL, "\n"); } if (!line_ptr) { - printf("Failed to find line %u for match: %s\n", - m.line, m.match); + printf("Failed to find line %u for match: %s=%s\n", + m.line, m.reg, m.match); ret = 1; printf("%s", bpf_vlog); break; @@ -667,15 +668,15 @@ static int do_test_single(struct bpf_align_test *test) * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ - while (!strstr(line_ptr, m.match)) { + while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { cur_line = -1; line_ptr = strtok(NULL, "\n"); sscanf(line_ptr ?: "", "%u: ", &cur_line); if (!line_ptr || cur_line != m.line) break; } - if (cur_line != m.line || !line_ptr || !strstr(line_ptr, m.match)) { - printf("Failed to find match %u: %s\n", m.line, m.match); + if (cur_line != m.line || !line_ptr || !(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { + printf("Failed to find match %u: %s=%s\n", m.line, m.reg, m.match); ret = 1; printf("%s", bpf_vlog); break; diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index d2d9e965eba5..053f4d6da77a 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -193,8 +193,8 @@ error: void test_bloom_filter_map(void) { - __u32 *rand_vals, nr_rand_vals; - struct bloom_filter_map *skel; + __u32 *rand_vals = NULL, nr_rand_vals = 0; + struct bloom_filter_map *skel = NULL; int err; test_fail_cases(); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 1f02168103dd..41aba139b20b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -10,7 +10,7 @@ #include "bpf_iter_task.skel.h" #include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" -#include "bpf_iter_task_vma.skel.h" +#include "bpf_iter_task_vmas.skel.h" #include "bpf_iter_task_btf.skel.h" #include "bpf_iter_tcp4.skel.h" #include "bpf_iter_tcp6.skel.h" @@ -1399,19 +1399,19 @@ static void str_strip_first_line(char *str) static void test_task_vma_common(struct bpf_iter_attach_opts *opts) { int err, iter_fd = -1, proc_maps_fd = -1; - struct bpf_iter_task_vma *skel; + struct bpf_iter_task_vmas *skel; int len, read_size = 4; char maps_path[64]; - skel = bpf_iter_task_vma__open(); - if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open")) + skel = bpf_iter_task_vmas__open(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open")) return; skel->bss->pid = getpid(); skel->bss->one_task = opts ? 1 : 0; - err = bpf_iter_task_vma__load(skel); - if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) + err = bpf_iter_task_vmas__load(skel); + if (!ASSERT_OK(err, "bpf_iter_task_vmas__load")) goto out; skel->links.proc_maps = bpf_program__attach_iter( @@ -1462,25 +1462,25 @@ static void test_task_vma_common(struct bpf_iter_attach_opts *opts) out: close(proc_maps_fd); close(iter_fd); - bpf_iter_task_vma__destroy(skel); + bpf_iter_task_vmas__destroy(skel); } static void test_task_vma_dead_task(void) { - struct bpf_iter_task_vma *skel; + struct bpf_iter_task_vmas *skel; int wstatus, child_pid = -1; time_t start_tm, cur_tm; int err, iter_fd = -1; int wait_sec = 3; - skel = bpf_iter_task_vma__open(); - if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open")) + skel = bpf_iter_task_vmas__open(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open")) return; skel->bss->pid = getpid(); - err = bpf_iter_task_vma__load(skel); - if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) + err = bpf_iter_task_vmas__load(skel); + if (!ASSERT_OK(err, "bpf_iter_task_vmas__load")) goto out; skel->links.proc_maps = bpf_program__attach_iter( @@ -1533,7 +1533,7 @@ static void test_task_vma_dead_task(void) out: waitpid(child_pid, &wstatus, 0); close(iter_fd); - bpf_iter_task_vma__destroy(skel); + bpf_iter_task_vmas__destroy(skel); } void test_bpf_sockmap_map_iter_fd(void) diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 4e0cdb593318..92d51f377fe5 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -7296,7 +7296,7 @@ static struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1), - BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */ + BTF_FUNC_ENC(NAME_NTH(4), 3), /* [4] */ /* tag -> t */ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [6] */ @@ -7317,7 +7317,7 @@ static struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1), - BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */ + BTF_FUNC_ENC(NAME_NTH(4), 3), /* [4] */ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [6] */ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [7] */ diff --git a/tools/testing/selftests/bpf/prog_tests/connect_ping.c b/tools/testing/selftests/bpf/prog_tests/connect_ping.c index 289218c2216c..40fe571f2fe7 100644 --- a/tools/testing/selftests/bpf/prog_tests/connect_ping.c +++ b/tools/testing/selftests/bpf/prog_tests/connect_ping.c @@ -28,9 +28,9 @@ static void subtest(int cgroup_fd, struct connect_ping *skel, .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_LOOPBACK_INIT, }; - struct sockaddr *sa; + struct sockaddr *sa = NULL; socklen_t sa_len; - int protocol; + int protocol = -1; int sock_fd; switch (family) { diff --git a/tools/testing/selftests/bpf/prog_tests/exceptions.c b/tools/testing/selftests/bpf/prog_tests/exceptions.c new file mode 100644 index 000000000000..516f4a13013c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/exceptions.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> + +#include "exceptions.skel.h" +#include "exceptions_ext.skel.h" +#include "exceptions_fail.skel.h" +#include "exceptions_assert.skel.h" + +static char log_buf[1024 * 1024]; + +static void test_exceptions_failure(void) +{ + RUN_TESTS(exceptions_fail); +} + +static void test_exceptions_success(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, ropts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct exceptions_ext *eskel = NULL; + struct exceptions *skel; + int ret; + + skel = exceptions__open(); + if (!ASSERT_OK_PTR(skel, "exceptions__open")) + return; + + ret = exceptions__load(skel); + if (!ASSERT_OK(ret, "exceptions__load")) + goto done; + + if (!ASSERT_OK(bpf_map_update_elem(bpf_map__fd(skel->maps.jmp_table), &(int){0}, + &(int){bpf_program__fd(skel->progs.exception_tail_call_target)}, BPF_ANY), + "bpf_map_update_elem jmp_table")) + goto done; + +#define RUN_SUCCESS(_prog, return_val) \ + if (!test__start_subtest(#_prog)) goto _prog##_##return_val; \ + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs._prog), &ropts); \ + ASSERT_OK(ret, #_prog " prog run ret"); \ + ASSERT_EQ(ropts.retval, return_val, #_prog " prog run retval"); \ + _prog##_##return_val: + + RUN_SUCCESS(exception_throw_always_1, 64); + RUN_SUCCESS(exception_throw_always_2, 32); + RUN_SUCCESS(exception_throw_unwind_1, 16); + RUN_SUCCESS(exception_throw_unwind_2, 32); + RUN_SUCCESS(exception_throw_default, 0); + RUN_SUCCESS(exception_throw_default_value, 5); + RUN_SUCCESS(exception_tail_call, 24); + RUN_SUCCESS(exception_ext, 0); + RUN_SUCCESS(exception_ext_mod_cb_runtime, 35); + RUN_SUCCESS(exception_throw_subprog, 1); + RUN_SUCCESS(exception_assert_nz_gfunc, 1); + RUN_SUCCESS(exception_assert_zero_gfunc, 1); + RUN_SUCCESS(exception_assert_neg_gfunc, 1); + RUN_SUCCESS(exception_assert_pos_gfunc, 1); + RUN_SUCCESS(exception_assert_negeq_gfunc, 1); + RUN_SUCCESS(exception_assert_poseq_gfunc, 1); + RUN_SUCCESS(exception_assert_nz_gfunc_with, 1); + RUN_SUCCESS(exception_assert_zero_gfunc_with, 1); + RUN_SUCCESS(exception_assert_neg_gfunc_with, 1); + RUN_SUCCESS(exception_assert_pos_gfunc_with, 1); + RUN_SUCCESS(exception_assert_negeq_gfunc_with, 1); + RUN_SUCCESS(exception_assert_poseq_gfunc_with, 1); + RUN_SUCCESS(exception_bad_assert_nz_gfunc, 0); + RUN_SUCCESS(exception_bad_assert_zero_gfunc, 0); + RUN_SUCCESS(exception_bad_assert_neg_gfunc, 0); + RUN_SUCCESS(exception_bad_assert_pos_gfunc, 0); + RUN_SUCCESS(exception_bad_assert_negeq_gfunc, 0); + RUN_SUCCESS(exception_bad_assert_poseq_gfunc, 0); + RUN_SUCCESS(exception_bad_assert_nz_gfunc_with, 100); + RUN_SUCCESS(exception_bad_assert_zero_gfunc_with, 105); + RUN_SUCCESS(exception_bad_assert_neg_gfunc_with, 200); + RUN_SUCCESS(exception_bad_assert_pos_gfunc_with, 0); + RUN_SUCCESS(exception_bad_assert_negeq_gfunc_with, 101); + RUN_SUCCESS(exception_bad_assert_poseq_gfunc_with, 99); + RUN_SUCCESS(exception_assert_range, 1); + RUN_SUCCESS(exception_assert_range_with, 1); + RUN_SUCCESS(exception_bad_assert_range, 0); + RUN_SUCCESS(exception_bad_assert_range_with, 10); + +#define RUN_EXT(load_ret, attach_err, expr, msg, after_link) \ + { \ + LIBBPF_OPTS(bpf_object_open_opts, o, .kernel_log_buf = log_buf, \ + .kernel_log_size = sizeof(log_buf), \ + .kernel_log_level = 2); \ + exceptions_ext__destroy(eskel); \ + eskel = exceptions_ext__open_opts(&o); \ + struct bpf_program *prog = NULL; \ + struct bpf_link *link = NULL; \ + if (!ASSERT_OK_PTR(eskel, "exceptions_ext__open")) \ + goto done; \ + (expr); \ + ASSERT_OK_PTR(bpf_program__name(prog), bpf_program__name(prog)); \ + if (!ASSERT_EQ(exceptions_ext__load(eskel), load_ret, \ + "exceptions_ext__load")) { \ + printf("%s\n", log_buf); \ + goto done; \ + } \ + if (load_ret != 0) { \ + if (!ASSERT_OK_PTR(strstr(log_buf, msg), "strstr")) { \ + printf("%s\n", log_buf); \ + goto done; \ + } \ + } \ + if (!load_ret && attach_err) { \ + if (!ASSERT_ERR_PTR(link = bpf_program__attach(prog), "attach err")) \ + goto done; \ + } else if (!load_ret) { \ + if (!ASSERT_OK_PTR(link = bpf_program__attach(prog), "attach ok")) \ + goto done; \ + (void)(after_link); \ + bpf_link__destroy(link); \ + } \ + } + + if (test__start_subtest("non-throwing fentry -> exception_cb")) + RUN_EXT(-EINVAL, true, ({ + prog = eskel->progs.pfentry; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime), + "exception_cb_mod"), "set_attach_target")) + goto done; + }), "FENTRY/FEXIT programs cannot attach to exception callback", 0); + + if (test__start_subtest("throwing fentry -> exception_cb")) + RUN_EXT(-EINVAL, true, ({ + prog = eskel->progs.throwing_fentry; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime), + "exception_cb_mod"), "set_attach_target")) + goto done; + }), "FENTRY/FEXIT programs cannot attach to exception callback", 0); + + if (test__start_subtest("non-throwing fexit -> exception_cb")) + RUN_EXT(-EINVAL, true, ({ + prog = eskel->progs.pfexit; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime), + "exception_cb_mod"), "set_attach_target")) + goto done; + }), "FENTRY/FEXIT programs cannot attach to exception callback", 0); + + if (test__start_subtest("throwing fexit -> exception_cb")) + RUN_EXT(-EINVAL, true, ({ + prog = eskel->progs.throwing_fexit; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime), + "exception_cb_mod"), "set_attach_target")) + goto done; + }), "FENTRY/FEXIT programs cannot attach to exception callback", 0); + + if (test__start_subtest("throwing extension (with custom cb) -> exception_cb")) + RUN_EXT(-EINVAL, true, ({ + prog = eskel->progs.throwing_exception_cb_extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime), + "exception_cb_mod"), "set_attach_target")) + goto done; + }), "Extension programs cannot attach to exception callback", 0); + + if (test__start_subtest("throwing extension -> global func in exception_cb")) + RUN_EXT(0, false, ({ + prog = eskel->progs.throwing_exception_cb_extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime), + "exception_cb_mod_global"), "set_attach_target")) + goto done; + }), "", ({ RUN_SUCCESS(exception_ext_mod_cb_runtime, 131); })); + + if (test__start_subtest("throwing extension (with custom cb) -> global func in exception_cb")) + RUN_EXT(0, false, ({ + prog = eskel->progs.throwing_extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_ext), + "exception_ext_global"), "set_attach_target")) + goto done; + }), "", ({ RUN_SUCCESS(exception_ext, 128); })); + + if (test__start_subtest("non-throwing fentry -> non-throwing subprog")) + /* non-throwing fentry -> non-throwing subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.pfentry; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("throwing fentry -> non-throwing subprog")) + /* throwing fentry -> non-throwing subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.throwing_fentry; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("non-throwing fentry -> throwing subprog")) + /* non-throwing fentry -> throwing subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.pfentry; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "throwing_subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("throwing fentry -> throwing subprog")) + /* throwing fentry -> throwing subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.throwing_fentry; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "throwing_subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("non-throwing fexit -> non-throwing subprog")) + /* non-throwing fexit -> non-throwing subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.pfexit; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("throwing fexit -> non-throwing subprog")) + /* throwing fexit -> non-throwing subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.throwing_fexit; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("non-throwing fexit -> throwing subprog")) + /* non-throwing fexit -> throwing subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.pfexit; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "throwing_subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("throwing fexit -> throwing subprog")) + /* throwing fexit -> throwing subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.throwing_fexit; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "throwing_subprog"), "set_attach_target")) + goto done; + }), "", 0); + + /* fmod_ret not allowed for subprog - Check so we remember to handle its + * throwing specification compatibility with target when supported. + */ + if (test__start_subtest("non-throwing fmod_ret -> non-throwing subprog")) + RUN_EXT(-EINVAL, true, ({ + prog = eskel->progs.pfmod_ret; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "subprog"), "set_attach_target")) + goto done; + }), "can't modify return codes of BPF program", 0); + + /* fmod_ret not allowed for subprog - Check so we remember to handle its + * throwing specification compatibility with target when supported. + */ + if (test__start_subtest("non-throwing fmod_ret -> non-throwing global subprog")) + RUN_EXT(-EINVAL, true, ({ + prog = eskel->progs.pfmod_ret; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "global_subprog"), "set_attach_target")) + goto done; + }), "can't modify return codes of BPF program", 0); + + if (test__start_subtest("non-throwing extension -> non-throwing subprog")) + /* non-throwing extension -> non-throwing subprog : BAD (!global) */ + RUN_EXT(-EINVAL, true, ({ + prog = eskel->progs.extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "subprog"), "set_attach_target")) + goto done; + }), "subprog() is not a global function", 0); + + if (test__start_subtest("non-throwing extension -> throwing subprog")) + /* non-throwing extension -> throwing subprog : BAD (!global) */ + RUN_EXT(-EINVAL, true, ({ + prog = eskel->progs.extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "throwing_subprog"), "set_attach_target")) + goto done; + }), "throwing_subprog() is not a global function", 0); + + if (test__start_subtest("non-throwing extension -> non-throwing subprog")) + /* non-throwing extension -> non-throwing global subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "global_subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("non-throwing extension -> throwing global subprog")) + /* non-throwing extension -> throwing global subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "throwing_global_subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("throwing extension -> throwing global subprog")) + /* throwing extension -> throwing global subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.throwing_extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "throwing_global_subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("throwing extension -> non-throwing global subprog")) + /* throwing extension -> non-throwing global subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.throwing_extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "global_subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("non-throwing extension -> main subprog")) + /* non-throwing extension -> main subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "exception_throw_subprog"), "set_attach_target")) + goto done; + }), "", 0); + + if (test__start_subtest("throwing extension -> main subprog")) + /* throwing extension -> main subprog : OK */ + RUN_EXT(0, false, ({ + prog = eskel->progs.throwing_extension; + bpf_program__set_autoload(prog, true); + if (!ASSERT_OK(bpf_program__set_attach_target(prog, + bpf_program__fd(skel->progs.exception_throw_subprog), + "exception_throw_subprog"), "set_attach_target")) + goto done; + }), "", 0); + +done: + exceptions_ext__destroy(eskel); + exceptions__destroy(skel); +} + +static void test_exceptions_assertions(void) +{ + RUN_TESTS(exceptions_assert); +} + +void test_exceptions(void) +{ + test_exceptions_success(); + test_exceptions_failure(); + test_exceptions_assertions(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 2fd05649bad1..4ad4cd69152e 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -11,9 +11,13 @@ #define NS_TEST "fib_lookup_ns" #define IPV6_IFACE_ADDR "face::face" +#define IPV6_IFACE_ADDR_SEC "cafe::cafe" +#define IPV6_ADDR_DST "face::3" #define IPV6_NUD_FAILED_ADDR "face::1" #define IPV6_NUD_STALE_ADDR "face::2" #define IPV4_IFACE_ADDR "10.0.0.254" +#define IPV4_IFACE_ADDR_SEC "10.1.0.254" +#define IPV4_ADDR_DST "10.2.0.254" #define IPV4_NUD_FAILED_ADDR "10.0.0.1" #define IPV4_NUD_STALE_ADDR "10.0.0.2" #define IPV4_TBID_ADDR "172.0.0.254" @@ -31,6 +35,7 @@ struct fib_lookup_test { const char *desc; const char *daddr; int expected_ret; + const char *expected_src; int lookup_flags; __u32 tbid; __u8 dmac[6]; @@ -69,6 +74,22 @@ static const struct fib_lookup_test tests[] = { .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100, .dmac = DMAC_INIT2, }, + { .desc = "IPv4 set src addr from netdev", + .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV4_IFACE_ADDR, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv6 set src addr from netdev", + .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV6_IFACE_ADDR, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv4 set prefsrc addr from route", + .daddr = IPV4_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV4_IFACE_ADDR_SEC, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv6 set prefsrc addr route", + .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_src = IPV6_IFACE_ADDR_SEC, + .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, }; static int ifindex; @@ -97,6 +118,13 @@ static int setup_netns(void) SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); + /* Setup for prefsrc IP addr selection */ + SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR_SEC); + SYS(fail, "ip route add %s/32 dev veth1 src %s", IPV4_ADDR_DST, IPV4_IFACE_ADDR_SEC); + + SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR_SEC); + SYS(fail, "ip route add %s/128 dev veth1 src %s", IPV6_ADDR_DST, IPV6_IFACE_ADDR_SEC); + /* Setup for tbid lookup tests */ SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR); SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET); @@ -133,9 +161,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) { params->family = AF_INET6; - ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); - if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) - return -1; + if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) { + ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); + if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) + return -1; + } + return 0; } @@ -143,9 +174,12 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo if (!ASSERT_EQ(ret, 1, "convert IP[46] address")) return -1; params->family = AF_INET; - ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, ¶ms->ipv4_src); - if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)")) - return -1; + + if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) { + ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, ¶ms->ipv4_src); + if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)")) + return -1; + } return 0; } @@ -156,6 +190,40 @@ static void mac_str(char *b, const __u8 *mac) mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); } +static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src) +{ + int ret; + __u32 src6[4]; + __be32 src4; + + switch (fib_params->family) { + case AF_INET6: + ret = inet_pton(AF_INET6, expected_src, src6); + ASSERT_EQ(ret, 1, "inet_pton(expected_src)"); + + ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src)); + if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) { + char str_src6[64]; + + inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6, + sizeof(str_src6)); + printf("ipv6 expected %s actual %s ", expected_src, + str_src6); + } + + break; + case AF_INET: + ret = inet_pton(AF_INET, expected_src, &src4); + ASSERT_EQ(ret, 1, "inet_pton(expected_src)"); + + ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src"); + + break; + default: + PRINT_FAIL("invalid addr family: %d", fib_params->family); + } +} + void test_fib_lookup(void) { struct bpf_fib_lookup *fib_params; @@ -207,6 +275,9 @@ void test_fib_lookup(void) ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret, "fib_lookup_ret"); + if (tests[i].expected_src) + assert_src_ip(fib_params, tests[i].expected_src); + ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac)); if (!ASSERT_EQ(ret, 0, "dmac not match")) { char expected[18], actual[18]; diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 9d768e083714..97142a4db374 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -308,7 +308,7 @@ void test_fill_link_info(void) return; /* load kallsyms to compare the addr */ - if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh")) + if (!ASSERT_OK(load_kallsyms(), "load_kallsyms")) goto cleanup; kprobe_addr = ksym_get_addr(KPROBE_FUNC); diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 10804ae5ae97..b696873c5455 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -8,6 +8,7 @@ #include "iters_looping.skel.h" #include "iters_num.skel.h" #include "iters_testmod_seq.skel.h" +#include "iters_task_vma.skel.h" static void subtest_num_iters(void) { @@ -90,6 +91,61 @@ cleanup: iters_testmod_seq__destroy(skel); } +static void subtest_task_vma_iters(void) +{ + unsigned long start, end, bpf_iter_start, bpf_iter_end; + struct iters_task_vma *skel; + char rest_of_line[1000]; + unsigned int seen; + FILE *f = NULL; + int err; + + skel = iters_task_vma__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + skel->bss->target_pid = getpid(); + + err = iters_task_vma__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + getpgid(skel->bss->target_pid); + iters_task_vma__detach(skel); + + if (!ASSERT_GT(skel->bss->vmas_seen, 0, "vmas_seen_gt_zero")) + goto cleanup; + + f = fopen("/proc/self/maps", "r"); + if (!ASSERT_OK_PTR(f, "proc_maps_fopen")) + goto cleanup; + + seen = 0; + while (fscanf(f, "%lx-%lx %[^\n]\n", &start, &end, rest_of_line) == 3) { + /* [vsyscall] vma isn't _really_ part of task->mm vmas. + * /proc/PID/maps returns it when out of vmas - see get_gate_vma + * calls in fs/proc/task_mmu.c + */ + if (strstr(rest_of_line, "[vsyscall]")) + continue; + + bpf_iter_start = skel->bss->vm_ranges[seen].vm_start; + bpf_iter_end = skel->bss->vm_ranges[seen].vm_end; + + ASSERT_EQ(bpf_iter_start, start, "vma->vm_start match"); + ASSERT_EQ(bpf_iter_end, end, "vma->vm_end match"); + seen++; + } + + if (!ASSERT_EQ(skel->bss->vmas_seen, seen, "vmas_seen_eq")) + goto cleanup; + +cleanup: + if (f) + fclose(f); + iters_task_vma__destroy(skel); +} + void test_iters(void) { RUN_TESTS(iters_state_safety); @@ -103,4 +159,6 @@ void test_iters(void) subtest_num_iters(); if (test__start_subtest("testmod_seq")) subtest_testmod_seq_iters(); + if (test__start_subtest("task_vma")) + subtest_task_vma_iters(); } diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c index 1fbe7e4ac00a..9d03528f05db 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c @@ -4,6 +4,8 @@ #include "trace_helpers.h" #include "bpf/libbpf_internal.h" +static struct ksyms *ksyms; + static void kprobe_multi_testmod_check(struct kprobe_multi *skel) { ASSERT_EQ(skel->bss->kprobe_testmod_test1_result, 1, "kprobe_test1_result"); @@ -50,12 +52,12 @@ static void test_testmod_attach_api_addrs(void) LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); unsigned long long addrs[3]; - addrs[0] = ksym_get_addr("bpf_testmod_fentry_test1"); - ASSERT_NEQ(addrs[0], 0, "ksym_get_addr"); - addrs[1] = ksym_get_addr("bpf_testmod_fentry_test2"); - ASSERT_NEQ(addrs[1], 0, "ksym_get_addr"); - addrs[2] = ksym_get_addr("bpf_testmod_fentry_test3"); - ASSERT_NEQ(addrs[2], 0, "ksym_get_addr"); + addrs[0] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test1"); + ASSERT_NEQ(addrs[0], 0, "ksym_get_addr_local"); + addrs[1] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test2"); + ASSERT_NEQ(addrs[1], 0, "ksym_get_addr_local"); + addrs[2] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test3"); + ASSERT_NEQ(addrs[2], 0, "ksym_get_addr_local"); opts.addrs = (const unsigned long *) addrs; opts.cnt = ARRAY_SIZE(addrs); @@ -79,11 +81,15 @@ static void test_testmod_attach_api_syms(void) void serial_test_kprobe_multi_testmod_test(void) { - if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh")) + ksyms = load_kallsyms_local(); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local")) return; if (test__start_subtest("testmod_attach_api_syms")) test_testmod_attach_api_syms(); + if (test__start_subtest("testmod_attach_api_addrs")) test_testmod_attach_api_addrs(); + + free_kallsyms_local(ksyms); } diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index efb8bd43653c..c440ea3311ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -142,10 +142,14 @@ static void test_libbpf_bpf_map_type_str(void) /* Special case for map_type_name BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED * where it and BPF_MAP_TYPE_CGROUP_STORAGE have the same enum value * (map_type). For this enum value, libbpf_bpf_map_type_str() picks - * BPF_MAP_TYPE_CGROUP_STORAGE. + * BPF_MAP_TYPE_CGROUP_STORAGE. The same for + * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and + * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE. */ if (strcmp(map_type_name, "BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED") == 0) continue; + if (strcmp(map_type_name, "BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED") == 0) + continue; ASSERT_STREQ(buf, map_type_name, "exp_str_value"); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 18cf7b17463d..69dc31383b78 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -65,8 +65,8 @@ static struct { { "map_compat_raw_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" }, { "map_compat_raw_tp_w", "tracing progs cannot use bpf_{list_head,rb_root} yet" }, { "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" }, - { "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" }, - { "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" }, + { "obj_new_no_composite", "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct" }, + { "obj_new_no_struct", "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct" }, { "obj_drop_non_zero_off", "R1 must have zero offset when passed to release func" }, { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" }, { "obj_new_acq", "Unreleased reference id=" }, @@ -268,7 +268,7 @@ end: static void list_and_rb_node_same_struct(bool refcount_field) { - int bpf_rb_node_btf_id, bpf_refcount_btf_id, foo_btf_id; + int bpf_rb_node_btf_id, bpf_refcount_btf_id = 0, foo_btf_id; struct btf *btf; int id, err; diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h index 61333f2a03f9..e9190574e79f 100644 --- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -49,7 +49,8 @@ static int open_tuntap(const char *dev_name, bool need_mac) return -1; ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); - memcpy(ifr.ifr_name, dev_name, IFNAMSIZ); + strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; err = ioctl(fd, TUNSETIFF, &ifr); if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { diff --git a/tools/testing/selftests/bpf/prog_tests/missed.c b/tools/testing/selftests/bpf/prog_tests/missed.c new file mode 100644 index 000000000000..70d90c43537c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/missed.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "missed_kprobe.skel.h" +#include "missed_kprobe_recursion.skel.h" +#include "missed_tp_recursion.skel.h" + +/* + * Putting kprobe on bpf_fentry_test1 that calls bpf_kfunc_common_test + * kfunc, which has also kprobe on. The latter won't get triggered due + * to kprobe recursion check and kprobe missed counter is incremented. + */ +static void test_missed_perf_kprobe(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_link_info info = {}; + struct missed_kprobe *skel; + __u32 len = sizeof(info); + int err, prog_fd; + + skel = missed_kprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "missed_kprobe__open_and_load")) + goto cleanup; + + err = missed_kprobe__attach(skel); + if (!ASSERT_OK(err, "missed_kprobe__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + err = bpf_link_get_info_by_fd(bpf_link__fd(skel->links.test2), &info, &len); + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd")) + goto cleanup; + + ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "info.type"); + ASSERT_EQ(info.perf_event.type, BPF_PERF_EVENT_KPROBE, "info.perf_event.type"); + ASSERT_EQ(info.perf_event.kprobe.missed, 1, "info.perf_event.kprobe.missed"); + +cleanup: + missed_kprobe__destroy(skel); +} + +static __u64 get_missed_count(int fd) +{ + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + int err; + + err = bpf_prog_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + return (__u64) -1; + return info.recursion_misses; +} + +/* + * Putting kprobe.multi on bpf_fentry_test1 that calls bpf_kfunc_common_test + * kfunc which has 3 perf event kprobes and 1 kprobe.multi attached. + * + * Because fprobe (kprobe.multi attach layear) does not have strict recursion + * check the kprobe's bpf_prog_active check is hit for test2-5. + */ +static void test_missed_kprobe_recursion(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct missed_kprobe_recursion *skel; + int err, prog_fd; + + skel = missed_kprobe_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "missed_kprobe_recursion__open_and_load")) + goto cleanup; + + err = missed_kprobe_recursion__attach(skel); + if (!ASSERT_OK(err, "missed_kprobe_recursion__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses"); + ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test5)), 1, "test5_recursion_misses"); + +cleanup: + missed_kprobe_recursion__destroy(skel); +} + +/* + * Putting kprobe on bpf_fentry_test1 that calls bpf_printk and invokes + * bpf_trace_printk tracepoint. The bpf_trace_printk tracepoint has test[234] + * programs attached to it. + * + * Because kprobe execution goes through bpf_prog_active check, programs + * attached to the tracepoint will fail the recursion check and increment + * the recursion_misses stats. + */ +static void test_missed_tp_recursion(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct missed_tp_recursion *skel; + int err, prog_fd; + + skel = missed_tp_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "missed_tp_recursion__open_and_load")) + goto cleanup; + + err = missed_tp_recursion__attach(skel); + if (!ASSERT_OK(err, "missed_tp_recursion__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses"); + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses"); + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses"); + ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses"); + +cleanup: + missed_tp_recursion__destroy(skel); +} + +void test_missed(void) +{ + if (test__start_subtest("perf_kprobe")) + test_missed_perf_kprobe(); + if (test__start_subtest("kprobe_recursion")) + test_missed_kprobe_recursion(); + if (test__start_subtest("tp_recursion")) + test_missed_tp_recursion(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c index c7636e18b1eb..aa9f67eb1c95 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c +++ b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c @@ -61,6 +61,11 @@ void test_module_fentry_shadow(void) int link_fd[2] = {}; __s32 btf_id[2] = {}; + if (!env.has_testmod) { + test__skip(); + return; + } + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, .expected_attach_type = BPF_TRACE_FENTRY, ); diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c new file mode 100644 index 000000000000..343da65864d6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "percpu_alloc_array.skel.h" +#include "percpu_alloc_cgrp_local_storage.skel.h" +#include "percpu_alloc_fail.skel.h" + +static void test_array(void) +{ + struct percpu_alloc_array *skel; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = percpu_alloc_array__open(); + if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open")) + return; + + bpf_program__set_autoload(skel->progs.test_array_map_1, true); + bpf_program__set_autoload(skel->progs.test_array_map_2, true); + bpf_program__set_autoload(skel->progs.test_array_map_3, true); + bpf_program__set_autoload(skel->progs.test_array_map_4, true); + + skel->bss->my_pid = getpid(); + skel->rodata->nr_cpus = libbpf_num_possible_cpus(); + + err = percpu_alloc_array__load(skel); + if (!ASSERT_OK(err, "percpu_alloc_array__load")) + goto out; + + err = percpu_alloc_array__attach(skel); + if (!ASSERT_OK(err, "percpu_alloc_array__attach")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.test_array_map_1); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run array_map 1-4"); + ASSERT_EQ(topts.retval, 0, "test_run array_map 1-4"); + ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d"); + ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c"); +out: + percpu_alloc_array__destroy(skel); +} + +static void test_array_sleepable(void) +{ + struct percpu_alloc_array *skel; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + skel = percpu_alloc_array__open(); + if (!ASSERT_OK_PTR(skel, "percpu_alloc__open")) + return; + + bpf_program__set_autoload(skel->progs.test_array_map_10, true); + + skel->bss->my_pid = getpid(); + skel->rodata->nr_cpus = libbpf_num_possible_cpus(); + + err = percpu_alloc_array__load(skel); + if (!ASSERT_OK(err, "percpu_alloc_array__load")) + goto out; + + err = percpu_alloc_array__attach(skel); + if (!ASSERT_OK(err, "percpu_alloc_array__attach")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.test_array_map_10); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run array_map_10"); + ASSERT_EQ(topts.retval, 0, "test_run array_map_10"); + ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d"); + ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c"); +out: + percpu_alloc_array__destroy(skel); +} + +static void test_cgrp_local_storage(void) +{ + struct percpu_alloc_cgrp_local_storage *skel; + int err, cgroup_fd, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + cgroup_fd = test__join_cgroup("/percpu_alloc"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /percpu_alloc")) + return; + + skel = percpu_alloc_cgrp_local_storage__open(); + if (!ASSERT_OK_PTR(skel, "percpu_alloc_cgrp_local_storage__open")) + goto close_fd; + + skel->bss->my_pid = getpid(); + skel->rodata->nr_cpus = libbpf_num_possible_cpus(); + + err = percpu_alloc_cgrp_local_storage__load(skel); + if (!ASSERT_OK(err, "percpu_alloc_cgrp_local_storage__load")) + goto destroy_skel; + + err = percpu_alloc_cgrp_local_storage__attach(skel); + if (!ASSERT_OK(err, "percpu_alloc_cgrp_local_storage__attach")) + goto destroy_skel; + + prog_fd = bpf_program__fd(skel->progs.test_cgrp_local_storage_1); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run cgrp_local_storage 1-3"); + ASSERT_EQ(topts.retval, 0, "test_run cgrp_local_storage 1-3"); + ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d"); + ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c"); + +destroy_skel: + percpu_alloc_cgrp_local_storage__destroy(skel); +close_fd: + close(cgroup_fd); +} + +static void test_failure(void) { + RUN_TESTS(percpu_alloc_fail); +} + +void test_percpu_alloc(void) +{ + if (test__start_subtest("array")) + test_array(); + if (test__start_subtest("array_sleepable")) + test_array_sleepable(); + if (test__start_subtest("cgrp_local_storage")) + test_cgrp_local_storage(); + if (test__start_subtest("failure_tests")) + test_failure(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c b/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c new file mode 100644 index 000000000000..3a2ec3923fca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <sched.h> +#include <pthread.h> +#include <stdbool.h> +#include <test_progs.h> + +#include "preempted_bpf_ma_op.skel.h" + +#define ALLOC_THREAD_NR 4 +#define ALLOC_LOOP_NR 512 + +struct alloc_ctx { + /* output */ + int run_err; + /* input */ + int fd; + bool *nomem_err; +}; + +static void *run_alloc_prog(void *data) +{ + struct alloc_ctx *ctx = data; + cpu_set_t cpu_set; + int i; + + CPU_ZERO(&cpu_set); + CPU_SET(0, &cpu_set); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + + for (i = 0; i < ALLOC_LOOP_NR && !*ctx->nomem_err; i++) { + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err; + + err = bpf_prog_test_run_opts(ctx->fd, &topts); + ctx->run_err |= err | topts.retval; + } + + return NULL; +} + +void test_preempted_bpf_ma_op(void) +{ + struct alloc_ctx ctx[ALLOC_THREAD_NR]; + struct preempted_bpf_ma_op *skel; + pthread_t tid[ALLOC_THREAD_NR]; + int i, err; + + skel = preempted_bpf_ma_op__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + err = preempted_bpf_ma_op__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + for (i = 0; i < ARRAY_SIZE(ctx); i++) { + struct bpf_program *prog; + char name[8]; + + snprintf(name, sizeof(name), "test%d", i); + prog = bpf_object__find_program_by_name(skel->obj, name); + if (!ASSERT_OK_PTR(prog, "no test prog")) + goto out; + + ctx[i].run_err = 0; + ctx[i].fd = bpf_program__fd(prog); + ctx[i].nomem_err = &skel->bss->nomem_err; + } + + memset(tid, 0, sizeof(tid)); + for (i = 0; i < ARRAY_SIZE(tid); i++) { + err = pthread_create(&tid[i], NULL, run_alloc_prog, &ctx[i]); + if (!ASSERT_OK(err, "pthread_create")) + break; + } + + for (i = 0; i < ARRAY_SIZE(tid); i++) { + if (!tid[i]) + break; + pthread_join(tid[i], NULL); + ASSERT_EQ(ctx[i].run_err, 0, "run prog err"); + } + + ASSERT_FALSE(skel->bss->nomem_err, "ENOMEM"); +out: + preempted_bpf_ma_op__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index 722c5f2a7776..a043af9cd6d9 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -14,7 +14,7 @@ static void test_queue_stack_map_by_type(int type) int i, err, prog_fd, map_in_fd, map_out_fd; char file[32], buf[128]; struct bpf_object *obj; - struct iphdr iph; + struct iphdr iph = {}; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index ac104dc652e3..48c5695b7abf 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -91,6 +91,9 @@ static void ringbuf_subtest(void) int err, cnt, rb_fd; int page_size = getpagesize(); void *mmap_ptr, *tmp_ptr; + struct ring *ring; + int map_fd; + unsigned long avail_data, ring_size, cons_pos, prod_pos; skel = test_ringbuf_lskel__open(); if (CHECK(!skel, "skel_open", "skeleton open failed\n")) @@ -162,6 +165,13 @@ static void ringbuf_subtest(void) trigger_samples(); + ring = ring_buffer__ring(ringbuf, 0); + if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0")) + goto cleanup; + + map_fd = ring__map_fd(ring); + ASSERT_EQ(map_fd, skel->maps.ringbuf.map_fd, "ring_map_fd"); + /* 2 submitted + 1 discarded records */ CHECK(skel->bss->avail_data != 3 * rec_sz, "err_avail_size", "exp %ld, got %ld\n", @@ -176,6 +186,18 @@ static void ringbuf_subtest(void) "err_prod_pos", "exp %ld, got %ld\n", 3L * rec_sz, skel->bss->prod_pos); + /* verify getting this data directly via the ring object yields the same + * results + */ + avail_data = ring__avail_data_size(ring); + ASSERT_EQ(avail_data, 3 * rec_sz, "ring_avail_size"); + ring_size = ring__size(ring); + ASSERT_EQ(ring_size, page_size, "ring_ring_size"); + cons_pos = ring__consumer_pos(ring); + ASSERT_EQ(cons_pos, 0, "ring_cons_pos"); + prod_pos = ring__producer_pos(ring); + ASSERT_EQ(prod_pos, 3 * rec_sz, "ring_prod_pos"); + /* poll for samples */ err = ring_buffer__poll(ringbuf, -1); @@ -282,6 +304,10 @@ static void ringbuf_subtest(void) err = ring_buffer__consume(ringbuf); CHECK(err < 0, "rb_consume", "failed: %d\b", err); + /* also consume using ring__consume to make sure it works the same */ + err = ring__consume(ring); + ASSERT_GE(err, 0, "ring_consume"); + /* 3 rounds, 2 samples each */ cnt = atomic_xchg(&sample_cnt, 0); CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt); diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index 1455911d9fcb..58522195081b 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -42,6 +42,8 @@ void test_ringbuf_multi(void) { struct test_ringbuf_multi *skel; struct ring_buffer *ringbuf = NULL; + struct ring *ring_old; + struct ring *ring; int err; int page_size = getpagesize(); int proto_fd = -1; @@ -84,11 +86,24 @@ void test_ringbuf_multi(void) if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n")) goto cleanup; + /* verify ring_buffer__ring returns expected results */ + ring = ring_buffer__ring(ringbuf, 0); + if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0")) + goto cleanup; + ring_old = ring; + ring = ring_buffer__ring(ringbuf, 1); + ASSERT_ERR_PTR(ring, "ring_buffer__ring_idx_1"); + err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2), process_sample, (void *)(long)2); if (CHECK(err, "ringbuf_add", "failed to add another ring\n")) goto cleanup; + /* verify adding a new ring didn't invalidate our older pointer */ + ring = ring_buffer__ring(ringbuf, 0); + if (!ASSERT_EQ(ring, ring_old, "ring_buffer__ring_again")) + goto cleanup; + err = test_ringbuf_multi__attach(skel); if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c index 8b571890c57e..c3d78846f31a 100644 --- a/tools/testing/selftests/bpf/prog_tests/section_names.c +++ b/tools/testing/selftests/bpf/prog_tests/section_names.c @@ -124,6 +124,11 @@ static struct sec_name_test tests[] = { {0, BPF_CGROUP_INET6_CONNECT}, }, { + "cgroup/connect_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT}, + {0, BPF_CGROUP_UNIX_CONNECT}, + }, + { "cgroup/sendmsg4", {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG}, {0, BPF_CGROUP_UDP4_SENDMSG}, @@ -134,6 +139,11 @@ static struct sec_name_test tests[] = { {0, BPF_CGROUP_UDP6_SENDMSG}, }, { + "cgroup/sendmsg_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG}, + {0, BPF_CGROUP_UNIX_SENDMSG}, + }, + { "cgroup/recvmsg4", {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG}, {0, BPF_CGROUP_UDP4_RECVMSG}, @@ -144,6 +154,11 @@ static struct sec_name_test tests[] = { {0, BPF_CGROUP_UDP6_RECVMSG}, }, { + "cgroup/recvmsg_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG}, + {0, BPF_CGROUP_UNIX_RECVMSG}, + }, + { "cgroup/sysctl", {0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL}, {0, BPF_CGROUP_SYSCTL}, @@ -158,6 +173,36 @@ static struct sec_name_test tests[] = { {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT}, {0, BPF_CGROUP_SETSOCKOPT}, }, + { + "cgroup/getpeername4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME}, + {0, BPF_CGROUP_INET4_GETPEERNAME}, + }, + { + "cgroup/getpeername6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME}, + {0, BPF_CGROUP_INET6_GETPEERNAME}, + }, + { + "cgroup/getpeername_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME}, + {0, BPF_CGROUP_UNIX_GETPEERNAME}, + }, + { + "cgroup/getsockname4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME}, + {0, BPF_CGROUP_INET4_GETSOCKNAME}, + }, + { + "cgroup/getsockname6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME}, + {0, BPF_CGROUP_INET6_GETSOCKNAME}, + }, + { + "cgroup/getsockname_unix", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME}, + {0, BPF_CGROUP_UNIX_GETSOCKNAME}, + }, }; static void test_prog_type_by_name(const struct sec_name_test *test) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c new file mode 100644 index 000000000000..5fd617718991 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/un.h> + +#include "test_progs.h" + +#include "connect_unix_prog.skel.h" +#include "sendmsg_unix_prog.skel.h" +#include "recvmsg_unix_prog.skel.h" +#include "getsockname_unix_prog.skel.h" +#include "getpeername_unix_prog.skel.h" +#include "network_helpers.h" + +#define SERVUN_ADDRESS "bpf_cgroup_unix_test" +#define SERVUN_REWRITE_ADDRESS "bpf_cgroup_unix_test_rewrite" +#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src" + +enum sock_addr_test_type { + SOCK_ADDR_TEST_BIND, + SOCK_ADDR_TEST_CONNECT, + SOCK_ADDR_TEST_SENDMSG, + SOCK_ADDR_TEST_RECVMSG, + SOCK_ADDR_TEST_GETSOCKNAME, + SOCK_ADDR_TEST_GETPEERNAME, +}; + +typedef void *(*load_fn)(int cgroup_fd); +typedef void (*destroy_fn)(void *skel); + +struct sock_addr_test { + enum sock_addr_test_type type; + const char *name; + /* BPF prog properties */ + load_fn loadfn; + destroy_fn destroyfn; + /* Socket properties */ + int socket_family; + int socket_type; + /* IP:port pairs for BPF prog to override */ + const char *requested_addr; + unsigned short requested_port; + const char *expected_addr; + unsigned short expected_port; + const char *expected_src_addr; +}; + +static void *connect_unix_prog_load(int cgroup_fd) +{ + struct connect_unix_prog *skel; + + skel = connect_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.connect_unix_prog = bpf_program__attach_cgroup( + skel->progs.connect_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.connect_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + connect_unix_prog__destroy(skel); + return NULL; +} + +static void connect_unix_prog_destroy(void *skel) +{ + connect_unix_prog__destroy(skel); +} + +static void *sendmsg_unix_prog_load(int cgroup_fd) +{ + struct sendmsg_unix_prog *skel; + + skel = sendmsg_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.sendmsg_unix_prog = bpf_program__attach_cgroup( + skel->progs.sendmsg_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.sendmsg_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + sendmsg_unix_prog__destroy(skel); + return NULL; +} + +static void sendmsg_unix_prog_destroy(void *skel) +{ + sendmsg_unix_prog__destroy(skel); +} + +static void *recvmsg_unix_prog_load(int cgroup_fd) +{ + struct recvmsg_unix_prog *skel; + + skel = recvmsg_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.recvmsg_unix_prog = bpf_program__attach_cgroup( + skel->progs.recvmsg_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.recvmsg_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + recvmsg_unix_prog__destroy(skel); + return NULL; +} + +static void recvmsg_unix_prog_destroy(void *skel) +{ + recvmsg_unix_prog__destroy(skel); +} + +static void *getsockname_unix_prog_load(int cgroup_fd) +{ + struct getsockname_unix_prog *skel; + + skel = getsockname_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.getsockname_unix_prog = bpf_program__attach_cgroup( + skel->progs.getsockname_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.getsockname_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + getsockname_unix_prog__destroy(skel); + return NULL; +} + +static void getsockname_unix_prog_destroy(void *skel) +{ + getsockname_unix_prog__destroy(skel); +} + +static void *getpeername_unix_prog_load(int cgroup_fd) +{ + struct getpeername_unix_prog *skel; + + skel = getpeername_unix_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->links.getpeername_unix_prog = bpf_program__attach_cgroup( + skel->progs.getpeername_unix_prog, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.getpeername_unix_prog, "prog_attach")) + goto cleanup; + + return skel; +cleanup: + getpeername_unix_prog__destroy(skel); + return NULL; +} + +static void getpeername_unix_prog_destroy(void *skel) +{ + getpeername_unix_prog__destroy(skel); +} + +static struct sock_addr_test tests[] = { + { + SOCK_ADDR_TEST_CONNECT, + "connect_unix", + connect_unix_prog_load, + connect_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg_unix", + sendmsg_unix_prog_load, + sendmsg_unix_prog_destroy, + AF_UNIX, + SOCK_DGRAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg_unix-dgram", + recvmsg_unix_prog_load, + recvmsg_unix_prog_destroy, + AF_UNIX, + SOCK_DGRAM, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_ADDRESS, + }, + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg_unix-stream", + recvmsg_unix_prog_load, + recvmsg_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_ADDRESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname_unix", + getsockname_unix_prog_load, + getsockname_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername_unix", + getpeername_unix_prog_load, + getpeername_unix_prog_destroy, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + }, +}; + +typedef int (*info_fn)(int, struct sockaddr *, socklen_t *); + +static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len, + const struct sockaddr_storage *addr2, socklen_t addr2_len, + bool cmp_port) +{ + const struct sockaddr_in *four1, *four2; + const struct sockaddr_in6 *six1, *six2; + const struct sockaddr_un *un1, *un2; + + if (addr1->ss_family != addr2->ss_family) + return -1; + + if (addr1_len != addr2_len) + return -1; + + if (addr1->ss_family == AF_INET) { + four1 = (const struct sockaddr_in *)addr1; + four2 = (const struct sockaddr_in *)addr2; + return !((four1->sin_port == four2->sin_port || !cmp_port) && + four1->sin_addr.s_addr == four2->sin_addr.s_addr); + } else if (addr1->ss_family == AF_INET6) { + six1 = (const struct sockaddr_in6 *)addr1; + six2 = (const struct sockaddr_in6 *)addr2; + return !((six1->sin6_port == six2->sin6_port || !cmp_port) && + !memcmp(&six1->sin6_addr, &six2->sin6_addr, + sizeof(struct in6_addr))); + } else if (addr1->ss_family == AF_UNIX) { + un1 = (const struct sockaddr_un *)addr1; + un2 = (const struct sockaddr_un *)addr2; + return memcmp(un1, un2, addr1_len); + } + + return -1; +} + +static int cmp_sock_addr(info_fn fn, int sock1, + const struct sockaddr_storage *addr2, + socklen_t addr2_len, bool cmp_port) +{ + struct sockaddr_storage addr1; + socklen_t len1 = sizeof(addr1); + + memset(&addr1, 0, len1); + if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0) + return -1; + + return cmp_addr(&addr1, len1, addr2, addr2_len, cmp_port); +} + +static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2, + socklen_t addr2_len, bool cmp_port) +{ + return cmp_sock_addr(getsockname, sock1, addr2, addr2_len, cmp_port); +} + +static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2, + socklen_t addr2_len, bool cmp_port) +{ + return cmp_sock_addr(getpeername, sock1, addr2, addr2_len, cmp_port); +} + +static void test_bind(struct sock_addr_test *test) +{ + struct sockaddr_storage expected_addr; + socklen_t expected_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, client = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->requested_addr, test->requested_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, + test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_local_addr")) + goto cleanup; + + /* Try to connect to server just in case */ + client = connect_to_addr(&expected_addr, expected_addr_len, test->socket_type); + if (!ASSERT_GE(client, 0, "connect_to_addr")) + goto cleanup; + +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +static void test_connect(struct sock_addr_test *test) +{ + struct sockaddr_storage addr, expected_addr, expected_src_addr; + socklen_t addr_len = sizeof(struct sockaddr_storage), + expected_addr_len = sizeof(struct sockaddr_storage), + expected_src_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, client = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->expected_addr, test->expected_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + client = connect_to_addr(&addr, addr_len, test->socket_type); + if (!ASSERT_GE(client, 0, "connect_to_addr")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + if (test->expected_src_addr) { + err = make_sockaddr(test->socket_family, test->expected_src_addr, 0, + &expected_src_addr, &expected_src_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + } + + err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_peer_addr")) + goto cleanup; + + if (test->expected_src_addr) { + err = cmp_local_addr(client, &expected_src_addr, expected_src_addr_len, false); + if (!ASSERT_EQ(err, 0, "cmp_local_addr")) + goto cleanup; + } +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +static void test_xmsg(struct sock_addr_test *test) +{ + struct sockaddr_storage addr, src_addr; + socklen_t addr_len = sizeof(struct sockaddr_storage), + src_addr_len = sizeof(struct sockaddr_storage); + struct msghdr hdr; + struct iovec iov; + char data = 'a'; + int serv = -1, client = -1, err; + + /* Unlike the other tests, here we test that we can rewrite the src addr + * with a recvmsg() hook. + */ + + serv = start_server(test->socket_family, test->socket_type, + test->expected_addr, test->expected_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + client = socket(test->socket_family, test->socket_type, 0); + if (!ASSERT_GE(client, 0, "socket")) + goto cleanup; + + /* AF_UNIX sockets have to be bound to something to trigger the recvmsg bpf program. */ + if (test->socket_family == AF_UNIX) { + err = make_sockaddr(AF_UNIX, SRCUN_ADDRESS, 0, &src_addr, &src_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = bind(client, (const struct sockaddr *) &src_addr, src_addr_len); + if (!ASSERT_OK(err, "bind")) + goto cleanup; + } + + err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + if (test->socket_type == SOCK_DGRAM) { + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + memset(&hdr, 0, sizeof(hdr)); + hdr.msg_name = (void *)&addr; + hdr.msg_namelen = addr_len; + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + + err = sendmsg(client, &hdr, 0); + if (!ASSERT_EQ(err, sizeof(data), "sendmsg")) + goto cleanup; + } else { + /* Testing with connection-oriented sockets is only valid for + * recvmsg() tests. + */ + if (!ASSERT_EQ(test->type, SOCK_ADDR_TEST_RECVMSG, "recvmsg")) + goto cleanup; + + err = connect(client, (const struct sockaddr *)&addr, addr_len); + if (!ASSERT_OK(err, "connect")) + goto cleanup; + + err = send(client, &data, sizeof(data), 0); + if (!ASSERT_EQ(err, sizeof(data), "send")) + goto cleanup; + + err = listen(serv, 0); + if (!ASSERT_OK(err, "listen")) + goto cleanup; + + err = accept(serv, NULL, NULL); + if (!ASSERT_GE(err, 0, "accept")) + goto cleanup; + + close(serv); + serv = err; + } + + addr_len = src_addr_len = sizeof(struct sockaddr_storage); + + err = recvfrom(serv, &data, sizeof(data), 0, (struct sockaddr *) &src_addr, &src_addr_len); + if (!ASSERT_EQ(err, sizeof(data), "recvfrom")) + goto cleanup; + + ASSERT_EQ(data, 'a', "data mismatch"); + + if (test->expected_src_addr) { + err = make_sockaddr(test->socket_family, test->expected_src_addr, 0, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_addr(&src_addr, src_addr_len, &addr, addr_len, false); + if (!ASSERT_EQ(err, 0, "cmp_addr")) + goto cleanup; + } + +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +static void test_getsockname(struct sock_addr_test *test) +{ + struct sockaddr_storage expected_addr; + socklen_t expected_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->requested_addr, test->requested_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, + test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_local_addr")) + goto cleanup; + +cleanup: + if (serv != -1) + close(serv); +} + +static void test_getpeername(struct sock_addr_test *test) +{ + struct sockaddr_storage addr, expected_addr; + socklen_t addr_len = sizeof(struct sockaddr_storage), + expected_addr_len = sizeof(struct sockaddr_storage); + int serv = -1, client = -1, err; + + serv = start_server(test->socket_family, test->socket_type, + test->requested_addr, test->requested_port, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port, + &addr, &addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + client = connect_to_addr(&addr, addr_len, test->socket_type); + if (!ASSERT_GE(client, 0, "connect_to_addr")) + goto cleanup; + + err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port, + &expected_addr, &expected_addr_len); + if (!ASSERT_EQ(err, 0, "make_sockaddr")) + goto cleanup; + + err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true); + if (!ASSERT_EQ(err, 0, "cmp_peer_addr")) + goto cleanup; + +cleanup: + if (client != -1) + close(client); + if (serv != -1) + close(serv); +} + +void test_sock_addr(void) +{ + int cgroup_fd = -1; + void *skel; + + cgroup_fd = test__join_cgroup("/sock_addr"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + goto cleanup; + + for (size_t i = 0; i < ARRAY_SIZE(tests); ++i) { + struct sock_addr_test *test = &tests[i]; + + if (!test__start_subtest(test->name)) + continue; + + skel = test->loadfn(cgroup_fd); + if (!skel) + continue; + + switch (test->type) { + /* Not exercised yet but we leave this code here for when the + * INET and INET6 sockaddr tests are migrated to this file in + * the future. + */ + case SOCK_ADDR_TEST_BIND: + test_bind(test); + break; + case SOCK_ADDR_TEST_CONNECT: + test_connect(test); + break; + case SOCK_ADDR_TEST_SENDMSG: + case SOCK_ADDR_TEST_RECVMSG: + test_xmsg(test); + break; + case SOCK_ADDR_TEST_GETSOCKNAME: + test_getsockname(test); + break; + case SOCK_ADDR_TEST_GETPEERNAME: + test_getpeername(test); + break; + default: + ASSERT_TRUE(false, "Unknown sock addr test type"); + break; + } + + test->destroyfn(skel); + } + +cleanup: + if (cgroup_fd >= 0) + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index dda7060e86a0..f75f84d0b3d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -359,7 +359,7 @@ out: static void test_sockmap_skb_verdict_shutdown(void) { struct epoll_event ev, events[MAX_EVENTS]; - int n, err, map, verdict, s, c1, p1; + int n, err, map, verdict, s, c1 = -1, p1 = -1; struct test_sockmap_pass_prog *skel; int epollfd; int zero = 0; @@ -414,9 +414,9 @@ out: static void test_sockmap_skb_verdict_fionread(bool pass_prog) { int expected, zero = 0, sent, recvd, avail; - int err, map, verdict, s, c0, c1, p0, p1; - struct test_sockmap_pass_prog *pass; - struct test_sockmap_drop_prog *drop; + int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; + struct test_sockmap_pass_prog *pass = NULL; + struct test_sockmap_drop_prog *drop = NULL; char buf[256] = "0123456789"; if (pass_prog) { diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index 36d829a65aa4..e880f97bc44d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -378,7 +378,7 @@ static inline int enable_reuseport(int s, int progfd) static inline int socket_loopback_reuseport(int family, int sotype, int progfd) { struct sockaddr_storage addr; - socklen_t len; + socklen_t len = 0; int err, s; init_addr_loopback(family, &addr, &len); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 8df8cbb447f1..a934d430c20c 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -73,7 +73,7 @@ static void test_insert_bound(struct test_sockmap_listen *skel __always_unused, int family, int sotype, int mapfd) { struct sockaddr_storage addr; - socklen_t len; + socklen_t len = 0; u32 key = 0; u64 value; int err, s; @@ -871,7 +871,7 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel, static void redir_partial(int family, int sotype, int sock_map, int parser_map) { - int s, c0, c1, p0, p1; + int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; int err, n, key, value; char buf[] = "abc"; @@ -1336,53 +1336,59 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } } -static void unix_redir_to_connected(int sotype, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) +static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, + int sock_mapfd, int verd_mapfd, enum redir_mode mode) { const char *log_prefix = redir_mode_str(mode); - int c0, c1, p0, p1; unsigned int pass; int err, n; - int sfd[2]; u32 key; char b; zero_verdict_count(verd_mapfd); - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - return; - c0 = sfd[0], p0 = sfd[1]; - - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - goto close0; - c1 = sfd[0], p1 = sfd[1]; - - err = add_to_sockmap(sock_mapfd, p0, p1); + err = add_to_sockmap(sock_mapfd, peer0, peer1); if (err) - goto close; + return; - n = write(c1, "a", 1); + n = write(cli1, "a", 1); if (n < 0) FAIL_ERRNO("%s: write", log_prefix); if (n == 0) FAIL("%s: incomplete write", log_prefix); if (n < 1) - goto close; + return; key = SK_PASS; err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); if (err) - goto close; + return; if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC); if (n < 0) FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) FAIL("%s: incomplete recv", log_prefix); +} + +static void unix_redir_to_connected(int sotype, int sock_mapfd, + int verd_mapfd, enum redir_mode mode) +{ + int c0, c1, p0, p1; + int sfd[2]; + + if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) + return; + c0 = sfd[0], p0 = sfd[1]; + + if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) + goto close0; + c1 = sfd[0], p1 = sfd[1]; + + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); -close: xclose(c1); xclose(p1); close0: @@ -1661,14 +1667,8 @@ close_peer0: static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { - const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; - unsigned int pass; - int err, n; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int err; err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); if (err) @@ -1677,32 +1677,8 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (err) goto close_cli0; - err = add_to_sockmap(sock_mapfd, p0, p1); - if (err) - goto close_cli1; + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); - n = write(c1, "a", 1); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto close_cli1; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto close_cli1; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); - -close_cli1: xclose(c1); xclose(p1); close_cli0: @@ -1747,15 +1723,9 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { - const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; - unsigned int pass; - int err, n; int sfd[2]; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int err; if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) return; @@ -1765,32 +1735,8 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (err) goto close; - err = add_to_sockmap(sock_mapfd, p0, p1); - if (err) - goto close_cli1; - - n = write(c1, "a", 1); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto close_cli1; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto close_cli1; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); -close_cli1: xclose(c1); xclose(p1); close: @@ -1827,15 +1773,9 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { - const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; - unsigned int pass; - int err, n; int sfd[2]; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); + int err; err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); if (err) @@ -1845,32 +1785,8 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, goto close_cli0; c1 = sfd[0], p1 = sfd[1]; - err = add_to_sockmap(sock_mapfd, p0, p1); - if (err) - goto close; - - n = write(c1, "a", 1); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto close; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto close; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); -close: xclose(c1); xclose(p1); close_cli0: diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 58fe2c586ed7..fc6b2954e8f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -218,12 +218,14 @@ out: bpf_object__close(obj); } -static void test_tailcall_count(const char *which) +static void test_tailcall_count(const char *which, bool test_fentry, + bool test_fexit) { + struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL; + struct bpf_link *fentry_link = NULL, *fexit_link = NULL; int err, map_fd, prog_fd, main_fd, data_fd, i, val; struct bpf_map *prog_array, *data_map; struct bpf_program *prog; - struct bpf_object *obj; char buff[128] = {}; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = buff, @@ -265,23 +267,105 @@ static void test_tailcall_count(const char *which) if (CHECK_FAIL(err)) goto out; + if (test_fentry) { + fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o", + NULL); + if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file")) + goto out; + + prog = bpf_object__find_program_by_name(fentry_obj, "fentry"); + if (!ASSERT_OK_PTR(prog, "find fentry prog")) + goto out; + + err = bpf_program__set_attach_target(prog, prog_fd, + "subprog_tail"); + if (!ASSERT_OK(err, "set_attach_target subprog_tail")) + goto out; + + err = bpf_object__load(fentry_obj); + if (!ASSERT_OK(err, "load fentry_obj")) + goto out; + + fentry_link = bpf_program__attach_trace(prog); + if (!ASSERT_OK_PTR(fentry_link, "attach_trace")) + goto out; + } + + if (test_fexit) { + fexit_obj = bpf_object__open_file("tailcall_bpf2bpf_fexit.bpf.o", + NULL); + if (!ASSERT_OK_PTR(fexit_obj, "open fexit_obj file")) + goto out; + + prog = bpf_object__find_program_by_name(fexit_obj, "fexit"); + if (!ASSERT_OK_PTR(prog, "find fexit prog")) + goto out; + + err = bpf_program__set_attach_target(prog, prog_fd, + "subprog_tail"); + if (!ASSERT_OK(err, "set_attach_target subprog_tail")) + goto out; + + err = bpf_object__load(fexit_obj); + if (!ASSERT_OK(err, "load fexit_obj")) + goto out; + + fexit_link = bpf_program__attach_trace(prog); + if (!ASSERT_OK_PTR(fexit_link, "attach_trace")) + goto out; + } + err = bpf_prog_test_run_opts(main_fd, &topts); ASSERT_OK(err, "tailcall"); ASSERT_EQ(topts.retval, 1, "tailcall retval"); data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) - return; + goto out; data_fd = bpf_map__fd(data_map); - if (CHECK_FAIL(map_fd < 0)) - return; + if (CHECK_FAIL(data_fd < 0)) + goto out; i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); ASSERT_OK(err, "tailcall count"); ASSERT_EQ(val, 33, "tailcall count"); + if (test_fentry) { + data_map = bpf_object__find_map_by_name(fentry_obj, ".bss"); + if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map), + "find tailcall_bpf2bpf_fentry.bss map")) + goto out; + + data_fd = bpf_map__fd(data_map); + if (!ASSERT_FALSE(data_fd < 0, + "find tailcall_bpf2bpf_fentry.bss map fd")) + goto out; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + ASSERT_OK(err, "fentry count"); + ASSERT_EQ(val, 33, "fentry count"); + } + + if (test_fexit) { + data_map = bpf_object__find_map_by_name(fexit_obj, ".bss"); + if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map), + "find tailcall_bpf2bpf_fexit.bss map")) + goto out; + + data_fd = bpf_map__fd(data_map); + if (!ASSERT_FALSE(data_fd < 0, + "find tailcall_bpf2bpf_fexit.bss map fd")) + goto out; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + ASSERT_OK(err, "fexit count"); + ASSERT_EQ(val, 33, "fexit count"); + } + i = 0; err = bpf_map_delete_elem(map_fd, &i); if (CHECK_FAIL(err)) @@ -291,6 +375,10 @@ static void test_tailcall_count(const char *which) ASSERT_OK(err, "tailcall"); ASSERT_OK(topts.retval, "tailcall retval"); out: + bpf_link__destroy(fentry_link); + bpf_link__destroy(fexit_link); + bpf_object__close(fentry_obj); + bpf_object__close(fexit_obj); bpf_object__close(obj); } @@ -299,7 +387,7 @@ out: */ static void test_tailcall_3(void) { - test_tailcall_count("tailcall3.bpf.o"); + test_tailcall_count("tailcall3.bpf.o", false, false); } /* test_tailcall_6 checks that the count value of the tail call limit @@ -307,7 +395,7 @@ static void test_tailcall_3(void) */ static void test_tailcall_6(void) { - test_tailcall_count("tailcall6.bpf.o"); + test_tailcall_count("tailcall6.bpf.o", false, false); } /* test_tailcall_4 checks that the kernel properly selects indirect jump @@ -352,11 +440,11 @@ static void test_tailcall_4(void) data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) - return; + goto out; data_fd = bpf_map__fd(data_map); - if (CHECK_FAIL(map_fd < 0)) - return; + if (CHECK_FAIL(data_fd < 0)) + goto out; for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); @@ -442,11 +530,11 @@ static void test_tailcall_5(void) data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) - return; + goto out; data_fd = bpf_map__fd(data_map); - if (CHECK_FAIL(map_fd < 0)) - return; + if (CHECK_FAIL(data_fd < 0)) + goto out; for (i = 0; i < bpf_map__max_entries(prog_array); i++) { snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); @@ -631,11 +719,11 @@ static void test_tailcall_bpf2bpf_2(void) data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) - return; + goto out; data_fd = bpf_map__fd(data_map); - if (CHECK_FAIL(map_fd < 0)) - return; + if (CHECK_FAIL(data_fd < 0)) + goto out; i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); @@ -805,11 +893,11 @@ static void test_tailcall_bpf2bpf_4(bool noise) data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) - return; + goto out; data_fd = bpf_map__fd(data_map); - if (CHECK_FAIL(map_fd < 0)) - return; + if (CHECK_FAIL(data_fd < 0)) + goto out; i = 0; val.noise = noise; @@ -872,7 +960,7 @@ static void test_tailcall_bpf2bpf_6(void) ASSERT_EQ(topts.retval, 0, "tailcall retval"); data_fd = bpf_map__fd(obj->maps.bss); - if (!ASSERT_GE(map_fd, 0, "bss map fd")) + if (!ASSERT_GE(data_fd, 0, "bss map fd")) goto out; i = 0; @@ -884,6 +972,139 @@ out: tailcall_bpf2bpf6__destroy(obj); } +/* test_tailcall_bpf2bpf_fentry checks that the count value of the tail call + * limit enforcement matches with expectations when tailcall is preceded with + * bpf2bpf call, and the bpf2bpf call is traced by fentry. + */ +static void test_tailcall_bpf2bpf_fentry(void) +{ + test_tailcall_count("tailcall_bpf2bpf2.bpf.o", true, false); +} + +/* test_tailcall_bpf2bpf_fexit checks that the count value of the tail call + * limit enforcement matches with expectations when tailcall is preceded with + * bpf2bpf call, and the bpf2bpf call is traced by fexit. + */ +static void test_tailcall_bpf2bpf_fexit(void) +{ + test_tailcall_count("tailcall_bpf2bpf2.bpf.o", false, true); +} + +/* test_tailcall_bpf2bpf_fentry_fexit checks that the count value of the tail + * call limit enforcement matches with expectations when tailcall is preceded + * with bpf2bpf call, and the bpf2bpf call is traced by both fentry and fexit. + */ +static void test_tailcall_bpf2bpf_fentry_fexit(void) +{ + test_tailcall_count("tailcall_bpf2bpf2.bpf.o", true, true); +} + +/* test_tailcall_bpf2bpf_fentry_entry checks that the count value of the tail + * call limit enforcement matches with expectations when tailcall is preceded + * with bpf2bpf call, and the bpf2bpf caller is traced by fentry. + */ +static void test_tailcall_bpf2bpf_fentry_entry(void) +{ + struct bpf_object *tgt_obj = NULL, *fentry_obj = NULL; + int err, map_fd, prog_fd, data_fd, i, val; + struct bpf_map *prog_array, *data_map; + struct bpf_link *fentry_link = NULL; + struct bpf_program *prog; + char buff[128] = {}; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); + + err = bpf_prog_test_load("tailcall_bpf2bpf2.bpf.o", + BPF_PROG_TYPE_SCHED_CLS, + &tgt_obj, &prog_fd); + if (!ASSERT_OK(err, "load tgt_obj")) + return; + + prog_array = bpf_object__find_map_by_name(tgt_obj, "jmp_table"); + if (!ASSERT_OK_PTR(prog_array, "find jmp_table map")) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (!ASSERT_FALSE(map_fd < 0, "find jmp_table map fd")) + goto out; + + prog = bpf_object__find_program_by_name(tgt_obj, "classifier_0"); + if (!ASSERT_OK_PTR(prog, "find classifier_0 prog")) + goto out; + + prog_fd = bpf_program__fd(prog); + if (!ASSERT_FALSE(prog_fd < 0, "find classifier_0 prog fd")) + goto out; + + i = 0; + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "update jmp_table")) + goto out; + + fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o", + NULL); + if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file")) + goto out; + + prog = bpf_object__find_program_by_name(fentry_obj, "fentry"); + if (!ASSERT_OK_PTR(prog, "find fentry prog")) + goto out; + + err = bpf_program__set_attach_target(prog, prog_fd, "classifier_0"); + if (!ASSERT_OK(err, "set_attach_target classifier_0")) + goto out; + + err = bpf_object__load(fentry_obj); + if (!ASSERT_OK(err, "load fentry_obj")) + goto out; + + fentry_link = bpf_program__attach_trace(prog); + if (!ASSERT_OK_PTR(fentry_link, "attach_trace")) + goto out; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "tailcall"); + ASSERT_EQ(topts.retval, 1, "tailcall retval"); + + data_map = bpf_object__find_map_by_name(tgt_obj, "tailcall.bss"); + if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map), + "find tailcall.bss map")) + goto out; + + data_fd = bpf_map__fd(data_map); + if (!ASSERT_FALSE(data_fd < 0, "find tailcall.bss map fd")) + goto out; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + ASSERT_OK(err, "tailcall count"); + ASSERT_EQ(val, 34, "tailcall count"); + + data_map = bpf_object__find_map_by_name(fentry_obj, ".bss"); + if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map), + "find tailcall_bpf2bpf_fentry.bss map")) + goto out; + + data_fd = bpf_map__fd(data_map); + if (!ASSERT_FALSE(data_fd < 0, + "find tailcall_bpf2bpf_fentry.bss map fd")) + goto out; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + ASSERT_OK(err, "fentry count"); + ASSERT_EQ(val, 1, "fentry count"); + +out: + bpf_link__destroy(fentry_link); + bpf_object__close(fentry_obj); + bpf_object__close(tgt_obj); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -910,4 +1131,12 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_4(true); if (test__start_subtest("tailcall_bpf2bpf_6")) test_tailcall_bpf2bpf_6(); + if (test__start_subtest("tailcall_bpf2bpf_fentry")) + test_tailcall_bpf2bpf_fentry(); + if (test__start_subtest("tailcall_bpf2bpf_fexit")) + test_tailcall_bpf2bpf_fexit(); + if (test__start_subtest("tailcall_bpf2bpf_fentry_fexit")) + test_tailcall_bpf2bpf_fentry_fexit(); + if (test__start_subtest("tailcall_bpf2bpf_fentry_entry")) + test_tailcall_bpf2bpf_fentry_entry(); } diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index ce2c61d62fc6..760ad96b4be0 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -15,6 +15,7 @@ static int timer(struct timer *timer_skel) ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1"); ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1"); + ASSERT_EQ(timer_skel->bss->pinned_callback_check, 0, "pinned_callback_check1"); prog_fd = bpf_program__fd(timer_skel->progs.test1); err = bpf_prog_test_run_opts(prog_fd, &topts); @@ -33,6 +34,9 @@ static int timer(struct timer *timer_skel) /* check that timer_cb3() was executed twice */ ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data"); + /* check that timer_cb_pinned() was executed twice */ + ASSERT_EQ(timer_skel->bss->pinned_callback_check, 2, "pinned_callback_check"); + /* check that there were no errors in timer execution */ ASSERT_EQ(timer_skel->bss->err, 0, "err"); diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c new file mode 100644 index 000000000000..cf3e0e7a64fa --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Hengqi Chen */ + +#include <test_progs.h> +#include "test_uprobe.skel.h" + +static FILE *urand_spawn(int *pid) +{ + FILE *f; + + /* urandom_read's stdout is wired into f */ + f = popen("./urandom_read 1 report-pid", "r"); + if (!f) + return NULL; + + if (fscanf(f, "%d", pid) != 1) { + pclose(f); + errno = EINVAL; + return NULL; + } + + return f; +} + +static int urand_trigger(FILE **urand_pipe) +{ + int exit_code; + + /* pclose() waits for child process to exit and returns their exit code */ + exit_code = pclose(*urand_pipe); + *urand_pipe = NULL; + + return exit_code; +} + +void test_uprobe(void) +{ + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct test_uprobe *skel; + FILE *urand_pipe = NULL; + int urand_pid = 0, err; + + skel = test_uprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + urand_pipe = urand_spawn(&urand_pid); + if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn")) + goto cleanup; + + skel->bss->my_pid = urand_pid; + + /* Manual attach uprobe to urandlib_api + * There are two `urandlib_api` symbols in .dynsym section: + * - urandlib_api@LIBURANDOM_READ_1.0.0 + * - urandlib_api@@LIBURANDOM_READ_2.0.0 + * Both are global bind and would cause a conflict if user + * specify the symbol name without a version suffix + */ + uprobe_opts.func_name = "urandlib_api"; + skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4, + urand_pid, + "./liburandom_read.so", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_ERR_PTR(skel->links.test4, "urandlib_api_attach_conflict")) + goto cleanup; + + uprobe_opts.func_name = "urandlib_api@LIBURANDOM_READ_1.0.0"; + skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4, + urand_pid, + "./liburandom_read.so", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.test4, "urandlib_api_attach_ok")) + goto cleanup; + + /* Auto attach 3 u[ret]probes to urandlib_api_sameoffset */ + err = test_uprobe__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger urandom_read */ + ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code"); + + ASSERT_EQ(skel->bss->test1_result, 1, "urandlib_api_sameoffset"); + ASSERT_EQ(skel->bss->test2_result, 1, "urandlib_api_sameoffset@v1"); + ASSERT_EQ(skel->bss->test3_result, 3, "urandlib_api_sameoffset@@v2"); + ASSERT_EQ(skel->bss->test4_result, 1, "urandlib_api"); + +cleanup: + if (urand_pipe) + pclose(urand_pipe); + test_uprobe__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 626c461fa34d..4439ba9392f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -226,7 +226,7 @@ static int verify_xsk_metadata(struct xsk *xsk) __u64 comp_addr; void *data; __u64 addr; - __u32 idx; + __u32 idx = 0; int ret; ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c index dd923dc637d5..dd923dc637d5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 38a57a2e70db..799fff4995d8 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -99,6 +99,9 @@ #elif defined(__TARGET_ARCH_arm64) #define SYSCALL_WRAPPER 1 #define SYS_PREFIX "__arm64_" +#elif defined(__TARGET_ARCH_riscv) +#define SYSCALL_WRAPPER 1 +#define SYS_PREFIX "__riscv_" #else #define SYSCALL_WRAPPER 0 #define SYS_PREFIX "__se_" diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c new file mode 100644 index 000000000000..ca8aa2f116b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/connect_unix") +int connect_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + /* Rewrite destination. */ + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 0; + + if (sa_kern->uaddrlen != unaddrlen) + return 0; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 0; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c new file mode 100644 index 000000000000..2811ee842b01 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/exceptions.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_endian.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +#ifndef ETH_P_IP +#define ETH_P_IP 0x0800 +#endif + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 4); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +static __noinline int static_func(u64 i) +{ + bpf_throw(32); + return i; +} + +__noinline int global2static_simple(u64 i) +{ + static_func(i + 2); + return i - 1; +} + +__noinline int global2static(u64 i) +{ + if (i == ETH_P_IP) + bpf_throw(16); + return static_func(i); +} + +static __noinline int static2global(u64 i) +{ + return global2static(i) + i; +} + +SEC("tc") +int exception_throw_always_1(struct __sk_buff *ctx) +{ + bpf_throw(64); + return 0; +} + +/* In this case, the global func will never be seen executing after call to + * static subprog, hence verifier will DCE the remaining instructions. Ensure we + * are resilient to that. + */ +SEC("tc") +int exception_throw_always_2(struct __sk_buff *ctx) +{ + return global2static_simple(ctx->protocol); +} + +SEC("tc") +int exception_throw_unwind_1(struct __sk_buff *ctx) +{ + return static2global(bpf_ntohs(ctx->protocol)); +} + +SEC("tc") +int exception_throw_unwind_2(struct __sk_buff *ctx) +{ + return static2global(bpf_ntohs(ctx->protocol) - 1); +} + +SEC("tc") +int exception_throw_default(struct __sk_buff *ctx) +{ + bpf_throw(0); + return 1; +} + +SEC("tc") +int exception_throw_default_value(struct __sk_buff *ctx) +{ + bpf_throw(5); + return 1; +} + +SEC("tc") +int exception_tail_call_target(struct __sk_buff *ctx) +{ + bpf_throw(16); + return 0; +} + +static __noinline +int exception_tail_call_subprog(struct __sk_buff *ctx) +{ + volatile int ret = 10; + + bpf_tail_call_static(ctx, &jmp_table, 0); + return ret; +} + +SEC("tc") +int exception_tail_call(struct __sk_buff *ctx) { + volatile int ret = 0; + + ret = exception_tail_call_subprog(ctx); + return ret + 8; +} + +__noinline int exception_ext_global(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + return ret; +} + +static __noinline int exception_ext_static(struct __sk_buff *ctx) +{ + return exception_ext_global(ctx); +} + +SEC("tc") +int exception_ext(struct __sk_buff *ctx) +{ + return exception_ext_static(ctx); +} + +__noinline int exception_cb_mod_global(u64 cookie) +{ + volatile int ret = 0; + + return ret; +} + +/* Example of how the exception callback supplied during verification can still + * introduce extensions by calling to dummy global functions, and alter runtime + * behavior. + * + * Right now we don't allow freplace attachment to exception callback itself, + * but if the need arises this restriction is technically feasible to relax in + * the future. + */ +__noinline int exception_cb_mod(u64 cookie) +{ + return exception_cb_mod_global(cookie) + cookie + 10; +} + +SEC("tc") +__exception_cb(exception_cb_mod) +int exception_ext_mod_cb_runtime(struct __sk_buff *ctx) +{ + bpf_throw(25); + return 0; +} + +__noinline static int subprog(struct __sk_buff *ctx) +{ + return bpf_ktime_get_ns(); +} + +__noinline static int throwing_subprog(struct __sk_buff *ctx) +{ + if (ctx->tstamp) + bpf_throw(0); + return bpf_ktime_get_ns(); +} + +__noinline int global_subprog(struct __sk_buff *ctx) +{ + return bpf_ktime_get_ns(); +} + +__noinline int throwing_global_subprog(struct __sk_buff *ctx) +{ + if (ctx->tstamp) + bpf_throw(0); + return bpf_ktime_get_ns(); +} + +SEC("tc") +int exception_throw_subprog(struct __sk_buff *ctx) +{ + switch (ctx->protocol) { + case 1: + return subprog(ctx); + case 2: + return global_subprog(ctx); + case 3: + return throwing_subprog(ctx); + case 4: + return throwing_global_subprog(ctx); + default: + break; + } + bpf_throw(1); + return 0; +} + +__noinline int assert_nz_gfunc(u64 c) +{ + volatile u64 cookie = c; + + bpf_assert(cookie != 0); + return 0; +} + +__noinline int assert_zero_gfunc(u64 c) +{ + volatile u64 cookie = c; + + bpf_assert_eq(cookie, 0); + return 0; +} + +__noinline int assert_neg_gfunc(s64 c) +{ + volatile s64 cookie = c; + + bpf_assert_lt(cookie, 0); + return 0; +} + +__noinline int assert_pos_gfunc(s64 c) +{ + volatile s64 cookie = c; + + bpf_assert_gt(cookie, 0); + return 0; +} + +__noinline int assert_negeq_gfunc(s64 c) +{ + volatile s64 cookie = c; + + bpf_assert_le(cookie, -1); + return 0; +} + +__noinline int assert_poseq_gfunc(s64 c) +{ + volatile s64 cookie = c; + + bpf_assert_ge(cookie, 1); + return 0; +} + +__noinline int assert_nz_gfunc_with(u64 c) +{ + volatile u64 cookie = c; + + bpf_assert_with(cookie != 0, cookie + 100); + return 0; +} + +__noinline int assert_zero_gfunc_with(u64 c) +{ + volatile u64 cookie = c; + + bpf_assert_eq_with(cookie, 0, cookie + 100); + return 0; +} + +__noinline int assert_neg_gfunc_with(s64 c) +{ + volatile s64 cookie = c; + + bpf_assert_lt_with(cookie, 0, cookie + 100); + return 0; +} + +__noinline int assert_pos_gfunc_with(s64 c) +{ + volatile s64 cookie = c; + + bpf_assert_gt_with(cookie, 0, cookie + 100); + return 0; +} + +__noinline int assert_negeq_gfunc_with(s64 c) +{ + volatile s64 cookie = c; + + bpf_assert_le_with(cookie, -1, cookie + 100); + return 0; +} + +__noinline int assert_poseq_gfunc_with(s64 c) +{ + volatile s64 cookie = c; + + bpf_assert_ge_with(cookie, 1, cookie + 100); + return 0; +} + +#define check_assert(name, cookie, tag) \ +SEC("tc") \ +int exception##tag##name(struct __sk_buff *ctx) \ +{ \ + return name(cookie) + 1; \ +} + +check_assert(assert_nz_gfunc, 5, _); +check_assert(assert_zero_gfunc, 0, _); +check_assert(assert_neg_gfunc, -100, _); +check_assert(assert_pos_gfunc, 100, _); +check_assert(assert_negeq_gfunc, -1, _); +check_assert(assert_poseq_gfunc, 1, _); + +check_assert(assert_nz_gfunc_with, 5, _); +check_assert(assert_zero_gfunc_with, 0, _); +check_assert(assert_neg_gfunc_with, -100, _); +check_assert(assert_pos_gfunc_with, 100, _); +check_assert(assert_negeq_gfunc_with, -1, _); +check_assert(assert_poseq_gfunc_with, 1, _); + +check_assert(assert_nz_gfunc, 0, _bad_); +check_assert(assert_zero_gfunc, 5, _bad_); +check_assert(assert_neg_gfunc, 100, _bad_); +check_assert(assert_pos_gfunc, -100, _bad_); +check_assert(assert_negeq_gfunc, 1, _bad_); +check_assert(assert_poseq_gfunc, -1, _bad_); + +check_assert(assert_nz_gfunc_with, 0, _bad_); +check_assert(assert_zero_gfunc_with, 5, _bad_); +check_assert(assert_neg_gfunc_with, 100, _bad_); +check_assert(assert_pos_gfunc_with, -100, _bad_); +check_assert(assert_negeq_gfunc_with, 1, _bad_); +check_assert(assert_poseq_gfunc_with, -1, _bad_); + +SEC("tc") +int exception_assert_range(struct __sk_buff *ctx) +{ + u64 time = bpf_ktime_get_ns(); + + bpf_assert_range(time, 0, ~0ULL); + return 1; +} + +SEC("tc") +int exception_assert_range_with(struct __sk_buff *ctx) +{ + u64 time = bpf_ktime_get_ns(); + + bpf_assert_range_with(time, 0, ~0ULL, 10); + return 1; +} + +SEC("tc") +int exception_bad_assert_range(struct __sk_buff *ctx) +{ + u64 time = bpf_ktime_get_ns(); + + bpf_assert_range(time, -100, 100); + return 1; +} + +SEC("tc") +int exception_bad_assert_range_with(struct __sk_buff *ctx) +{ + u64 time = bpf_ktime_get_ns(); + + bpf_assert_range_with(time, -1000, 1000, 10); + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c new file mode 100644 index 000000000000..e1e5c54a6a11 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <limits.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_endian.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +#define check_assert(type, op, name, value) \ + SEC("?tc") \ + __log_level(2) __failure \ + int check_assert_##op##_##name(void *ctx) \ + { \ + type num = bpf_ktime_get_ns(); \ + bpf_assert_##op(num, value); \ + return *(u64 *)num; \ + } + +__msg(": R0_w=-2147483648 R10=fp0") +check_assert(s64, eq, int_min, INT_MIN); +__msg(": R0_w=2147483647 R10=fp0") +check_assert(s64, eq, int_max, INT_MAX); +__msg(": R0_w=0 R10=fp0") +check_assert(s64, eq, zero, 0); +__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0") +check_assert(s64, eq, llong_min, LLONG_MIN); +__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0") +check_assert(s64, eq, llong_max, LLONG_MAX); + +__msg(": R0_w=scalar(smax=2147483646) R10=fp0") +check_assert(s64, lt, pos, INT_MAX); +__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +check_assert(s64, lt, zero, 0); +__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +check_assert(s64, lt, neg, INT_MIN); + +__msg(": R0_w=scalar(smax=2147483647) R10=fp0") +check_assert(s64, le, pos, INT_MAX); +__msg(": R0_w=scalar(smax=0) R10=fp0") +check_assert(s64, le, zero, 0); +__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +check_assert(s64, le, neg, INT_MIN); + +__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, gt, pos, INT_MAX); +__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, gt, zero, 0); +__msg(": R0_w=scalar(smin=-2147483647) R10=fp0") +check_assert(s64, gt, neg, INT_MIN); + +__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, ge, pos, INT_MAX); +__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") +check_assert(s64, ge, zero, 0); +__msg(": R0_w=scalar(smin=-2147483648) R10=fp0") +check_assert(s64, ge, neg, INT_MIN); + +SEC("?tc") +__log_level(2) __failure +__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") +int check_assert_range_s64(struct __sk_buff *ctx) +{ + struct bpf_sock *sk = ctx->sk; + s64 num; + + _Static_assert(_Generic((sk->rx_queue_mapping), s32: 1, default: 0), "type match"); + if (!sk) + return 0; + num = sk->rx_queue_mapping; + bpf_assert_range(num, INT_MIN + 2, INT_MAX - 2); + return *((u8 *)ctx + num); +} + +SEC("?tc") +__log_level(2) __failure +__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") +int check_assert_range_u64(struct __sk_buff *ctx) +{ + u64 num = ctx->len; + + bpf_assert_range(num, 4096, 8192); + return *((u8 *)ctx + num); +} + +SEC("?tc") +__log_level(2) __failure +__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +int check_assert_single_range_s64(struct __sk_buff *ctx) +{ + struct bpf_sock *sk = ctx->sk; + s64 num; + + _Static_assert(_Generic((sk->rx_queue_mapping), s32: 1, default: 0), "type match"); + if (!sk) + return 0; + num = sk->rx_queue_mapping; + + bpf_assert_range(num, 4096, 4096); + return *((u8 *)ctx + num); +} + +SEC("?tc") +__log_level(2) __failure +__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +int check_assert_single_range_u64(struct __sk_buff *ctx) +{ + u64 num = ctx->len; + + bpf_assert_range(num, 4096, 4096); + return *((u8 *)ctx + num); +} + +SEC("?tc") +__log_level(2) __failure +__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0") +int check_assert_generic(struct __sk_buff *ctx) +{ + u8 *data_end = (void *)(long)ctx->data_end; + u8 *data = (void *)(long)ctx->data; + + bpf_assert(data + 64 <= data_end); + return data[128]; +} + +SEC("?fentry/bpf_check") +__failure __msg("At program exit the register R0 has value (0x40; 0x0)") +int check_assert_with_return(void *ctx) +{ + bpf_assert_with(!ctx, 64); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/exceptions_ext.c b/tools/testing/selftests/bpf/progs/exceptions_ext.c new file mode 100644 index 000000000000..743c05185d9b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/exceptions_ext.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" + +SEC("?fentry") +int pfentry(void *ctx) +{ + return 0; +} + +SEC("?fentry") +int throwing_fentry(void *ctx) +{ + bpf_throw(0); + return 0; +} + +__noinline int exception_cb(u64 cookie) +{ + return cookie + 64; +} + +SEC("?freplace") +int extension(struct __sk_buff *ctx) +{ + return 0; +} + +SEC("?freplace") +__exception_cb(exception_cb) +int throwing_exception_cb_extension(u64 cookie) +{ + bpf_throw(32); + return 0; +} + +SEC("?freplace") +__exception_cb(exception_cb) +int throwing_extension(struct __sk_buff *ctx) +{ + bpf_throw(64); + return 0; +} + +SEC("?fexit") +int pfexit(void *ctx) +{ + return 0; +} + +SEC("?fexit") +int throwing_fexit(void *ctx) +{ + bpf_throw(0); + return 0; +} + +SEC("?fmod_ret") +int pfmod_ret(void *ctx) +{ + return 0; +} + +SEC("?fmod_ret") +int throwing_fmod_ret(void *ctx) +{ + bpf_throw(0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c new file mode 100644 index 000000000000..4c39e920dac2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -0,0 +1,347 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +extern void bpf_rcu_read_lock(void) __ksym; + +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) + +struct foo { + struct bpf_rb_node node; +}; + +struct hmap_elem { + struct bpf_timer timer; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 64); + __type(key, int); + __type(value, struct hmap_elem); +} hmap SEC(".maps"); + +private(A) struct bpf_spin_lock lock; +private(A) struct bpf_rb_root rbtree __contains(foo, node); + +__noinline void *exception_cb_bad_ret_type(u64 cookie) +{ + return NULL; +} + +__noinline int exception_cb_bad_arg_0(void) +{ + return 0; +} + +__noinline int exception_cb_bad_arg_2(int a, int b) +{ + return 0; +} + +__noinline int exception_cb_ok_arg_small(int a) +{ + return 0; +} + +SEC("?tc") +__exception_cb(exception_cb_bad_ret_type) +__failure __msg("Global function exception_cb_bad_ret_type() doesn't return scalar.") +int reject_exception_cb_type_1(struct __sk_buff *ctx) +{ + bpf_throw(0); + return 0; +} + +SEC("?tc") +__exception_cb(exception_cb_bad_arg_0) +__failure __msg("exception cb only supports single integer argument") +int reject_exception_cb_type_2(struct __sk_buff *ctx) +{ + bpf_throw(0); + return 0; +} + +SEC("?tc") +__exception_cb(exception_cb_bad_arg_2) +__failure __msg("exception cb only supports single integer argument") +int reject_exception_cb_type_3(struct __sk_buff *ctx) +{ + bpf_throw(0); + return 0; +} + +SEC("?tc") +__exception_cb(exception_cb_ok_arg_small) +__success +int reject_exception_cb_type_4(struct __sk_buff *ctx) +{ + bpf_throw(0); + return 0; +} + +__noinline +static int timer_cb(void *map, int *key, struct bpf_timer *timer) +{ + bpf_throw(0); + return 0; +} + +SEC("?tc") +__failure __msg("cannot be called from callback subprog") +int reject_async_callback_throw(struct __sk_buff *ctx) +{ + struct hmap_elem *elem; + + elem = bpf_map_lookup_elem(&hmap, &(int){0}); + if (!elem) + return 0; + return bpf_timer_set_callback(&elem->timer, timer_cb); +} + +__noinline static int subprog_lock(struct __sk_buff *ctx) +{ + volatile int ret = 0; + + bpf_spin_lock(&lock); + if (ctx->len) + bpf_throw(0); + return ret; +} + +SEC("?tc") +__failure __msg("function calls are not allowed while holding a lock") +int reject_with_lock(void *ctx) +{ + bpf_spin_lock(&lock); + bpf_throw(0); + return 0; +} + +SEC("?tc") +__failure __msg("function calls are not allowed while holding a lock") +int reject_subprog_with_lock(void *ctx) +{ + return subprog_lock(ctx); +} + +SEC("?tc") +__failure __msg("bpf_rcu_read_unlock is missing") +int reject_with_rcu_read_lock(void *ctx) +{ + bpf_rcu_read_lock(); + bpf_throw(0); + return 0; +} + +__noinline static int throwing_subprog(struct __sk_buff *ctx) +{ + if (ctx->len) + bpf_throw(0); + return 0; +} + +SEC("?tc") +__failure __msg("bpf_rcu_read_unlock is missing") +int reject_subprog_with_rcu_read_lock(void *ctx) +{ + bpf_rcu_read_lock(); + return throwing_subprog(ctx); +} + +static bool rbless(struct bpf_rb_node *n1, const struct bpf_rb_node *n2) +{ + bpf_throw(0); + return true; +} + +SEC("?tc") +__failure __msg("function calls are not allowed while holding a lock") +int reject_with_rbtree_add_throw(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_spin_lock(&lock); + bpf_rbtree_add(&rbtree, &f->node, rbless); + return 0; +} + +SEC("?tc") +__failure __msg("Unreleased reference") +int reject_with_reference(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_throw(0); + return 0; +} + +__noinline static int subprog_ref(struct __sk_buff *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_throw(0); + return 0; +} + +__noinline static int subprog_cb_ref(u32 i, void *ctx) +{ + bpf_throw(0); + return 0; +} + +SEC("?tc") +__failure __msg("Unreleased reference") +int reject_with_cb_reference(void *ctx) +{ + struct foo *f; + + f = bpf_obj_new(typeof(*f)); + if (!f) + return 0; + bpf_loop(5, subprog_cb_ref, NULL, 0); + return 0; +} + +SEC("?tc") +__failure __msg("cannot be called from callback") +int reject_with_cb(void *ctx) +{ + bpf_loop(5, subprog_cb_ref, NULL, 0); + return 0; +} + +SEC("?tc") +__failure __msg("Unreleased reference") +int reject_with_subprog_reference(void *ctx) +{ + return subprog_ref(ctx) + 1; +} + +__noinline int throwing_exception_cb(u64 c) +{ + bpf_throw(0); + return c; +} + +__noinline int exception_cb1(u64 c) +{ + return c; +} + +__noinline int exception_cb2(u64 c) +{ + return c; +} + +static __noinline int static_func(struct __sk_buff *ctx) +{ + return exception_cb1(ctx->tstamp); +} + +__noinline int global_func(struct __sk_buff *ctx) +{ + return exception_cb1(ctx->tstamp); +} + +SEC("?tc") +__exception_cb(throwing_exception_cb) +__failure __msg("cannot be called from callback subprog") +int reject_throwing_exception_cb(struct __sk_buff *ctx) +{ + return 0; +} + +SEC("?tc") +__exception_cb(exception_cb1) +__failure __msg("cannot call exception cb directly") +int reject_exception_cb_call_global_func(struct __sk_buff *ctx) +{ + return global_func(ctx); +} + +SEC("?tc") +__exception_cb(exception_cb1) +__failure __msg("cannot call exception cb directly") +int reject_exception_cb_call_static_func(struct __sk_buff *ctx) +{ + return static_func(ctx); +} + +SEC("?tc") +__exception_cb(exception_cb1) +__exception_cb(exception_cb2) +__failure __msg("multiple exception callback tags for main subprog") +int reject_multiple_exception_cb(struct __sk_buff *ctx) +{ + bpf_throw(0); + return 16; +} + +__noinline int exception_cb_bad_ret(u64 c) +{ + return c; +} + +SEC("?fentry/bpf_check") +__exception_cb(exception_cb_bad_ret) +__failure __msg("At program exit the register R0 has unknown scalar value should") +int reject_set_exception_cb_bad_ret1(void *ctx) +{ + return 0; +} + +SEC("?fentry/bpf_check") +__failure __msg("At program exit the register R0 has value (0x40; 0x0) should") +int reject_set_exception_cb_bad_ret2(void *ctx) +{ + bpf_throw(64); + return 0; +} + +__noinline static int loop_cb1(u32 index, int *ctx) +{ + bpf_throw(0); + return 0; +} + +__noinline static int loop_cb2(u32 index, int *ctx) +{ + bpf_throw(0); + return 0; +} + +SEC("?tc") +__failure __msg("cannot be called from callback") +int reject_exception_throw_cb(struct __sk_buff *ctx) +{ + bpf_loop(5, loop_cb1, NULL, 0); + return 0; +} + +SEC("?tc") +__failure __msg("cannot be called from callback") +int reject_exception_throw_cb_diff(struct __sk_buff *ctx) +{ + if (ctx->protocol) + bpf_loop(5, loop_cb1, NULL, 0); + else + bpf_loop(5, loop_cb2, NULL, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c new file mode 100644 index 000000000000..9c078f34bbb2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/getpeername_unix") +int getpeername_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 1; + + if (sa_kern->uaddrlen != unaddrlen) + return 1; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c new file mode 100644 index 000000000000..ac7145111497 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/getsockname_unix") +int getsockname_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 1; + + if (sa_kern->uaddrlen != unaddrlen) + return 1; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c new file mode 100644 index 000000000000..44edecfdfaee --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_experimental.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +pid_t target_pid = 0; +unsigned int vmas_seen = 0; + +struct { + __u64 vm_start; + __u64 vm_end; +} vm_ranges[1000]; + +SEC("raw_tp/sys_enter") +int iter_task_vma_for_each(const void *ctx) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct vm_area_struct *vma; + unsigned int seen = 0; + + if (task->pid != target_pid) + return 0; + + if (vmas_seen) + return 0; + + bpf_for_each(task_vma, vma, task, 0) { + if (seen >= 1000) + break; + + vm_ranges[seen].vm_start = vma->vm_start; + vm_ranges[seen].vm_end = vma->vm_end; + seen++; + } + + vmas_seen = seen; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c new file mode 100644 index 000000000000..7f9ef701f5de --- /dev/null +++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +SEC("kprobe/bpf_fentry_test1") +int test1(struct pt_regs *ctx) +{ + bpf_kfunc_common_test(); + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test2(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c new file mode 100644 index 000000000000..8ea71cbd6c45 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +SEC("kprobe.multi/bpf_fentry_test1") +int test1(struct pt_regs *ctx) +{ + bpf_kfunc_common_test(); + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test2(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test3(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe/bpf_kfunc_common_test") +int test4(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe.multi/bpf_kfunc_common_test") +int test5(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/missed_tp_recursion.c b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c new file mode 100644 index 000000000000..762385f827c5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +SEC("kprobe/bpf_fentry_test1") +int test1(struct pt_regs *ctx) +{ + bpf_printk("test"); + return 0; +} + +SEC("tp/bpf_trace/bpf_trace_printk") +int test2(struct pt_regs *ctx) +{ + return 0; +} + +SEC("tp/bpf_trace/bpf_trace_printk") +int test3(struct pt_regs *ctx) +{ + return 0; +} + +SEC("tp/bpf_trace/bpf_trace_printk") +int test4(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c new file mode 100644 index 000000000000..37c2d2608ec0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c @@ -0,0 +1,190 @@ +#include "bpf_experimental.h" + +struct val_t { + long b, c, d; +}; + +struct elem { + long sum; + struct val_t __percpu_kptr *pc; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; + +const volatile int nr_cpus; + +/* Initialize the percpu object */ +SEC("?fentry/bpf_fentry_test1") +int BPF_PROG(test_array_map_1) +{ + struct val_t __percpu_kptr *p; + struct elem *e; + int index = 0; + + e = bpf_map_lookup_elem(&array, &index); + if (!e) + return 0; + + p = bpf_percpu_obj_new(struct val_t); + if (!p) + return 0; + + p = bpf_kptr_xchg(&e->pc, p); + if (p) + bpf_percpu_obj_drop(p); + + return 0; +} + +/* Update percpu data */ +SEC("?fentry/bpf_fentry_test2") +int BPF_PROG(test_array_map_2) +{ + struct val_t __percpu_kptr *p; + struct val_t *v; + struct elem *e; + int index = 0; + + e = bpf_map_lookup_elem(&array, &index); + if (!e) + return 0; + + p = e->pc; + if (!p) + return 0; + + v = bpf_per_cpu_ptr(p, 0); + if (!v) + return 0; + v->c = 1; + v->d = 2; + + return 0; +} + +int cpu0_field_d, sum_field_c; +int my_pid; + +/* Summarize percpu data */ +SEC("?fentry/bpf_fentry_test3") +int BPF_PROG(test_array_map_3) +{ + struct val_t __percpu_kptr *p; + int i, index = 0; + struct val_t *v; + struct elem *e; + + if ((bpf_get_current_pid_tgid() >> 32) != my_pid) + return 0; + + e = bpf_map_lookup_elem(&array, &index); + if (!e) + return 0; + + p = e->pc; + if (!p) + return 0; + + bpf_for(i, 0, nr_cpus) { + v = bpf_per_cpu_ptr(p, i); + if (v) { + if (i == 0) + cpu0_field_d = v->d; + sum_field_c += v->c; + } + } + + return 0; +} + +/* Explicitly free allocated percpu data */ +SEC("?fentry/bpf_fentry_test4") +int BPF_PROG(test_array_map_4) +{ + struct val_t __percpu_kptr *p; + struct elem *e; + int index = 0; + + e = bpf_map_lookup_elem(&array, &index); + if (!e) + return 0; + + /* delete */ + p = bpf_kptr_xchg(&e->pc, NULL); + if (p) { + bpf_percpu_obj_drop(p); + } + + return 0; +} + +SEC("?fentry.s/bpf_fentry_test1") +int BPF_PROG(test_array_map_10) +{ + struct val_t __percpu_kptr *p, *p1; + int i, index = 0; + struct val_t *v; + struct elem *e; + + if ((bpf_get_current_pid_tgid() >> 32) != my_pid) + return 0; + + e = bpf_map_lookup_elem(&array, &index); + if (!e) + return 0; + + bpf_rcu_read_lock(); + p = e->pc; + if (!p) { + p = bpf_percpu_obj_new(struct val_t); + if (!p) + goto out; + + p1 = bpf_kptr_xchg(&e->pc, p); + if (p1) { + /* race condition */ + bpf_percpu_obj_drop(p1); + } + } + + v = bpf_this_cpu_ptr(p); + v->c = 3; + v = bpf_this_cpu_ptr(p); + v->c = 0; + + v = bpf_per_cpu_ptr(p, 0); + if (!v) + goto out; + v->c = 1; + v->d = 2; + + /* delete */ + p1 = bpf_kptr_xchg(&e->pc, NULL); + if (!p1) + goto out; + + bpf_for(i, 0, nr_cpus) { + v = bpf_per_cpu_ptr(p, i); + if (v) { + if (i == 0) + cpu0_field_d = v->d; + sum_field_c += v->c; + } + } + + /* finally release p */ + bpf_percpu_obj_drop(p1); +out: + bpf_rcu_read_unlock(); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c new file mode 100644 index 000000000000..a2acf9aa6c24 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c @@ -0,0 +1,109 @@ +#include "bpf_experimental.h" + +struct val_t { + long b, c, d; +}; + +struct elem { + long sum; + struct val_t __percpu_kptr *pc; +}; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct elem); +} cgrp SEC(".maps"); + +const volatile int nr_cpus; + +/* Initialize the percpu object */ +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test_cgrp_local_storage_1) +{ + struct task_struct *task; + struct val_t __percpu_kptr *p; + struct elem *e; + + task = bpf_get_current_task_btf(); + e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!e) + return 0; + + p = bpf_percpu_obj_new(struct val_t); + if (!p) + return 0; + + p = bpf_kptr_xchg(&e->pc, p); + if (p) + bpf_percpu_obj_drop(p); + + return 0; +} + +/* Percpu data collection */ +SEC("fentry/bpf_fentry_test2") +int BPF_PROG(test_cgrp_local_storage_2) +{ + struct task_struct *task; + struct val_t __percpu_kptr *p; + struct val_t *v; + struct elem *e; + + task = bpf_get_current_task_btf(); + e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0); + if (!e) + return 0; + + p = e->pc; + if (!p) + return 0; + + v = bpf_per_cpu_ptr(p, 0); + if (!v) + return 0; + v->c = 1; + v->d = 2; + return 0; +} + +int cpu0_field_d, sum_field_c; +int my_pid; + +/* Summarize percpu data collection */ +SEC("fentry/bpf_fentry_test3") +int BPF_PROG(test_cgrp_local_storage_3) +{ + struct task_struct *task; + struct val_t __percpu_kptr *p; + struct val_t *v; + struct elem *e; + int i; + + if ((bpf_get_current_pid_tgid() >> 32) != my_pid) + return 0; + + task = bpf_get_current_task_btf(); + e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0); + if (!e) + return 0; + + p = e->pc; + if (!p) + return 0; + + bpf_for(i, 0, nr_cpus) { + v = bpf_per_cpu_ptr(p, i); + if (v) { + if (i == 0) + cpu0_field_d = v->d; + sum_field_c += v->c; + } + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c new file mode 100644 index 000000000000..1a891d30f1fe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c @@ -0,0 +1,164 @@ +#include "bpf_experimental.h" +#include "bpf_misc.h" + +struct val_t { + long b, c, d; +}; + +struct val2_t { + long b; +}; + +struct val_with_ptr_t { + char *p; +}; + +struct val_with_rb_root_t { + struct bpf_spin_lock lock; +}; + +struct elem { + long sum; + struct val_t __percpu_kptr *pc; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +long ret; + +SEC("?fentry/bpf_fentry_test1") +__failure __msg("store to referenced kptr disallowed") +int BPF_PROG(test_array_map_1) +{ + struct val_t __percpu_kptr *p; + struct elem *e; + int index = 0; + + e = bpf_map_lookup_elem(&array, &index); + if (!e) + return 0; + + p = bpf_percpu_obj_new(struct val_t); + if (!p) + return 0; + + p = bpf_kptr_xchg(&e->pc, p); + if (p) + bpf_percpu_obj_drop(p); + + e->pc = (struct val_t __percpu_kptr *)ret; + return 0; +} + +SEC("?fentry/bpf_fentry_test1") +__failure __msg("invalid kptr access, R2 type=percpu_ptr_val2_t expected=ptr_val_t") +int BPF_PROG(test_array_map_2) +{ + struct val2_t __percpu_kptr *p2; + struct val_t __percpu_kptr *p; + struct elem *e; + int index = 0; + + e = bpf_map_lookup_elem(&array, &index); + if (!e) + return 0; + + p2 = bpf_percpu_obj_new(struct val2_t); + if (!p2) + return 0; + + p = bpf_kptr_xchg(&e->pc, p2); + if (p) + bpf_percpu_obj_drop(p); + + return 0; +} + +SEC("?fentry.s/bpf_fentry_test1") +__failure __msg("R1 type=scalar expected=percpu_ptr_, percpu_rcu_ptr_, percpu_trusted_ptr_") +int BPF_PROG(test_array_map_3) +{ + struct val_t __percpu_kptr *p, *p1; + struct val_t *v; + struct elem *e; + int index = 0; + + e = bpf_map_lookup_elem(&array, &index); + if (!e) + return 0; + + p = bpf_percpu_obj_new(struct val_t); + if (!p) + return 0; + + p1 = bpf_kptr_xchg(&e->pc, p); + if (p1) + bpf_percpu_obj_drop(p1); + + v = bpf_this_cpu_ptr(p); + ret = v->b; + return 0; +} + +SEC("?fentry.s/bpf_fentry_test1") +__failure __msg("arg#0 expected for bpf_percpu_obj_drop_impl()") +int BPF_PROG(test_array_map_4) +{ + struct val_t __percpu_kptr *p; + + p = bpf_percpu_obj_new(struct val_t); + if (!p) + return 0; + + bpf_obj_drop(p); + return 0; +} + +SEC("?fentry.s/bpf_fentry_test1") +__failure __msg("arg#0 expected for bpf_obj_drop_impl()") +int BPF_PROG(test_array_map_5) +{ + struct val_t *p; + + p = bpf_obj_new(struct val_t); + if (!p) + return 0; + + bpf_percpu_obj_drop(p); + return 0; +} + +SEC("?fentry.s/bpf_fentry_test1") +__failure __msg("bpf_percpu_obj_new type ID argument must be of a struct of scalars") +int BPF_PROG(test_array_map_6) +{ + struct val_with_ptr_t __percpu_kptr *p; + + p = bpf_percpu_obj_new(struct val_with_ptr_t); + if (!p) + return 0; + + bpf_percpu_obj_drop(p); + return 0; +} + +SEC("?fentry.s/bpf_fentry_test1") +__failure __msg("bpf_percpu_obj_new type ID argument must not contain special fields") +int BPF_PROG(test_array_map_7) +{ + struct val_with_rb_root_t __percpu_kptr *p; + + p = bpf_percpu_obj_new(struct val_with_rb_root_t); + if (!p) + return 0; + + bpf_percpu_obj_drop(p); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c b/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c new file mode 100644 index 000000000000..55907ef961bf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_experimental.h" + +struct bin_data { + char data[256]; + struct bpf_spin_lock lock; +}; + +struct map_value { + struct bin_data __kptr * data; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 2048); +} array SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +bool nomem_err = false; + +static int del_array(unsigned int i, int *from) +{ + struct map_value *value; + struct bin_data *old; + + value = bpf_map_lookup_elem(&array, from); + if (!value) + return 1; + + old = bpf_kptr_xchg(&value->data, NULL); + if (old) + bpf_obj_drop(old); + + (*from)++; + return 0; +} + +static int add_array(unsigned int i, int *from) +{ + struct bin_data *old, *new; + struct map_value *value; + + value = bpf_map_lookup_elem(&array, from); + if (!value) + return 1; + + new = bpf_obj_new(typeof(*new)); + if (!new) { + nomem_err = true; + return 1; + } + + old = bpf_kptr_xchg(&value->data, new); + if (old) + bpf_obj_drop(old); + + (*from)++; + return 0; +} + +static void del_then_add_array(int from) +{ + int i; + + i = from; + bpf_loop(512, del_array, &i, 0); + + i = from; + bpf_loop(512, add_array, &i, 0); +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG2(test0, int, a) +{ + del_then_add_array(0); + return 0; +} + +SEC("fentry/bpf_fentry_test2") +int BPF_PROG2(test1, int, a, u64, b) +{ + del_then_add_array(512); + return 0; +} + +SEC("fentry/bpf_fentry_test3") +int BPF_PROG2(test2, char, a, int, b, u64, c) +{ + del_then_add_array(1024); + return 0; +} + +SEC("fentry/bpf_fentry_test4") +int BPF_PROG2(test3, void *, a, char, b, int, c, u64, d) +{ + del_then_add_array(1536); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index f799d87e8700..897061930cb7 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -609,7 +609,7 @@ out: } SEC("tracepoint/syscalls/sys_enter_kill") -int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx) +int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx) { struct bpf_func_stats_ctx stats_ctx; diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c new file mode 100644 index 000000000000..4dfbc8552558 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_ADDRESS[] = "\0bpf_cgroup_unix_test"; + +SEC("cgroup/recvmsg_unix") +int recvmsg_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_ADDRESS) - 1; + int ret; + + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_ADDRESS, + sizeof(SERVUN_ADDRESS) - 1); + if (ret) + return 1; + + if (sa_kern->uaddrlen != unaddrlen) + return 1; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS, + sizeof(SERVUN_ADDRESS) - 1) != 0) + return 1; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c new file mode 100644 index 000000000000..1f67e832666e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite"; + +SEC("cgroup/sendmsg_unix") +int sendmsg_unix_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx); + struct sockaddr_un *sa_kern_unaddr; + __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) + + sizeof(SERVUN_REWRITE_ADDRESS) - 1; + int ret; + + /* Rewrite destination. */ + ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1); + if (ret) + return 0; + + if (sa_kern->uaddrlen != unaddrlen) + return 0; + + sa_kern_unaddr = bpf_rdonly_cast(sa_kern->uaddr, + bpf_core_type_id_kernel(struct sockaddr_un)); + if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS, + sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0) + return 0; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c new file mode 100644 index 000000000000..8436c6729167 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Leon Hwang */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +int count = 0; + +SEC("fentry/subprog_tail") +int BPF_PROG(fentry, struct sk_buff *skb) +{ + count++; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c new file mode 100644 index 000000000000..fe16412c6e6e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Leon Hwang */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +int count = 0; + +SEC("fexit/subprog_tail") +int BPF_PROG(fexit, struct sk_buff *skb) +{ + count++; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c index 67c14ba1e87b..3ddcb3777912 100644 --- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c +++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c @@ -6,7 +6,8 @@ #include <bpf/bpf_tracing.h> #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18 const volatile int skip = 0; #else const volatile int skip = 1; @@ -104,7 +105,11 @@ int _tc(volatile struct __sk_buff *skb) "%[tmp_mark] = r1" : [tmp_mark]"=r"(tmp_mark) : [ctx]"r"(skb), - [off_mark]"i"(offsetof(struct __sk_buff, mark)) + [off_mark]"i"(offsetof(struct __sk_buff, mark) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + + sizeof(skb->mark) - 1 +#endif + ) : "r1"); #else tmp_mark = (char)skb->mark; diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c new file mode 100644 index 000000000000..896c88a4960d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_uprobe.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Hengqi Chen */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +pid_t my_pid = 0; + +int test1_result = 0; +int test2_result = 0; +int test3_result = 0; +int test4_result = 0; + +SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset") +int BPF_UPROBE(test1) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test1_result = 1; + return 0; +} + +SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset@LIBURANDOM_READ_1.0.0") +int BPF_UPROBE(test2) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test2_result = 1; + return 0; +} + +SEC("uretprobe/./liburandom_read.so:urandlib_api_sameoffset@@LIBURANDOM_READ_2.0.0") +int BPF_URETPROBE(test3, int ret) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test3_result = ret; + return 0; +} + +SEC("uprobe") +int BPF_UPROBE(test4) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + test4_result = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 4b8e37f7fd06..78b23934d9f8 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -16,12 +16,12 @@ bool kprobe_called = false; bool fentry_called = false; SEC("tp/syscalls/sys_enter_nanosleep") -int handle__tp(struct trace_event_raw_sys_enter *args) +int handle__tp(struct syscall_trace_enter *args) { struct __kernel_timespec *ts; long tv_nsec; - if (args->id != __NR_nanosleep) + if (args->nr != __NR_nanosleep) return 0; ts = (void *)args->args[0]; diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index 9a16d95213e1..8b946c8188c6 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -51,7 +51,7 @@ struct { __uint(max_entries, 1); __type(key, int); __type(value, struct elem); -} abs_timer SEC(".maps"); +} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"); __u64 bss_data; __u64 abs_data; @@ -59,6 +59,8 @@ __u64 err; __u64 ok; __u64 callback_check = 52; __u64 callback2_check = 52; +__u64 pinned_callback_check; +__s32 pinned_cpu; #define ARRAY 1 #define HTAB 2 @@ -329,3 +331,62 @@ int BPF_PROG2(test3, int, a) return 0; } + +/* callback for pinned timer */ +static int timer_cb_pinned(void *map, int *key, struct bpf_timer *timer) +{ + __s32 cpu = bpf_get_smp_processor_id(); + + if (cpu != pinned_cpu) + err |= 16384; + + pinned_callback_check++; + return 0; +} + +static void test_pinned_timer(bool soft) +{ + int key = 0; + void *map; + struct bpf_timer *timer; + __u64 flags = BPF_F_TIMER_CPU_PIN; + __u64 start_time; + + if (soft) { + map = &soft_timer_pinned; + start_time = 0; + } else { + map = &abs_timer_pinned; + start_time = bpf_ktime_get_boot_ns(); + flags |= BPF_F_TIMER_ABS; + } + + timer = bpf_map_lookup_elem(map, &key); + if (timer) { + if (bpf_timer_init(timer, map, CLOCK_BOOTTIME) != 0) + err |= 4096; + bpf_timer_set_callback(timer, timer_cb_pinned); + pinned_cpu = bpf_get_smp_processor_id(); + bpf_timer_start(timer, start_time + 1000, flags); + } else { + err |= 8192; + } +} + +SEC("fentry/bpf_fentry_test4") +int BPF_PROG2(test4, int, a) +{ + bpf_printk("test4"); + test_pinned_timer(true); + + return 0; +} + +SEC("fentry/bpf_fentry_test5") +int BPF_PROG2(test5, int, a) +{ + bpf_printk("test5"); + test_pinned_timer(false); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c index 8893094725f0..107525fb4a6a 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bswap.c +++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c @@ -5,7 +5,9 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + __clang_major__ >= 18 SEC("socket") __description("BSWAP, 16") diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index 2dae5322a18e..9f202eda952f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -5,7 +5,9 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + __clang_major__ >= 18 SEC("socket") __description("gotol, small_imm") diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c index 0c638f45aaf1..375525329637 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -5,19 +5,25 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + __clang_major__ >= 18 SEC("socket") __description("LDSX, S8") __success __success_unpriv __retval(-2) __naked void ldsx_s8(void) { - asm volatile (" \ - r1 = 0x3fe; \ - *(u64 *)(r10 - 8) = r1; \ - r0 = *(s8 *)(r10 - 8); \ - exit; \ -" ::: __clobber_all); + asm volatile ( + "r1 = 0x3fe;" + "*(u64 *)(r10 - 8) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s8 *)(r10 - 8);" +#else + "r0 = *(s8 *)(r10 - 1);" +#endif + "exit;" + ::: __clobber_all); } SEC("socket") @@ -25,12 +31,16 @@ __description("LDSX, S16") __success __success_unpriv __retval(-2) __naked void ldsx_s16(void) { - asm volatile (" \ - r1 = 0x3fffe; \ - *(u64 *)(r10 - 8) = r1; \ - r0 = *(s16 *)(r10 - 8); \ - exit; \ -" ::: __clobber_all); + asm volatile ( + "r1 = 0x3fffe;" + "*(u64 *)(r10 - 8) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s16 *)(r10 - 8);" +#else + "r0 = *(s16 *)(r10 - 2);" +#endif + "exit;" + ::: __clobber_all); } SEC("socket") @@ -38,35 +48,43 @@ __description("LDSX, S32") __success __success_unpriv __retval(-1) __naked void ldsx_s32(void) { - asm volatile (" \ - r1 = 0xfffffffe; \ - *(u64 *)(r10 - 8) = r1; \ - r0 = *(s32 *)(r10 - 8); \ - r0 >>= 1; \ - exit; \ -" ::: __clobber_all); + asm volatile ( + "r1 = 0xfffffffe;" + "*(u64 *)(r10 - 8) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s32 *)(r10 - 8);" +#else + "r0 = *(s32 *)(r10 - 4);" +#endif + "r0 >>= 1;" + "exit;" + ::: __clobber_all); } SEC("socket") __description("LDSX, S8 range checking, privileged") __log_level(2) __success __retval(1) -__msg("R1_w=scalar(smin=-128,smax=127)") +__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)") __naked void ldsx_s8_range_priv(void) { - asm volatile (" \ - call %[bpf_get_prandom_u32]; \ - *(u64 *)(r10 - 8) = r0; \ - r1 = *(s8 *)(r10 - 8); \ - /* r1 with s8 range */ \ - if r1 s> 0x7f goto l0_%=; \ - if r1 s< -0x80 goto l0_%=; \ - r0 = 1; \ -l1_%=: \ - exit; \ -l0_%=: \ - r0 = 2; \ - goto l1_%=; \ -" : + asm volatile ( + "call %[bpf_get_prandom_u32];" + "*(u64 *)(r10 - 8) = r0;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(s8 *)(r10 - 8);" +#else + "r1 = *(s8 *)(r10 - 1);" +#endif + /* r1 with s8 range */ + "if r1 s> 0x7f goto l0_%=;" + "if r1 s< -0x80 goto l0_%=;" + "r0 = 1;" +"l1_%=:" + "exit;" +"l0_%=:" + "r0 = 2;" + "goto l1_%=;" + : : __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -76,20 +94,24 @@ __description("LDSX, S16 range checking") __success __success_unpriv __retval(1) __naked void ldsx_s16_range(void) { - asm volatile (" \ - call %[bpf_get_prandom_u32]; \ - *(u64 *)(r10 - 8) = r0; \ - r1 = *(s16 *)(r10 - 8); \ - /* r1 with s16 range */ \ - if r1 s> 0x7fff goto l0_%=; \ - if r1 s< -0x8000 goto l0_%=; \ - r0 = 1; \ -l1_%=: \ - exit; \ -l0_%=: \ - r0 = 2; \ - goto l1_%=; \ -" : + asm volatile ( + "call %[bpf_get_prandom_u32];" + "*(u64 *)(r10 - 8) = r0;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(s16 *)(r10 - 8);" +#else + "r1 = *(s16 *)(r10 - 2);" +#endif + /* r1 with s16 range */ + "if r1 s> 0x7fff goto l0_%=;" + "if r1 s< -0x8000 goto l0_%=;" + "r0 = 1;" +"l1_%=:" + "exit;" +"l0_%=:" + "r0 = 2;" + "goto l1_%=;" + : : __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -99,20 +121,24 @@ __description("LDSX, S32 range checking") __success __success_unpriv __retval(1) __naked void ldsx_s32_range(void) { - asm volatile (" \ - call %[bpf_get_prandom_u32]; \ - *(u64 *)(r10 - 8) = r0; \ - r1 = *(s32 *)(r10 - 8); \ - /* r1 with s16 range */ \ - if r1 s> 0x7fffFFFF goto l0_%=; \ - if r1 s< -0x80000000 goto l0_%=; \ - r0 = 1; \ -l1_%=: \ - exit; \ -l0_%=: \ - r0 = 2; \ - goto l1_%=; \ -" : + asm volatile ( + "call %[bpf_get_prandom_u32];" + "*(u64 *)(r10 - 8) = r0;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r1 = *(s32 *)(r10 - 8);" +#else + "r1 = *(s32 *)(r10 - 4);" +#endif + /* r1 with s16 range */ + "if r1 s> 0x7fffFFFF goto l0_%=;" + "if r1 s< -0x80000000 goto l0_%=;" + "r0 = 1;" +"l1_%=:" + "exit;" +"l0_%=:" + "r0 = 2;" + "goto l1_%=;" + : : __imm(bpf_get_prandom_u32) : __clobber_all); } diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index 3c8ac2c57b1b..b2a04d1179d0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -5,7 +5,9 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + __clang_major__ >= 18 SEC("socket") __description("MOV32SX, S8") diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c index 0990f8825675..8fc5174808b2 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c +++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c @@ -5,7 +5,9 @@ #include "bpf_misc.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + __clang_major__ >= 18 SEC("socket") __description("SDIV32, non-zero imm divisor, check 1") diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c index 24369f242853..ccde6a4c6319 100644 --- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -3,11 +3,12 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "xsk_xdp_metadata.h" +#include <linux/if_ether.h> +#include "xsk_xdp_common.h" struct { __uint(type, BPF_MAP_TYPE_XSKMAP); - __uint(max_entries, 1); + __uint(max_entries, 2); __uint(key_size, sizeof(int)); __uint(value_size, sizeof(int)); } xsk SEC(".maps"); @@ -52,4 +53,21 @@ SEC("xdp.frags") int xsk_xdp_populate_metadata(struct xdp_md *xdp) return bpf_redirect_map(&xsk, 0, XDP_DROP); } +SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + struct ethhdr *eth = data; + + if (eth + 1 > data_end) + return XDP_DROP; + + /* Redirecting packets based on the destination MAC address */ + idx = ((unsigned int)(eth->h_dest[5])) / 2; + if (idx > MAX_SOCKETS) + return XDP_DROP; + + return bpf_redirect_map(&xsk, idx, XDP_DROP); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index 0cfece7ff4f8..0ed67b6b31dd 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -509,6 +509,15 @@ def main(): source_map_types.remove('cgroup_storage_deprecated') source_map_types.add('cgroup_storage') + # The same applied to BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and + # BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE which share the same enum value + # and source_map_types picks + # BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED/percpu_cgroup_storage_deprecated. + # Replace 'percpu_cgroup_storage_deprecated' with 'percpu_cgroup_storage' + # so it aligns with what `bpftool map help` shows. + source_map_types.remove('percpu_cgroup_storage_deprecated') + source_map_types.add('percpu_cgroup_storage') + help_map_types = map_info.get_map_help() help_map_options = map_info.get_options() map_info.close() diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index b4edd8454934..37ffa57f28a1 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -69,7 +69,7 @@ static int tester_init(struct test_loader *tester) { if (!tester->log_buf) { tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ; - tester->log_buf = malloc(tester->log_buf_sz); + tester->log_buf = calloc(tester->log_buf_sz, 1); if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf")) return -ENOMEM; } @@ -538,7 +538,7 @@ void run_subtest(struct test_loader *tester, bool unpriv) { struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv; - struct bpf_program *tprog, *tprog_iter; + struct bpf_program *tprog = NULL, *tprog_iter; struct test_spec *spec_iter; struct cap_state caps = {}; struct bpf_object *tobj; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 4d582cac2c09..1b9387890148 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -255,7 +255,7 @@ static void print_subtest_name(int test_num, int subtest_num, const char *test_name, char *subtest_name, char *result) { - char test_num_str[TEST_NUM_WIDTH + 1]; + char test_num_str[32]; snprintf(test_num_str, sizeof(test_num_str), "%d/%d", test_num, subtest_num); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 77bd492c6024..2f9f6f250f17 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -417,6 +417,8 @@ int get_bpf_max_tramp_links(void); #define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep" #elif defined(__aarch64__) #define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep" +#elif defined(__riscv) +#define SYS_NANOSLEEP_KPROBE_NAME "__riscv_sys_nanosleep" #else #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" #endif diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 2aa5a3445056..65aafe0003db 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -73,17 +73,33 @@ # # Run test suite for physical device in loopback mode # sudo ./test_xsk.sh -i IFACE +# +# Run test suite in a specific mode only [skb,drv,zc] +# sudo ./test_xsk.sh -m MODE +# +# List available tests +# ./test_xsk.sh -l +# +# Run a specific test from the test suite +# sudo ./test_xsk.sh -t TEST_NAME +# +# Display the available command line options +# ./test_xsk.sh -h . xsk_prereqs.sh ETH="" -while getopts "vi:d" flag +while getopts "vi:dm:lt:h" flag do case "${flag}" in v) verbose=1;; d) debug=1;; i) ETH=${OPTARG};; + m) MODE=${OPTARG};; + l) list=1;; + t) TEST=${OPTARG};; + h) help=1;; esac done @@ -131,6 +147,16 @@ setup_vethPairs() { ip link set ${VETH0} up } +if [[ $list -eq 1 ]]; then + ./${XSKOBJ} -l + exit +fi + +if [[ $help -eq 1 ]]; then + ./${XSKOBJ} + exit +fi + if [ ! -z $ETH ]; then VETH0=${ETH} VETH1=${ETH} @@ -153,6 +179,14 @@ if [[ $verbose -eq 1 ]]; then ARGS+="-v " fi +if [ -n "$MODE" ]; then + ARGS+="-m ${MODE} " +fi + +if [ -n "$TEST" ]; then + ARGS+="-t ${TEST} " +fi + retval=$? test_status $retval "${TEST_NAME}" @@ -175,6 +209,10 @@ else cleanup_iface ${ETH} ${MTU} fi +if [[ $list -eq 1 ]]; then + exit +fi + TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL" busy_poll=1 diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index f83d9f65c65b..4faa898ff7fc 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -7,6 +7,7 @@ #include <errno.h> #include <fcntl.h> #include <poll.h> +#include <pthread.h> #include <unistd.h> #include <linux/perf_event.h> #include <sys/mman.h> @@ -14,104 +15,165 @@ #include <linux/limits.h> #include <libelf.h> #include <gelf.h> +#include "bpf/libbpf_internal.h" #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" -#define MAX_SYMS 400000 -static struct ksym syms[MAX_SYMS]; -static int sym_cnt; +struct ksyms { + struct ksym *syms; + size_t sym_cap; + size_t sym_cnt; +}; + +static struct ksyms *ksyms; +static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER; + +static int ksyms__add_symbol(struct ksyms *ksyms, const char *name, + unsigned long addr) +{ + void *tmp; + + tmp = strdup(name); + if (!tmp) + return -ENOMEM; + ksyms->syms[ksyms->sym_cnt].addr = addr; + ksyms->syms[ksyms->sym_cnt].name = tmp; + ksyms->sym_cnt++; + return 0; +} + +void free_kallsyms_local(struct ksyms *ksyms) +{ + unsigned int i; + + if (!ksyms) + return; + + if (!ksyms->syms) { + free(ksyms); + return; + } + + for (i = 0; i < ksyms->sym_cnt; i++) + free(ksyms->syms[i].name); + free(ksyms->syms); + free(ksyms); +} static int ksym_cmp(const void *p1, const void *p2) { return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; } -int load_kallsyms_refresh(void) +struct ksyms *load_kallsyms_local(void) { FILE *f; char func[256], buf[256]; char symbol; void *addr; - int i = 0; - - sym_cnt = 0; + int ret; + struct ksyms *ksyms; f = fopen("/proc/kallsyms", "r"); if (!f) - return -ENOENT; + return NULL; + + ksyms = calloc(1, sizeof(struct ksyms)); + if (!ksyms) { + fclose(f); + return NULL; + } while (fgets(buf, sizeof(buf), f)) { if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) break; if (!addr) continue; - if (i >= MAX_SYMS) - return -EFBIG; - syms[i].addr = (long) addr; - syms[i].name = strdup(func); - i++; + ret = libbpf_ensure_mem((void **) &ksyms->syms, &ksyms->sym_cap, + sizeof(struct ksym), ksyms->sym_cnt + 1); + if (ret) + goto error; + ret = ksyms__add_symbol(ksyms, func, (unsigned long)addr); + if (ret) + goto error; } fclose(f); - sym_cnt = i; - qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); - return 0; + qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), ksym_cmp); + return ksyms; + +error: + fclose(f); + free_kallsyms_local(ksyms); + return NULL; } int load_kallsyms(void) { - /* - * This is called/used from multiplace places, - * load symbols just once. - */ - if (sym_cnt) - return 0; - return load_kallsyms_refresh(); + pthread_mutex_lock(&ksyms_mutex); + if (!ksyms) + ksyms = load_kallsyms_local(); + pthread_mutex_unlock(&ksyms_mutex); + return ksyms ? 0 : 1; } -struct ksym *ksym_search(long key) +struct ksym *ksym_search_local(struct ksyms *ksyms, long key) { - int start = 0, end = sym_cnt; + int start = 0, end = ksyms->sym_cnt; int result; /* kallsyms not loaded. return NULL */ - if (sym_cnt <= 0) + if (ksyms->sym_cnt <= 0) return NULL; while (start < end) { size_t mid = start + (end - start) / 2; - result = key - syms[mid].addr; + result = key - ksyms->syms[mid].addr; if (result < 0) end = mid; else if (result > 0) start = mid + 1; else - return &syms[mid]; + return &ksyms->syms[mid]; } - if (start >= 1 && syms[start - 1].addr < key && - key < syms[start].addr) + if (start >= 1 && ksyms->syms[start - 1].addr < key && + key < ksyms->syms[start].addr) /* valid ksym */ - return &syms[start - 1]; + return &ksyms->syms[start - 1]; /* out of range. return _stext */ - return &syms[0]; + return &ksyms->syms[0]; } -long ksym_get_addr(const char *name) +struct ksym *ksym_search(long key) +{ + if (!ksyms) + return NULL; + return ksym_search_local(ksyms, key); +} + +long ksym_get_addr_local(struct ksyms *ksyms, const char *name) { int i; - for (i = 0; i < sym_cnt; i++) { - if (strcmp(syms[i].name, name) == 0) - return syms[i].addr; + for (i = 0; i < ksyms->sym_cnt; i++) { + if (strcmp(ksyms->syms[i].name, name) == 0) + return ksyms->syms[i].addr; } return 0; } +long ksym_get_addr(const char *name) +{ + if (!ksyms) + return 0; + return ksym_get_addr_local(ksyms, name); +} + /* open kallsyms and read symbol addresses on the fly. Without caching all symbols, * this is faster than load + find. */ diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 876f3e711df6..04fd1da7079d 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -11,13 +11,17 @@ struct ksym { long addr; char *name; }; +struct ksyms; int load_kallsyms(void); -int load_kallsyms_refresh(void); - struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); +struct ksyms *load_kallsyms_local(void); +struct ksym *ksym_search_local(struct ksyms *ksyms, long key); +long ksym_get_addr_local(struct ksyms *ksyms, const char *name); +void free_kallsyms_local(struct ksyms *ksyms); + /* open kallsyms and find addresses on the fly, faster than load + search. */ int kallsyms_find(const char *sym, unsigned long long *addr); diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c index e92644d0fa75..4ed795655b9f 100644 --- a/tools/testing/selftests/bpf/urandom_read.c +++ b/tools/testing/selftests/bpf/urandom_read.c @@ -11,6 +11,9 @@ #define _SDT_HAS_SEMAPHORES 1 #include "sdt.h" +#define SHARED 1 +#include "bpf/libbpf_internal.h" + #define SEC(name) __attribute__((section(name), used)) #define BUF_SIZE 256 @@ -21,10 +24,14 @@ void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz); void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz); void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz); +int urandlib_api(void); +COMPAT_VERSION(urandlib_api_old, urandlib_api, LIBURANDOM_READ_1.0.0) +int urandlib_api_old(void); +int urandlib_api_sameoffset(void); + unsigned short urand_read_with_sema_semaphore SEC(".probes"); -static __attribute__((noinline)) -void urandom_read(int fd, int count) +static noinline void urandom_read(int fd, int count) { char buf[BUF_SIZE]; int i; @@ -83,6 +90,10 @@ int main(int argc, char *argv[]) urandom_read(fd, count); + urandlib_api(); + urandlib_api_old(); + urandlib_api_sameoffset(); + close(fd); return 0; } diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c index 86186e24b740..8c1356d8b4ee 100644 --- a/tools/testing/selftests/bpf/urandom_read_lib1.c +++ b/tools/testing/selftests/bpf/urandom_read_lib1.c @@ -3,6 +3,9 @@ #define _SDT_HAS_SEMAPHORES 1 #include "sdt.h" +#define SHARED 1 +#include "bpf/libbpf_internal.h" + #define SEC(name) __attribute__((section(name), used)) unsigned short urandlib_read_with_sema_semaphore SEC(".probes"); @@ -11,3 +14,22 @@ void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz) { STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz); } + +COMPAT_VERSION(urandlib_api_v1, urandlib_api, LIBURANDOM_READ_1.0.0) +int urandlib_api_v1(void) +{ + return 1; +} + +DEFAULT_VERSION(urandlib_api_v2, urandlib_api, LIBURANDOM_READ_2.0.0) +int urandlib_api_v2(void) +{ + return 2; +} + +COMPAT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_1.0.0) +DEFAULT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_2.0.0) +int urandlib_api_sameoffset(void) +{ + return 3; +} diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c index b449788fbd39..595c79141cf3 100644 --- a/tools/testing/selftests/bpf/xdp_features.c +++ b/tools/testing/selftests/bpf/xdp_features.c @@ -360,9 +360,9 @@ static int recv_msg(int sockfd, void *buf, size_t bufsize, void *val, static int dut_run(struct xdp_features *skel) { int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE; - int state, err, *sockfd, ctrl_sockfd, echo_sockfd; + int state, err = 0, *sockfd, ctrl_sockfd, echo_sockfd; struct sockaddr_storage ctrl_addr; - pthread_t dut_thread; + pthread_t dut_thread = 0; socklen_t addrlen; sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL, diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 613321eb84c1..17c980138796 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -234,7 +234,7 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t struct pollfd fds[rxq + 1]; __u64 comp_addr; __u64 addr; - __u32 idx; + __u32 idx = 0; int ret; int i; diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index d9fb2b730a2c..e574711eeb84 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -442,10 +442,9 @@ void xsk_clear_xskmap(struct bpf_map *map) bpf_map_delete_elem(map_fd, &index); } -int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk) +int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index) { int map_fd, sock_fd; - u32 index = 0; map_fd = bpf_map__fd(map); sock_fd = xsk_socket__fd(xsk); diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index d93200fdaa8d..771570bc3731 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -204,7 +204,7 @@ struct xsk_umem_config { int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags); void xsk_detach_xdp_program(int ifindex, u32 xdp_flags); -int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk); +int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index); void xsk_clear_xskmap(struct bpf_map *map); bool xsk_is_in_mode(u32 ifindex, int mode); diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index 29175682c44d..47c7b8064f38 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -83,9 +83,11 @@ exec_xskxceiver() fi ./${XSKOBJ} -i ${VETH0} -i ${VETH1} ${ARGS} - retval=$? - test_status $retval "${TEST_NAME}" - statusList+=($retval) - nameList+=(${TEST_NAME}) + + if [[ $list -ne 1 ]]; then + test_status $retval "${TEST_NAME}" + statusList+=($retval) + nameList+=(${TEST_NAME}) + fi } diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h new file mode 100644 index 000000000000..5a6f36f07383 --- /dev/null +++ b/tools/testing/selftests/bpf/xsk_xdp_common.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef XSK_XDP_COMMON_H_ +#define XSK_XDP_COMMON_H_ + +#define MAX_SOCKETS 2 + +struct xdp_info { + __u64 count; +} __attribute__((aligned(32))); + +#endif /* XSK_XDP_COMMON_H_ */ diff --git a/tools/testing/selftests/bpf/xsk_xdp_metadata.h b/tools/testing/selftests/bpf/xsk_xdp_metadata.h deleted file mode 100644 index 943133da378a..000000000000 --- a/tools/testing/selftests/bpf/xsk_xdp_metadata.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -struct xdp_info { - __u64 count; -} __attribute__((aligned(32))); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 2827f2d7cf30..591ca9637b23 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -80,6 +80,7 @@ #include <linux/if_ether.h> #include <linux/mman.h> #include <linux/netdev.h> +#include <linux/bitmap.h> #include <arpa/inet.h> #include <net/if.h> #include <locale.h> @@ -102,10 +103,12 @@ #include <bpf/bpf.h> #include <linux/filter.h> #include "../kselftest.h" -#include "xsk_xdp_metadata.h" +#include "xsk_xdp_common.h" -static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; -static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; +static bool opt_verbose; +static bool opt_print_tests; +static enum test_mode opt_mode = TEST_MODE_ALL; +static u32 opt_run_test = RUN_ALL_TESTS; static void __exit_with_error(int error, const char *file, const char *func, int line) { @@ -154,10 +157,10 @@ static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) ptr[i] = htonl(pkt_nb << 16 | (i + start)); } -static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) +static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr) { - memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); - memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); + memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN); + memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN); eth_hdr->h_proto = htons(ETH_P_LOOPBACK); } @@ -255,7 +258,7 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i cfg.bind_flags = ifobject->bind_flags; if (shared) cfg.bind_flags |= XDP_SHARED_UMEM; - if (ifobject->pkt_stream && ifobject->mtu > MAX_ETH_PKT_SIZE) + if (ifobject->mtu > MAX_ETH_PKT_SIZE) cfg.bind_flags |= XDP_USE_SG; txr = ifobject->tx_on ? &xsk->tx : NULL; @@ -310,19 +313,28 @@ static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, {"busy-poll", no_argument, 0, 'b'}, {"verbose", no_argument, 0, 'v'}, + {"mode", required_argument, 0, 'm'}, + {"list", no_argument, 0, 'l'}, + {"test", required_argument, 0, 't'}, + {"help", no_argument, 0, 'h'}, {0, 0, 0, 0} }; -static void usage(const char *prog) +static void print_usage(char **argv) { const char *str = - " Usage: %s [OPTIONS]\n" + " Usage: xskxceiver [OPTIONS]\n" " Options:\n" " -i, --interface Use interface\n" " -v, --verbose Verbose output\n" - " -b, --busy-poll Enable busy poll\n"; + " -b, --busy-poll Enable busy poll\n" + " -m, --mode Run only mode skb, drv, or zc\n" + " -l, --list List all available tests\n" + " -t, --test Run a specific test. Enter number from -l option.\n" + " -h, --help Display this help and exit\n"; - ksft_print_msg(str, prog); + ksft_print_msg(str, basename(argv[0])); + ksft_exit_xfail(); } static bool validate_interface(struct ifobject *ifobj) @@ -342,7 +354,7 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:vb", long_options, &option_index); + c = getopt_long(argc, argv, "i:vbm:lt:", long_options, &option_index); if (c == -1) break; @@ -371,9 +383,28 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj ifobj_tx->busy_poll = true; ifobj_rx->busy_poll = true; break; + case 'm': + if (!strncmp("skb", optarg, strlen(optarg))) + opt_mode = TEST_MODE_SKB; + else if (!strncmp("drv", optarg, strlen(optarg))) + opt_mode = TEST_MODE_DRV; + else if (!strncmp("zc", optarg, strlen(optarg))) + opt_mode = TEST_MODE_ZC; + else + print_usage(argv); + break; + case 'l': + opt_print_tests = true; + break; + case 't': + errno = 0; + opt_run_test = strtol(optarg, NULL, 0); + if (errno) + print_usage(argv); + break; + case 'h': default: - usage(basename(argv[0])); - ksft_exit_xfail(); + print_usage(argv); } } } @@ -396,11 +427,9 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, if (i == 0) { ifobj->rx_on = false; ifobj->tx_on = true; - ifobj->pkt_stream = test->tx_pkt_stream_default; } else { ifobj->rx_on = true; ifobj->tx_on = false; - ifobj->pkt_stream = test->rx_pkt_stream_default; } memset(ifobj->umem, 0, sizeof(*ifobj->umem)); @@ -410,6 +439,15 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, for (j = 0; j < MAX_SOCKETS; j++) { memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + if (i == 0) + ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default; + else + ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default; + + memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN); + memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN); + ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0); + ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1); } } @@ -427,7 +465,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, } static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, - struct ifobject *ifobj_rx, enum test_mode mode) + struct ifobject *ifobj_rx, enum test_mode mode, + const struct test_spec *test_to_run) { struct pkt_stream *tx_pkt_stream; struct pkt_stream *rx_pkt_stream; @@ -449,6 +488,8 @@ static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, ifobj->bind_flags |= XDP_COPY; } + strncpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE); + test->test_func = test_to_run->test_func; test->mode = mode; __test_spec_init(test, ifobj_tx, ifobj_rx); } @@ -458,11 +499,6 @@ static void test_spec_reset(struct test_spec *test) __test_spec_init(test, test->ifobj_tx, test->ifobj_rx); } -static void test_spec_set_name(struct test_spec *test, const char *name) -{ - strncpy(test->name, name, MAX_TEST_NAME_SIZE); -} - static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx, struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx, struct bpf_map *xskmap_tx) @@ -495,8 +531,10 @@ static int test_spec_set_mtu(struct test_spec *test, int mtu) static void pkt_stream_reset(struct pkt_stream *pkt_stream) { - if (pkt_stream) + if (pkt_stream) { pkt_stream->current_pkt_nb = 0; + pkt_stream->nb_rx_pkts = 0; + } } static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) @@ -526,17 +564,17 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream) static void pkt_stream_restore_default(struct test_spec *test) { - struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream; - struct pkt_stream *rx_pkt_stream = test->ifobj_rx->pkt_stream; + struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream; + struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream; if (tx_pkt_stream != test->tx_pkt_stream_default) { - pkt_stream_delete(test->ifobj_tx->pkt_stream); - test->ifobj_tx->pkt_stream = test->tx_pkt_stream_default; + pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream); + test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default; } if (rx_pkt_stream != test->rx_pkt_stream_default) { - pkt_stream_delete(test->ifobj_rx->pkt_stream); - test->ifobj_rx->pkt_stream = test->rx_pkt_stream_default; + pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream); + test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default; } } @@ -596,14 +634,16 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk return nb_frags; } -static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len) +static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) { pkt->offset = offset; pkt->len = len; - if (len > MAX_ETH_JUMBO_SIZE) + if (len > MAX_ETH_JUMBO_SIZE) { pkt->valid = false; - else + } else { pkt->valid = true; + pkt_stream->nb_valid_entries++; + } } static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) @@ -611,7 +651,7 @@ static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) return ceil_u32(len, umem->frame_size) * umem->frame_size; } -static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) +static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off) { struct pkt_stream *pkt_stream; u32 i; @@ -625,41 +665,45 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb for (i = 0; i < nb_pkts; i++) { struct pkt *pkt = &pkt_stream->pkts[i]; - pkt_set(umem, pkt, 0, pkt_len); - pkt->pkt_nb = i; + pkt_set(pkt_stream, pkt, 0, pkt_len); + pkt->pkt_nb = nb_start + i * nb_off; } return pkt_stream; } -static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem, - struct pkt_stream *pkt_stream) +static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len) { - return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len); + return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1); +} + +static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream) +{ + return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len); } static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) { struct pkt_stream *pkt_stream; - pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len); - test->ifobj_tx->pkt_stream = pkt_stream; - pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, nb_pkts, pkt_len); - test->ifobj_rx->pkt_stream = pkt_stream; + pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); + test->ifobj_tx->xsk->pkt_stream = pkt_stream; + pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); + test->ifobj_rx->xsk->pkt_stream = pkt_stream; } static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, int offset) { - struct xsk_umem_info *umem = ifobj->umem; struct pkt_stream *pkt_stream; u32 i; - pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream); - for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2) - pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len); + pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream); + for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2) + pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); - ifobj->pkt_stream = pkt_stream; + ifobj->xsk->pkt_stream = pkt_stream; + pkt_stream->nb_valid_entries /= 2; } static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) @@ -670,15 +714,34 @@ static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int off static void pkt_stream_receive_half(struct test_spec *test) { - struct xsk_umem_info *umem = test->ifobj_rx->umem; - struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream; + struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream; u32 i; - test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts, - pkt_stream->pkts[0].len); - pkt_stream = test->ifobj_rx->pkt_stream; + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts, + pkt_stream->pkts[0].len); + pkt_stream = test->ifobj_rx->xsk->pkt_stream; for (i = 1; i < pkt_stream->nb_pkts; i += 2) pkt_stream->pkts[i].valid = false; + + pkt_stream->nb_valid_entries /= 2; +} + +static void pkt_stream_even_odd_sequence(struct test_spec *test) +{ + struct pkt_stream *pkt_stream; + u32 i; + + for (i = 0; i < test->nb_sockets; i++) { + pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream; + pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, + pkt_stream->pkts[0].len, i, 2); + test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream; + + pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream; + pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, + pkt_stream->pkts[0].len, i, 2); + test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream; + } } static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) @@ -693,16 +756,16 @@ static void pkt_stream_cancel(struct pkt_stream *pkt_stream) pkt_stream->current_pkt_nb--; } -static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb, - u32 bytes_written) +static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len, + u32 pkt_nb, u32 bytes_written) { - void *data = xsk_umem__get_data(ifobject->umem->buffer, addr); + void *data = xsk_umem__get_data(umem->buffer, addr); if (len < MIN_PKT_SIZE) return; if (!bytes_written) { - gen_eth_hdr(ifobject, data); + gen_eth_hdr(xsk, data); len -= PKT_HDR_SIZE; data += PKT_HDR_SIZE; @@ -747,8 +810,15 @@ static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, s len = 0; } + print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n", + pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb); + if (pkt->valid && pkt->len > pkt_stream->max_pkt_len) pkt_stream->max_pkt_len = pkt->len; + + if (pkt->valid) + pkt_stream->nb_valid_entries++; + pkt_nb++; } @@ -762,10 +832,10 @@ static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, struct pkt_stream *pkt_stream; pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true); - test->ifobj_tx->pkt_stream = pkt_stream; + test->ifobj_tx->xsk->pkt_stream = pkt_stream; pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false); - test->ifobj_rx->pkt_stream = pkt_stream; + test->ifobj_rx->xsk->pkt_stream = pkt_stream; } static void pkt_print_data(u32 *data, u32 cnt) @@ -777,7 +847,7 @@ static void pkt_print_data(u32 *data, u32 cnt) seqnum = ntohl(*data) & 0xffff; pkt_nb = ntohl(*data) >> 16; - fprintf(stdout, "%u:%u ", pkt_nb, seqnum); + ksft_print_msg("%u:%u ", pkt_nb, seqnum); data++; } } @@ -789,13 +859,13 @@ static void pkt_dump(void *pkt, u32 len, bool eth_header) if (eth_header) { /*extract L2 frame */ - fprintf(stdout, "DEBUG>> L2: dst mac: "); + ksft_print_msg("DEBUG>> L2: dst mac: "); for (i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ethhdr->h_dest[i]); + ksft_print_msg("%02X", ethhdr->h_dest[i]); - fprintf(stdout, "\nDEBUG>> L2: src mac: "); + ksft_print_msg("\nDEBUG>> L2: src mac: "); for (i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ethhdr->h_source[i]); + ksft_print_msg("%02X", ethhdr->h_source[i]); data = pkt + PKT_HDR_SIZE; } else { @@ -803,15 +873,15 @@ static void pkt_dump(void *pkt, u32 len, bool eth_header) } /*extract L5 frame */ - fprintf(stdout, "\nDEBUG>> L5: seqnum: "); + ksft_print_msg("\nDEBUG>> L5: seqnum: "); pkt_print_data(data, PKT_DUMP_NB_TO_PRINT); - fprintf(stdout, "...."); + ksft_print_msg("...."); if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) { - fprintf(stdout, "\n.... "); + ksft_print_msg("\n.... "); pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT, PKT_DUMP_NB_TO_PRINT); } - fprintf(stdout, "\n---------------------------------------\n"); + ksft_print_msg("\n---------------------------------------\n"); } static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr) @@ -916,36 +986,42 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) return true; } -static void kick_tx(struct xsk_socket_info *xsk) +static int kick_tx(struct xsk_socket_info *xsk) { int ret; ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); if (ret >= 0) - return; + return TEST_PASS; if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { usleep(100); - return; + return TEST_PASS; } - exit_with_error(errno); + return TEST_FAILURE; } -static void kick_rx(struct xsk_socket_info *xsk) +static int kick_rx(struct xsk_socket_info *xsk) { int ret; ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); if (ret < 0) - exit_with_error(errno); + return TEST_FAILURE; + + return TEST_PASS; } static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) { unsigned int rcvd; u32 idx; + int ret; - if (xsk_ring_prod__needs_wakeup(&xsk->tx)) - kick_tx(xsk); + if (xsk_ring_prod__needs_wakeup(&xsk->tx)) { + ret = kick_tx(xsk); + if (ret) + return TEST_FAILURE; + } rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); if (rcvd) { @@ -964,153 +1040,207 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) return TEST_PASS; } -static int receive_pkts(struct test_spec *test, struct pollfd *fds) +static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk) { - struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; - struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream; - struct xsk_socket_info *xsk = test->ifobj_rx->xsk; + u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0; + struct pkt_stream *pkt_stream = xsk->pkt_stream; struct ifobject *ifobj = test->ifobj_rx; struct xsk_umem_info *umem = xsk->umem; + struct pollfd fds = { }; struct pkt *pkt; + u64 first_addr = 0; int ret; - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - timeradd(&tv_now, &tv_timeout, &tv_end); + fds.fd = xsk_socket__fd(xsk->xsk); + fds.events = POLLIN; - pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); - while (pkt) { - u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; - u64 first_addr; + ret = kick_rx(xsk); + if (ret) + return TEST_FAILURE; - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - if (timercmp(&tv_now, &tv_end, >)) { - ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); + if (ifobj->use_poll) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret < 0) return TEST_FAILURE; - } - - kick_rx(xsk); - if (ifobj->use_poll) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret < 0) - exit_with_error(errno); - if (!ret) { - if (!is_umem_valid(test->ifobj_tx)) - return TEST_PASS; - - ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); - return TEST_FAILURE; - } + if (!ret) { + if (!is_umem_valid(test->ifobj_tx)) + return TEST_PASS; - if (!(fds->revents & POLLIN)) - continue; + ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); + return TEST_CONTINUE; } - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); - if (!rcvd) - continue; + if (!(fds.revents & POLLIN)) + return TEST_CONTINUE; + } - if (ifobj->use_fill_ring) { - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); - while (ret != rcvd) { + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + if (!rcvd) + return TEST_CONTINUE; + + if (ifobj->use_fill_ring) { + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + ret = poll(&fds, 1, POLL_TMOUT); if (ret < 0) - exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret < 0) - exit_with_error(errno); - } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + return TEST_FAILURE; } + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } + } - while (frags_processed < rcvd) { - const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); - u64 addr = desc->addr, orig; + while (frags_processed < rcvd) { + const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); + u64 addr = desc->addr, orig; - orig = xsk_umem__extract_addr(addr); - addr = xsk_umem__add_offset_to_addr(addr); + orig = xsk_umem__extract_addr(addr); + addr = xsk_umem__add_offset_to_addr(addr); + if (!nb_frags) { + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); if (!pkt) { ksft_print_msg("[%s] received too many packets addr: %lx len %u\n", __func__, addr, desc->len); return TEST_FAILURE; } + } - if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || - !is_offset_correct(umem, pkt, addr) || - (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr))) - return TEST_FAILURE; + print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n", + addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid); - if (!nb_frags++) - first_addr = addr; - frags_processed++; - pkt_len += desc->len; - if (ifobj->use_fill_ring) - *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; + if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || + !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata && + !is_metadata_correct(pkt, umem->buffer, addr))) + return TEST_FAILURE; - if (pkt_continues(desc->options)) - continue; + if (!nb_frags++) + first_addr = addr; + frags_processed++; + pkt_len += desc->len; + if (ifobj->use_fill_ring) + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; - /* The complete packet has been received */ - if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || - !is_offset_correct(umem, pkt, addr)) - return TEST_FAILURE; + if (pkt_continues(desc->options)) + continue; - pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); - nb_frags = 0; - pkt_len = 0; - } + /* The complete packet has been received */ + if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || + !is_offset_correct(umem, pkt, addr)) + return TEST_FAILURE; - if (nb_frags) { - /* In the middle of a packet. Start over from beginning of packet. */ - idx_rx -= nb_frags; - xsk_ring_cons__cancel(&xsk->rx, nb_frags); - if (ifobj->use_fill_ring) { - idx_fq -= nb_frags; - xsk_ring_prod__cancel(&umem->fq, nb_frags); - } - frags_processed -= nb_frags; + pkt_stream->nb_rx_pkts++; + nb_frags = 0; + pkt_len = 0; + } + + if (nb_frags) { + /* In the middle of a packet. Start over from beginning of packet. */ + idx_rx -= nb_frags; + xsk_ring_cons__cancel(&xsk->rx, nb_frags); + if (ifobj->use_fill_ring) { + idx_fq -= nb_frags; + xsk_ring_prod__cancel(&umem->fq, nb_frags); } + frags_processed -= nb_frags; + } - if (ifobj->use_fill_ring) - xsk_ring_prod__submit(&umem->fq, frags_processed); - if (ifobj->release_rx) - xsk_ring_cons__release(&xsk->rx, frags_processed); + if (ifobj->use_fill_ring) + xsk_ring_prod__submit(&umem->fq, frags_processed); + if (ifobj->release_rx) + xsk_ring_cons__release(&xsk->rx, frags_processed); + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight -= pkts_sent; + pthread_mutex_unlock(&pacing_mutex); + pkts_sent = 0; + +return TEST_CONTINUE; +} + +bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num, + unsigned long *bitmap) +{ + struct pkt_stream *pkt_stream = xsk->pkt_stream; + + if (!pkt_stream) { + __set_bit(sock_num, bitmap); + return false; + } + + if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) { + __set_bit(sock_num, bitmap); + if (bitmap_full(bitmap, test->nb_sockets)) + return true; + } + + return false; +} + +static int receive_pkts(struct test_spec *test) +{ + struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; + DECLARE_BITMAP(bitmap, test->nb_sockets); + struct xsk_socket_info *xsk; + u32 sock_num = 0; + int res, ret; - pthread_mutex_lock(&pacing_mutex); - pkts_in_flight -= pkts_sent; - pthread_mutex_unlock(&pacing_mutex); - pkts_sent = 0; + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + + timeradd(&tv_now, &tv_timeout, &tv_end); + + while (1) { + xsk = &test->ifobj_rx->xsk_arr[sock_num]; + + if ((all_packets_received(test, xsk, sock_num, bitmap))) + break; + + res = __receive_pkts(test, xsk); + if (!(res == TEST_PASS || res == TEST_CONTINUE)) + return res; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); + return TEST_FAILURE; + } + sock_num = (sock_num + 1) % test->nb_sockets; } return TEST_PASS; } -static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout) +static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout) { u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len; - struct pkt_stream *pkt_stream = ifobject->pkt_stream; - struct xsk_socket_info *xsk = ifobject->xsk; + struct pkt_stream *pkt_stream = xsk->pkt_stream; struct xsk_umem_info *umem = ifobject->umem; bool use_poll = ifobject->use_poll; + struct pollfd fds = { }; int ret; buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len); /* pkts_in_flight might be negative if many invalid packets are sent */ if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) { - kick_tx(xsk); + ret = kick_tx(xsk); + if (ret) + return TEST_FAILURE; return TEST_CONTINUE; } + fds.fd = xsk_socket__fd(xsk->xsk); + fds.events = POLLOUT; + while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { if (use_poll) { - ret = poll(fds, 1, POLL_TMOUT); + ret = poll(&fds, 1, POLL_TMOUT); if (timeout) { if (ret < 0) { ksft_print_msg("ERROR: [%s] Poll error %d\n", @@ -1161,10 +1291,13 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo tx_desc->options = 0; } if (pkt->valid) - pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, + pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb, bytes_written); bytes_written += tx_desc->len; + print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n", + tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb); + if (nb_frags_left) { i++; if (pkt_stream->verbatim) @@ -1186,7 +1319,7 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo xsk->outstanding_tx += valid_frags; if (use_poll) { - ret = poll(fds, 1, POLL_TMOUT); + ret = poll(&fds, 1, POLL_TMOUT); if (ret <= 0) { if (ret == 0 && timeout) return TEST_PASS; @@ -1207,33 +1340,67 @@ static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeo return TEST_CONTINUE; } -static void wait_for_tx_completion(struct xsk_socket_info *xsk) +static int wait_for_tx_completion(struct xsk_socket_info *xsk) { - while (xsk->outstanding_tx) + struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; + int ret; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + timeradd(&tv_now, &tv_timeout, &tv_end); + + while (xsk->outstanding_tx) { + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__); + return TEST_FAILURE; + } + complete_pkts(xsk, BATCH_SIZE); + } + + return TEST_PASS; +} + +bool all_packets_sent(struct test_spec *test, unsigned long *bitmap) +{ + return bitmap_full(bitmap, test->nb_sockets); } static int send_pkts(struct test_spec *test, struct ifobject *ifobject) { - struct pkt_stream *pkt_stream = ifobject->pkt_stream; bool timeout = !is_umem_valid(test->ifobj_rx); - struct pollfd fds = { }; - u32 ret; + DECLARE_BITMAP(bitmap, test->nb_sockets); + u32 i, ret; - fds.fd = xsk_socket__fd(ifobject->xsk->xsk); - fds.events = POLLOUT; + while (!(all_packets_sent(test, bitmap))) { + for (i = 0; i < test->nb_sockets; i++) { + struct pkt_stream *pkt_stream; - while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { - ret = __send_pkts(ifobject, &fds, timeout); - if (ret == TEST_CONTINUE && !test->fail) - continue; - if ((ret || test->fail) && !timeout) - return TEST_FAILURE; - if (ret == TEST_PASS && timeout) - return ret; + pkt_stream = ifobject->xsk_arr[i].pkt_stream; + if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) { + __set_bit(i, bitmap); + continue; + } + ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout); + if (ret == TEST_CONTINUE && !test->fail) + continue; + + if ((ret || test->fail) && !timeout) + return TEST_FAILURE; + + if (ret == TEST_PASS && timeout) + return ret; + + ret = wait_for_tx_completion(&ifobject->xsk_arr[i]); + if (ret) + return TEST_FAILURE; + } } - wait_for_tx_completion(ifobject->xsk); return TEST_PASS; } @@ -1266,7 +1433,9 @@ static int validate_rx_dropped(struct ifobject *ifobject) struct xdp_statistics stats; int err; - kick_rx(ifobject->xsk); + err = kick_rx(ifobject->xsk); + if (err) + return TEST_FAILURE; err = get_xsk_stats(xsk, &stats); if (err) @@ -1278,8 +1447,8 @@ static int validate_rx_dropped(struct ifobject *ifobject) * packet being invalid). Since the last packet may or may not have * been dropped already, both outcomes must be allowed. */ - if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 || - stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2 - 1) + if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 || + stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1) return TEST_PASS; return TEST_FAILURE; @@ -1292,7 +1461,9 @@ static int validate_rx_full(struct ifobject *ifobject) int err; usleep(1000); - kick_rx(ifobject->xsk); + err = kick_rx(ifobject->xsk); + if (err) + return TEST_FAILURE; err = get_xsk_stats(xsk, &stats); if (err) @@ -1311,7 +1482,9 @@ static int validate_fill_empty(struct ifobject *ifobject) int err; usleep(1000); - kick_rx(ifobject->xsk); + err = kick_rx(ifobject->xsk); + if (err) + return TEST_FAILURE; err = get_xsk_stats(xsk, &stats); if (err) @@ -1339,9 +1512,10 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject) return TEST_FAILURE; } - if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) { + if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); + __func__, stats.tx_invalid_descs, + ifobject->xsk->pkt_stream->nb_pkts); return TEST_FAILURE; } @@ -1433,6 +1607,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) LIBBPF_OPTS(bpf_xdp_query_opts, opts); void *bufs; int ret; + u32 i; if (ifobject->umem->unaligned_mode) mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB; @@ -1455,11 +1630,14 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (!ifobject->rx_on) return; - xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring); + xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring); - ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); - if (ret) - exit_with_error(errno); + for (i = 0; i < test->nb_sockets; i++) { + ifobject->xsk = &ifobject->xsk_arr[i]; + ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i); + if (ret) + exit_with_error(errno); + } } static void *worker_testapp_validate_tx(void *arg) @@ -1475,8 +1653,6 @@ static void *worker_testapp_validate_tx(void *arg) thread_common_ops_tx(test, ifobject); } - print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts, - ifobject->ifname); err = send_pkts(test, ifobject); if (!err && ifobject->validation_func) @@ -1491,26 +1667,23 @@ static void *worker_testapp_validate_rx(void *arg) { struct test_spec *test = (struct test_spec *)arg; struct ifobject *ifobject = test->ifobj_rx; - struct pollfd fds = { }; int err; if (test->current_step == 1) { thread_common_ops(test, ifobject); } else { xsk_clear_xskmap(ifobject->xskmap); - err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); + err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0); if (err) { - printf("Error: Failed to update xskmap, error %s\n", strerror(-err)); + ksft_print_msg("Error: Failed to update xskmap, error %s\n", + strerror(-err)); exit_with_error(-err); } } - fds.fd = xsk_socket__fd(ifobject->xsk->xsk); - fds.events = POLLIN; - pthread_barrier_wait(&barr); - err = receive_pkts(test, &fds); + err = receive_pkts(test); if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); @@ -1564,7 +1737,7 @@ static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_pro xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode)); err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode)); if (err) { - printf("Error attaching XDP program\n"); + ksft_print_msg("Error attaching XDP program\n"); exit_with_error(-err); } @@ -1619,11 +1792,11 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i if (ifobj2) { if (pthread_barrier_init(&barr, NULL, 2)) exit_with_error(errno); - pkt_stream_reset(ifobj2->pkt_stream); + pkt_stream_reset(ifobj2->xsk->pkt_stream); } test->current_step++; - pkt_stream_reset(ifobj1->pkt_stream); + pkt_stream_reset(ifobj1->xsk->pkt_stream); pkts_in_flight = 0; signal(SIGUSR1, handler); @@ -1647,9 +1820,15 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i pthread_join(t0, NULL); if (test->total_steps == test->current_step || test->fail) { + u32 i; + if (ifobj2) - xsk_socket__delete(ifobj2->xsk->xsk); - xsk_socket__delete(ifobj1->xsk->xsk); + for (i = 0; i < test->nb_sockets; i++) + xsk_socket__delete(ifobj2->xsk_arr[i].xsk); + + for (i = 0; i < test->nb_sockets; i++) + xsk_socket__delete(ifobj1->xsk_arr[i].xsk); + testapp_clean_xsk_umem(ifobj1); if (ifobj2 && !ifobj2->shared_umem) testapp_clean_xsk_umem(ifobj2); @@ -1682,7 +1861,6 @@ static int testapp_teardown(struct test_spec *test) { int i; - test_spec_set_name(test, "TEARDOWN"); for (i = 0; i < MAX_TEARDOWN_ITER; i++) { if (testapp_validate_traffic(test)) return TEST_FAILURE; @@ -1704,18 +1882,17 @@ static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) *ifobj2 = tmp_ifobj; } -static int testapp_bidi(struct test_spec *test) +static int testapp_bidirectional(struct test_spec *test) { int res; - test_spec_set_name(test, "BIDIRECTIONAL"); test->ifobj_tx->rx_on = true; test->ifobj_rx->tx_on = true; test->total_steps = 2; if (testapp_validate_traffic(test)) return TEST_FAILURE; - print_verbose("Switching Tx/Rx vectors\n"); + print_verbose("Switching Tx/Rx direction\n"); swap_directions(&test->ifobj_rx, &test->ifobj_tx); res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); @@ -1723,42 +1900,44 @@ static int testapp_bidi(struct test_spec *test) return res; } -static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) +static int swap_xsk_resources(struct test_spec *test) { int ret; - xsk_socket__delete(ifobj_tx->xsk->xsk); - xsk_socket__delete(ifobj_rx->xsk->xsk); - ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; - ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; + test->ifobj_tx->xsk_arr[0].pkt_stream = NULL; + test->ifobj_rx->xsk_arr[0].pkt_stream = NULL; + test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default; + test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default; + test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1]; + test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1]; - ret = xsk_update_xskmap(ifobj_rx->xskmap, ifobj_rx->xsk->xsk); + ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0); if (ret) - exit_with_error(errno); + return TEST_FAILURE; + + return TEST_PASS; } -static int testapp_bpf_res(struct test_spec *test) +static int testapp_xdp_prog_cleanup(struct test_spec *test) { - test_spec_set_name(test, "BPF_RES"); test->total_steps = 2; test->nb_sockets = 2; if (testapp_validate_traffic(test)) return TEST_FAILURE; - swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); + if (swap_xsk_resources(test)) + return TEST_FAILURE; return testapp_validate_traffic(test); } static int testapp_headroom(struct test_spec *test) { - test_spec_set_name(test, "UMEM_HEADROOM"); test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; return testapp_validate_traffic(test); } static int testapp_stats_rx_dropped(struct test_spec *test) { - test_spec_set_name(test, "STAT_RX_DROPPED"); if (test->mode == TEST_MODE_ZC) { ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); return TEST_SKIP; @@ -1774,7 +1953,6 @@ static int testapp_stats_rx_dropped(struct test_spec *test) static int testapp_stats_tx_invalid_descs(struct test_spec *test) { - test_spec_set_name(test, "STAT_TX_INVALID"); pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); test->ifobj_tx->validation_func = validate_tx_invalid_descs; return testapp_validate_traffic(test); @@ -1782,10 +1960,8 @@ static int testapp_stats_tx_invalid_descs(struct test_spec *test) static int testapp_stats_rx_full(struct test_spec *test) { - test_spec_set_name(test, "STAT_RX_FULL"); pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; test->ifobj_rx->release_rx = false; @@ -1795,19 +1971,16 @@ static int testapp_stats_rx_full(struct test_spec *test) static int testapp_stats_fill_empty(struct test_spec *test) { - test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->use_fill_ring = false; test->ifobj_rx->validation_func = validate_fill_empty; return testapp_validate_traffic(test); } -static int testapp_unaligned(struct test_spec *test) +static int testapp_send_receive_unaligned(struct test_spec *test) { - test_spec_set_name(test, "UNALIGNED_MODE"); test->ifobj_tx->umem->unaligned_mode = true; test->ifobj_rx->umem->unaligned_mode = true; /* Let half of the packets straddle a 4K buffer boundary */ @@ -1816,9 +1989,8 @@ static int testapp_unaligned(struct test_spec *test) return testapp_validate_traffic(test); } -static int testapp_unaligned_mb(struct test_spec *test) +static int testapp_send_receive_unaligned_mb(struct test_spec *test) { - test_spec_set_name(test, "UNALIGNED_MODE_9K"); test->mtu = MAX_ETH_JUMBO_SIZE; test->ifobj_tx->umem->unaligned_mode = true; test->ifobj_rx->umem->unaligned_mode = true; @@ -1834,9 +2006,8 @@ static int testapp_single_pkt(struct test_spec *test) return testapp_validate_traffic(test); } -static int testapp_multi_buffer(struct test_spec *test) +static int testapp_send_receive_mb(struct test_spec *test) { - test_spec_set_name(test, "RUN_TO_COMPLETION_9K_PACKETS"); test->mtu = MAX_ETH_JUMBO_SIZE; pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE); @@ -1933,7 +2104,6 @@ static int testapp_xdp_drop(struct test_spec *test) struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; - test_spec_set_name(test, "XDP_DROP_HALF"); test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop, skel_rx->maps.xsk, skel_tx->maps.xsk); @@ -1941,7 +2111,7 @@ static int testapp_xdp_drop(struct test_spec *test) return testapp_validate_traffic(test); } -static int testapp_xdp_metadata_count(struct test_spec *test) +static int testapp_xdp_metadata_copy(struct test_spec *test) { struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; @@ -1955,19 +2125,38 @@ static int testapp_xdp_metadata_count(struct test_spec *test) test->ifobj_rx->use_metadata = true; data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss"); - if (!data_map || !bpf_map__is_internal(data_map)) - exit_with_error(ENOMEM); + if (!data_map || !bpf_map__is_internal(data_map)) { + ksft_print_msg("Error: could not find bss section of XDP program\n"); + return TEST_FAILURE; + } - if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) - exit_with_error(errno); + if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) { + ksft_print_msg("Error: could not update count element\n"); + return TEST_FAILURE; + } return testapp_validate_traffic(test); } -static int testapp_poll_txq_tmout(struct test_spec *test) +static int testapp_xdp_shared_umem(struct test_spec *test) { - test_spec_set_name(test, "POLL_TXQ_FULL"); + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test->total_steps = 1; + test->nb_sockets = 2; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem, + skel_tx->progs.xsk_xdp_shared_umem, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + pkt_stream_even_odd_sequence(test); + + return testapp_validate_traffic(test); +} +static int testapp_poll_txq_tmout(struct test_spec *test) +{ test->ifobj_tx->use_poll = true; /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ test->ifobj_tx->umem->frame_size = 2048; @@ -1977,7 +2166,6 @@ static int testapp_poll_txq_tmout(struct test_spec *test) static int testapp_poll_rxq_tmout(struct test_spec *test) { - test_spec_set_name(test, "POLL_RXQ_EMPTY"); test->ifobj_rx->use_poll = true; return testapp_validate_traffic_single_thread(test, test->ifobj_rx); } @@ -1987,7 +2175,6 @@ static int testapp_too_many_frags(struct test_spec *test) struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {}; u32 max_frags, i; - test_spec_set_name(test, "TOO_MANY_FRAGS"); if (test->mode == TEST_MODE_ZC) max_frags = test->ifobj_tx->xdp_zc_max_segs; else @@ -2054,20 +2241,16 @@ static bool hugepages_present(void) return true; } -static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, - thread_func_t func_ptr) +static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr) { LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); int err; - memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); - memcpy(ifobj->src_mac, src_mac, ETH_ALEN); - ifobj->func_ptr = func_ptr; err = xsk_load_xdp_programs(ifobj); if (err) { - printf("Error loading XDP program\n"); + ksft_print_msg("Error loading XDP program\n"); exit_with_error(err); } @@ -2091,138 +2274,98 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char * } } -static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) -{ - int ret = TEST_SKIP; - - switch (type) { - case TEST_TYPE_STATS_RX_DROPPED: - ret = testapp_stats_rx_dropped(test); - break; - case TEST_TYPE_STATS_TX_INVALID_DESCS: - ret = testapp_stats_tx_invalid_descs(test); - break; - case TEST_TYPE_STATS_RX_FULL: - ret = testapp_stats_rx_full(test); - break; - case TEST_TYPE_STATS_FILL_EMPTY: - ret = testapp_stats_fill_empty(test); - break; - case TEST_TYPE_TEARDOWN: - ret = testapp_teardown(test); - break; - case TEST_TYPE_BIDI: - ret = testapp_bidi(test); - break; - case TEST_TYPE_BPF_RES: - ret = testapp_bpf_res(test); - break; - case TEST_TYPE_RUN_TO_COMPLETION: - test_spec_set_name(test, "RUN_TO_COMPLETION"); - ret = testapp_validate_traffic(test); - break; - case TEST_TYPE_RUN_TO_COMPLETION_MB: - ret = testapp_multi_buffer(test); - break; - case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: - test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); - ret = testapp_single_pkt(test); - break; - case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: - test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); - test->ifobj_tx->umem->frame_size = 2048; - test->ifobj_rx->umem->frame_size = 2048; - pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); - ret = testapp_validate_traffic(test); - break; - case TEST_TYPE_RX_POLL: - test->ifobj_rx->use_poll = true; - test_spec_set_name(test, "POLL_RX"); - ret = testapp_validate_traffic(test); - break; - case TEST_TYPE_TX_POLL: - test->ifobj_tx->use_poll = true; - test_spec_set_name(test, "POLL_TX"); - ret = testapp_validate_traffic(test); - break; - case TEST_TYPE_POLL_TXQ_TMOUT: - ret = testapp_poll_txq_tmout(test); - break; - case TEST_TYPE_POLL_RXQ_TMOUT: - ret = testapp_poll_rxq_tmout(test); - break; - case TEST_TYPE_ALIGNED_INV_DESC: - test_spec_set_name(test, "ALIGNED_INV_DESC"); - ret = testapp_invalid_desc(test); - break; - case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME: - test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE"); - test->ifobj_tx->umem->frame_size = 2048; - test->ifobj_rx->umem->frame_size = 2048; - ret = testapp_invalid_desc(test); - break; - case TEST_TYPE_UNALIGNED_INV_DESC: - test_spec_set_name(test, "UNALIGNED_INV_DESC"); - test->ifobj_tx->umem->unaligned_mode = true; - test->ifobj_rx->umem->unaligned_mode = true; - ret = testapp_invalid_desc(test); - break; - case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: { - u64 page_size, umem_size; - - test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE"); - /* Odd frame size so the UMEM doesn't end near a page boundary. */ - test->ifobj_tx->umem->frame_size = 4001; - test->ifobj_rx->umem->frame_size = 4001; - test->ifobj_tx->umem->unaligned_mode = true; - test->ifobj_rx->umem->unaligned_mode = true; - /* This test exists to test descriptors that staddle the end of - * the UMEM but not a page. - */ - page_size = sysconf(_SC_PAGESIZE); - umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; - assert(umem_size % page_size > MIN_PKT_SIZE); - assert(umem_size % page_size < page_size - MIN_PKT_SIZE); - ret = testapp_invalid_desc(test); - break; - } - case TEST_TYPE_ALIGNED_INV_DESC_MB: - test_spec_set_name(test, "ALIGNED_INV_DESC_MULTI_BUFF"); - ret = testapp_invalid_desc_mb(test); - break; - case TEST_TYPE_UNALIGNED_INV_DESC_MB: - test_spec_set_name(test, "UNALIGNED_INV_DESC_MULTI_BUFF"); - test->ifobj_tx->umem->unaligned_mode = true; - test->ifobj_rx->umem->unaligned_mode = true; - ret = testapp_invalid_desc_mb(test); - break; - case TEST_TYPE_UNALIGNED: - ret = testapp_unaligned(test); - break; - case TEST_TYPE_UNALIGNED_MB: - ret = testapp_unaligned_mb(test); - break; - case TEST_TYPE_HEADROOM: - ret = testapp_headroom(test); - break; - case TEST_TYPE_XDP_DROP_HALF: - ret = testapp_xdp_drop(test); - break; - case TEST_TYPE_XDP_METADATA_COUNT: - test_spec_set_name(test, "XDP_METADATA_COUNT"); - ret = testapp_xdp_metadata_count(test); - break; - case TEST_TYPE_XDP_METADATA_COUNT_MB: - test_spec_set_name(test, "XDP_METADATA_COUNT_MULTI_BUFF"); - test->mtu = MAX_ETH_JUMBO_SIZE; - ret = testapp_xdp_metadata_count(test); - break; - case TEST_TYPE_TOO_MANY_FRAGS: - ret = testapp_too_many_frags(test); - break; - default: - break; - } +static int testapp_send_receive(struct test_spec *test) +{ + return testapp_validate_traffic(test); +} + +static int testapp_send_receive_2k_frame(struct test_spec *test) +{ + test->ifobj_tx->umem->frame_size = 2048; + test->ifobj_rx->umem->frame_size = 2048; + pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + return testapp_validate_traffic(test); +} + +static int testapp_poll_rx(struct test_spec *test) +{ + test->ifobj_rx->use_poll = true; + return testapp_validate_traffic(test); +} + +static int testapp_poll_tx(struct test_spec *test) +{ + test->ifobj_tx->use_poll = true; + return testapp_validate_traffic(test); +} + +static int testapp_aligned_inv_desc(struct test_spec *test) +{ + return testapp_invalid_desc(test); +} + +static int testapp_aligned_inv_desc_2k_frame(struct test_spec *test) +{ + test->ifobj_tx->umem->frame_size = 2048; + test->ifobj_rx->umem->frame_size = 2048; + return testapp_invalid_desc(test); +} + +static int testapp_unaligned_inv_desc(struct test_spec *test) +{ + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + return testapp_invalid_desc(test); +} + +static int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test) +{ + u64 page_size, umem_size; + + /* Odd frame size so the UMEM doesn't end near a page boundary. */ + test->ifobj_tx->umem->frame_size = 4001; + test->ifobj_rx->umem->frame_size = 4001; + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + /* This test exists to test descriptors that staddle the end of + * the UMEM but not a page. + */ + page_size = sysconf(_SC_PAGESIZE); + umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; + assert(umem_size % page_size > MIN_PKT_SIZE); + assert(umem_size % page_size < page_size - MIN_PKT_SIZE); + + return testapp_invalid_desc(test); +} + +static int testapp_aligned_inv_desc_mb(struct test_spec *test) +{ + return testapp_invalid_desc_mb(test); +} + +static int testapp_unaligned_inv_desc_mb(struct test_spec *test) +{ + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + return testapp_invalid_desc_mb(test); +} + +static int testapp_xdp_metadata(struct test_spec *test) +{ + return testapp_xdp_metadata_copy(test); +} + +static int testapp_xdp_metadata_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + return testapp_xdp_metadata_copy(test); +} + +static void run_pkt_test(struct test_spec *test) +{ + int ret; + + ret = test->test_func(test); if (ret == TEST_PASS) ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), @@ -2290,13 +2433,56 @@ static bool is_xdp_supported(int ifindex) return true; } +static const struct test_spec tests[] = { + {.name = "SEND_RECEIVE", .test_func = testapp_send_receive}, + {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame}, + {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt}, + {.name = "POLL_RX", .test_func = testapp_poll_rx}, + {.name = "POLL_TX", .test_func = testapp_poll_tx}, + {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout}, + {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout}, + {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned}, + {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc}, + {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame}, + {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc}, + {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE", + .test_func = testapp_unaligned_inv_desc_4001_frame}, + {.name = "UMEM_HEADROOM", .test_func = testapp_headroom}, + {.name = "TEARDOWN", .test_func = testapp_teardown}, + {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional}, + {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped}, + {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs}, + {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full}, + {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty}, + {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup}, + {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop}, + {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem}, + {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata}, + {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb}, + {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb}, + {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS", + .test_func = testapp_send_receive_unaligned_mb}, + {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb}, + {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb}, + {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags}, +}; + +static void print_tests(void) +{ + u32 i; + + printf("Tests:\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) + printf("%u: %s\n", i, tests[i].name); +} + int main(int argc, char **argv) { struct pkt_stream *rx_pkt_stream_default; struct pkt_stream *tx_pkt_stream_default; struct ifobject *ifobj_tx, *ifobj_rx; + u32 i, j, failed_tests = 0, nb_tests; int modes = TEST_MODE_SKB + 1; - u32 i, j, failed_tests = 0; struct test_spec test; bool shared_netdev; @@ -2314,14 +2500,21 @@ int main(int argc, char **argv) parse_command_line(ifobj_tx, ifobj_rx, argc, argv); + if (opt_print_tests) { + print_tests(); + ksft_exit_xpass(); + } + if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= ARRAY_SIZE(tests)) { + ksft_print_msg("Error: test %u does not exist.\n", opt_run_test); + ksft_exit_xfail(); + } + shared_netdev = (ifobj_tx->ifindex == ifobj_rx->ifindex); ifobj_tx->shared_umem = shared_netdev; ifobj_rx->shared_umem = shared_netdev; - if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) { - usage(basename(argv[0])); - ksft_exit_xfail(); - } + if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) + print_usage(argv); if (is_xdp_supported(ifobj_tx->ifindex)) { modes++; @@ -2329,23 +2522,46 @@ int main(int argc, char **argv) modes++; } - init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx); - init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx); + init_iface(ifobj_rx, worker_testapp_validate_rx); + init_iface(ifobj_tx, worker_testapp_validate_tx); - test_spec_init(&test, ifobj_tx, ifobj_rx, 0); - tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); - rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]); + tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE); + rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE); if (!tx_pkt_stream_default || !rx_pkt_stream_default) exit_with_error(ENOMEM); test.tx_pkt_stream_default = tx_pkt_stream_default; test.rx_pkt_stream_default = rx_pkt_stream_default; - ksft_set_plan(modes * TEST_TYPE_MAX); + if (opt_run_test == RUN_ALL_TESTS) + nb_tests = ARRAY_SIZE(tests); + else + nb_tests = 1; + if (opt_mode == TEST_MODE_ALL) { + ksft_set_plan(modes * nb_tests); + } else { + if (opt_mode == TEST_MODE_DRV && modes <= TEST_MODE_DRV) { + ksft_print_msg("Error: XDP_DRV mode not supported.\n"); + ksft_exit_xfail(); + } + if (opt_mode == TEST_MODE_ZC && modes <= TEST_MODE_ZC) { + ksft_print_msg("Error: zero-copy mode not supported.\n"); + ksft_exit_xfail(); + } + + ksft_set_plan(nb_tests); + } for (i = 0; i < modes; i++) { - for (j = 0; j < TEST_TYPE_MAX; j++) { - test_spec_init(&test, ifobj_tx, ifobj_rx, i); - run_pkt_test(&test, i, j); + if (opt_mode != TEST_MODE_ALL && i != opt_mode) + continue; + + for (j = 0; j < ARRAY_SIZE(tests); j++) { + if (opt_run_test != RUN_ALL_TESTS && j != opt_run_test) + continue; + + test_spec_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]); + run_pkt_test(&test); usleep(USLEEP_MAX); if (test.fail) diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 233b66cef64a..f174df2d693f 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -5,7 +5,10 @@ #ifndef XSKXCEIVER_H_ #define XSKXCEIVER_H_ +#include <limits.h> + #include "xsk_xdp_progs.skel.h" +#include "xsk_xdp_common.h" #ifndef SOL_XDP #define SOL_XDP 283 @@ -33,8 +36,7 @@ #define TEST_SKIP 2 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 16 -#define MAX_SOCKETS 2 -#define MAX_TEST_NAME_SIZE 32 +#define MAX_TEST_NAME_SIZE 48 #define MAX_TEARDOWN_ITER 10 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ #define MIN_PKT_SIZE 64 @@ -56,6 +58,8 @@ #define XSK_DESC__MAX_SKB_FRAGS 18 #define HUGEPAGE_SIZE (2 * 1024 * 1024) #define PKT_DUMP_NB_TO_PRINT 16 +#define RUN_ALL_TESTS UINT_MAX +#define NUM_MAC_ADDRESSES 4 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -63,43 +67,9 @@ enum test_mode { TEST_MODE_SKB, TEST_MODE_DRV, TEST_MODE_ZC, - TEST_MODE_MAX -}; - -enum test_type { - TEST_TYPE_RUN_TO_COMPLETION, - TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME, - TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT, - TEST_TYPE_RX_POLL, - TEST_TYPE_TX_POLL, - TEST_TYPE_POLL_RXQ_TMOUT, - TEST_TYPE_POLL_TXQ_TMOUT, - TEST_TYPE_UNALIGNED, - TEST_TYPE_ALIGNED_INV_DESC, - TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME, - TEST_TYPE_UNALIGNED_INV_DESC, - TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME, - TEST_TYPE_HEADROOM, - TEST_TYPE_TEARDOWN, - TEST_TYPE_BIDI, - TEST_TYPE_STATS_RX_DROPPED, - TEST_TYPE_STATS_TX_INVALID_DESCS, - TEST_TYPE_STATS_RX_FULL, - TEST_TYPE_STATS_FILL_EMPTY, - TEST_TYPE_BPF_RES, - TEST_TYPE_XDP_DROP_HALF, - TEST_TYPE_XDP_METADATA_COUNT, - TEST_TYPE_XDP_METADATA_COUNT_MB, - TEST_TYPE_RUN_TO_COMPLETION_MB, - TEST_TYPE_UNALIGNED_MB, - TEST_TYPE_ALIGNED_INV_DESC_MB, - TEST_TYPE_UNALIGNED_INV_DESC_MB, - TEST_TYPE_TOO_MANY_FRAGS, - TEST_TYPE_MAX + TEST_MODE_ALL }; -static bool opt_verbose; - struct xsk_umem_info { struct xsk_ring_prod fq; struct xsk_ring_cons cq; @@ -118,8 +88,11 @@ struct xsk_socket_info { struct xsk_ring_prod tx; struct xsk_umem_info *umem; struct xsk_socket *xsk; + struct pkt_stream *pkt_stream; u32 outstanding_tx; u32 rxqsize; + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; }; struct pkt { @@ -135,12 +108,16 @@ struct pkt_stream { u32 current_pkt_nb; struct pkt *pkts; u32 max_pkt_len; + u32 nb_rx_pkts; + u32 nb_valid_entries; bool verbatim; }; struct ifobject; +struct test_spec; typedef int (*validation_func_t)(struct ifobject *ifobj); typedef void *(*thread_func_t)(void *arg); +typedef int (*test_func_t)(struct test_spec *test); struct ifobject { char ifname[MAX_INTERFACE_NAME_CHARS]; @@ -149,7 +126,6 @@ struct ifobject { struct xsk_umem_info *umem; thread_func_t func_ptr; validation_func_t validation_func; - struct pkt_stream *pkt_stream; struct xsk_xdp_progs *xdp_progs; struct bpf_map *xskmap; struct bpf_program *xdp_prog; @@ -169,8 +145,6 @@ struct ifobject { bool unaligned_supp; bool multi_buff_supp; bool multi_buff_zc_supp; - u8 dst_mac[ETH_ALEN]; - u8 src_mac[ETH_ALEN]; }; struct test_spec { @@ -182,6 +156,7 @@ struct test_spec { struct bpf_program *xdp_prog_tx; struct bpf_map *xskmap_rx; struct bpf_map *xskmap_tx; + test_func_t test_func; int mtu; u16 total_steps; u16 current_step; @@ -196,4 +171,6 @@ pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; int pkts_in_flight; +static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00}; + #endif /* XSKXCEIVER_H_ */ diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 488f4964365e..5f2b3f6c0d74 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -31,6 +31,9 @@ ALL_TESTS=" " devdummy="test-dummy0" +VERBOSE=0 +PAUSE=no +PAUSE_ON_FAIL=no # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -51,35 +54,102 @@ check_fail() fi } +run_cmd_common() +{ + local cmd="$*" + local out + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: ${cmd}" + fi + out=$($cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + return $rc +} + +run_cmd() { + run_cmd_common "$@" + rc=$? + check_err $rc + return $rc +} +run_cmd_fail() +{ + run_cmd_common "$@" + rc=$? + check_fail $rc + return $rc +} + +run_cmd_grep_common() +{ + local find="$1"; shift + local cmd="$*" + local out + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: ${cmd} 2>&1 | grep -q '${find}'" + fi + out=$($cmd 2>&1 | grep -q "${find}" 2>&1) + return $? +} + +run_cmd_grep() { + run_cmd_grep_common "$@" + rc=$? + check_err $rc + return $rc +} + +run_cmd_grep_fail() +{ + run_cmd_grep_common "$@" + rc=$? + check_fail $rc + return $rc +} + +end_test() +{ + echo "$*" + [ "${VERBOSE}" = "1" ] && echo + + if [[ $ret -ne 0 ]] && [[ "${PAUSE_ON_FAIL}" = "yes" ]]; then + echo "Hit enter to continue" + read a + fi; + + if [ "${PAUSE}" = "yes" ]; then + echo "Hit enter to continue" + read a + fi + +} + + kci_add_dummy() { - ip link add name "$devdummy" type dummy - check_err $? - ip link set "$devdummy" up - check_err $? + run_cmd ip link add name "$devdummy" type dummy + run_cmd ip link set "$devdummy" up } kci_del_dummy() { - ip link del dev "$devdummy" - check_err $? + run_cmd ip link del dev "$devdummy" } kci_test_netconf() { dev="$1" r=$ret - - ip netconf show dev "$dev" > /dev/null - check_err $? - + run_cmd ip netconf show dev "$dev" for f in 4 6; do - ip -$f netconf show dev "$dev" > /dev/null - check_err $? + run_cmd ip -$f netconf show dev "$dev" done if [ $ret -ne 0 ] ;then - echo "FAIL: ip netconf show $dev" + end_test "FAIL: ip netconf show $dev" test $r -eq 0 && ret=0 return 1 fi @@ -92,43 +162,27 @@ kci_test_bridge() vlandev="testbr-vlan1" local ret=0 - ip link add name "$devbr" type bridge - check_err $? - - ip link set dev "$devdummy" master "$devbr" - check_err $? - - ip link set "$devbr" up - check_err $? - - ip link add link "$devbr" name "$vlandev" type vlan id 1 - check_err $? - ip addr add dev "$vlandev" 10.200.7.23/30 - check_err $? - ip -6 addr add dev "$vlandev" dead:42::1234/64 - check_err $? - ip -d link > /dev/null - check_err $? - ip r s t all > /dev/null - check_err $? + run_cmd ip link add name "$devbr" type bridge + run_cmd ip link set dev "$devdummy" master "$devbr" + run_cmd ip link set "$devbr" up + run_cmd ip link add link "$devbr" name "$vlandev" type vlan id 1 + run_cmd ip addr add dev "$vlandev" 10.200.7.23/30 + run_cmd ip -6 addr add dev "$vlandev" dead:42::1234/64 + run_cmd ip -d link + run_cmd ip r s t all for name in "$devbr" "$vlandev" "$devdummy" ; do kci_test_netconf "$name" done - - ip -6 addr del dev "$vlandev" dead:42::1234/64 - check_err $? - - ip link del dev "$vlandev" - check_err $? - ip link del dev "$devbr" - check_err $? + run_cmd ip -6 addr del dev "$vlandev" dead:42::1234/64 + run_cmd ip link del dev "$vlandev" + run_cmd ip link del dev "$devbr" if [ $ret -ne 0 ];then - echo "FAIL: bridge setup" + end_test "FAIL: bridge setup" return 1 fi - echo "PASS: bridge setup" + end_test "PASS: bridge setup" } @@ -139,34 +193,23 @@ kci_test_gre() loc=10.0.0.1 local ret=0 - ip tunnel add $gredev mode gre remote $rem local $loc ttl 1 - check_err $? - ip link set $gredev up - check_err $? - ip addr add 10.23.7.10 dev $gredev - check_err $? - ip route add 10.23.8.0/30 dev $gredev - check_err $? - ip addr add dev "$devdummy" 10.23.7.11/24 - check_err $? - ip link > /dev/null - check_err $? - ip addr > /dev/null - check_err $? + run_cmd ip tunnel add $gredev mode gre remote $rem local $loc ttl 1 + run_cmd ip link set $gredev up + run_cmd ip addr add 10.23.7.10 dev $gredev + run_cmd ip route add 10.23.8.0/30 dev $gredev + run_cmd ip addr add dev "$devdummy" 10.23.7.11/24 + run_cmd ip link + run_cmd ip addr kci_test_netconf "$gredev" - - ip addr del dev "$devdummy" 10.23.7.11/24 - check_err $? - - ip link del $gredev - check_err $? + run_cmd ip addr del dev "$devdummy" 10.23.7.11/24 + run_cmd ip link del $gredev if [ $ret -ne 0 ];then - echo "FAIL: gre tunnel endpoint" + end_test "FAIL: gre tunnel endpoint" return 1 fi - echo "PASS: gre tunnel endpoint" + end_test "PASS: gre tunnel endpoint" } # tc uses rtnetlink too, for full tc testing @@ -176,56 +219,40 @@ kci_test_tc() dev=lo local ret=0 - tc qdisc add dev "$dev" root handle 1: htb - check_err $? - tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit - check_err $? - tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256 - check_err $? - tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256 - check_err $? - tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256 - check_err $? - tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10 - check_err $? - tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10 - check_err $? - tc filter show dev "$dev" parent 1:0 > /dev/null - check_err $? - tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 - check_err $? - tc filter show dev "$dev" parent 1:0 > /dev/null - check_err $? - tc qdisc del dev "$dev" root handle 1: htb - check_err $? + run_cmd tc qdisc add dev "$dev" root handle 1: htb + run_cmd tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit + run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256 + run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256 + run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256 + run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10 + run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10 + run_cmd tc filter show dev "$dev" parent 1:0 + run_cmd tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 + run_cmd tc filter show dev "$dev" parent 1:0 + run_cmd tc qdisc del dev "$dev" root handle 1: htb if [ $ret -ne 0 ];then - echo "FAIL: tc htb hierarchy" + end_test "FAIL: tc htb hierarchy" return 1 fi - echo "PASS: tc htb hierarchy" + end_test "PASS: tc htb hierarchy" } kci_test_polrouting() { local ret=0 - ip rule add fwmark 1 lookup 100 - check_err $? - ip route add local 0.0.0.0/0 dev lo table 100 - check_err $? - ip r s t all > /dev/null - check_err $? - ip rule del fwmark 1 lookup 100 - check_err $? - ip route del local 0.0.0.0/0 dev lo table 100 - check_err $? + run_cmd ip rule add fwmark 1 lookup 100 + run_cmd ip route add local 0.0.0.0/0 dev lo table 100 + run_cmd ip r s t all + run_cmd ip rule del fwmark 1 lookup 100 + run_cmd ip route del local 0.0.0.0/0 dev lo table 100 if [ $ret -ne 0 ];then - echo "FAIL: policy route test" + end_test "FAIL: policy route test" return 1 fi - echo "PASS: policy routing" + end_test "PASS: policy routing" } kci_test_route_get() @@ -233,65 +260,51 @@ kci_test_route_get() local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) local ret=0 - - ip route get 127.0.0.1 > /dev/null - check_err $? - ip route get 127.0.0.1 dev "$devdummy" > /dev/null - check_err $? - ip route get ::1 > /dev/null - check_err $? - ip route get fe80::1 dev "$devdummy" > /dev/null - check_err $? - ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1 > /dev/null - check_err $? - ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1 > /dev/null - check_err $? - ip addr add dev "$devdummy" 10.23.7.11/24 - check_err $? - ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null - check_err $? - ip route add 10.23.8.0/24 \ + run_cmd ip route get 127.0.0.1 + run_cmd ip route get 127.0.0.1 dev "$devdummy" + run_cmd ip route get ::1 + run_cmd ip route get fe80::1 dev "$devdummy" + run_cmd ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1 + run_cmd ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1 + run_cmd ip addr add dev "$devdummy" 10.23.7.11/24 + run_cmd ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" + run_cmd ip route add 10.23.8.0/24 \ nexthop via 10.23.7.13 dev "$devdummy" \ nexthop via 10.23.7.14 dev "$devdummy" - check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy=0 - ip route get 10.23.8.11 > /dev/null - check_err $? + run_cmd ip route get 10.23.8.11 sysctl -wq net.ipv4.fib_multipath_hash_policy=1 - ip route get 10.23.8.11 > /dev/null - check_err $? + run_cmd ip route get 10.23.8.11 sysctl -wq net.ipv4.fib_multipath_hash_policy="$hash_policy" - ip route del 10.23.8.0/24 - check_err $? - ip addr del dev "$devdummy" 10.23.7.11/24 - check_err $? + run_cmd ip route del 10.23.8.0/24 + run_cmd ip addr del dev "$devdummy" 10.23.7.11/24 + if [ $ret -ne 0 ];then - echo "FAIL: route get" + end_test "FAIL: route get" return 1 fi - echo "PASS: route get" + end_test "PASS: route get" } kci_test_addrlft() { for i in $(seq 10 100) ;do lft=$(((RANDOM%3) + 1)) - ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) - check_err $? + run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) done sleep 5 - - ip addr show dev "$devdummy" | grep "10.23.11." + run_cmd_grep "10.23.11." ip addr show dev "$devdummy" if [ $? -eq 0 ]; then - echo "FAIL: preferred_lft addresses remaining" check_err 1 + end_test "FAIL: preferred_lft addresses remaining" return fi - echo "PASS: preferred_lft addresses have expired" + end_test "PASS: preferred_lft addresses have expired" } kci_test_promote_secondaries() @@ -310,27 +323,17 @@ kci_test_promote_secondaries() [ $promote -eq 0 ] && sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=0 - echo "PASS: promote_secondaries complete" + end_test "PASS: promote_secondaries complete" } kci_test_addrlabel() { local ret=0 - - ip addrlabel add prefix dead::/64 dev lo label 1 - check_err $? - - ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1" - check_err $? - - ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null - check_err $? - - ip addrlabel add prefix dead::/64 label 1 2> /dev/null - check_err $? - - ip addrlabel del prefix dead::/64 label 1 2> /dev/null - check_err $? + run_cmd ip addrlabel add prefix dead::/64 dev lo label 1 + run_cmd_grep "prefix dead::/64 dev lo label 1" ip addrlabel list + run_cmd ip addrlabel del prefix dead::/64 dev lo label 1 + run_cmd ip addrlabel add prefix dead::/64 label 1 + run_cmd ip addrlabel del prefix dead::/64 label 1 # concurrent add/delete for i in $(seq 1 1000); do @@ -346,11 +349,11 @@ kci_test_addrlabel() ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null if [ $ret -ne 0 ];then - echo "FAIL: ipv6 addrlabel" + end_test "FAIL: ipv6 addrlabel" return 1 fi - echo "PASS: ipv6 addrlabel" + end_test "PASS: ipv6 addrlabel" } kci_test_ifalias() @@ -358,35 +361,28 @@ kci_test_ifalias() local ret=0 namewant=$(uuidgen) syspathname="/sys/class/net/$devdummy/ifalias" - - ip link set dev "$devdummy" alias "$namewant" - check_err $? + run_cmd ip link set dev "$devdummy" alias "$namewant" if [ $ret -ne 0 ]; then - echo "FAIL: cannot set interface alias of $devdummy to $namewant" + end_test "FAIL: cannot set interface alias of $devdummy to $namewant" return 1 fi - - ip link show "$devdummy" | grep -q "alias $namewant" - check_err $? + run_cmd_grep "alias $namewant" ip link show "$devdummy" if [ -r "$syspathname" ] ; then read namehave < "$syspathname" if [ "$namewant" != "$namehave" ]; then - echo "FAIL: did set ifalias $namewant but got $namehave" + end_test "FAIL: did set ifalias $namewant but got $namehave" return 1 fi namewant=$(uuidgen) echo "$namewant" > "$syspathname" - ip link show "$devdummy" | grep -q "alias $namewant" - check_err $? + run_cmd_grep "alias $namewant" ip link show "$devdummy" # sysfs interface allows to delete alias again echo "" > "$syspathname" - - ip link show "$devdummy" | grep -q "alias $namewant" - check_fail $? + run_cmd_grep_fail "alias $namewant" ip link show "$devdummy" for i in $(seq 1 100); do uuidgen > "$syspathname" & @@ -395,57 +391,48 @@ kci_test_ifalias() wait # re-add the alias -- kernel should free mem when dummy dev is removed - ip link set dev "$devdummy" alias "$namewant" - check_err $? + run_cmd ip link set dev "$devdummy" alias "$namewant" + fi if [ $ret -ne 0 ]; then - echo "FAIL: set interface alias $devdummy to $namewant" + end_test "FAIL: set interface alias $devdummy to $namewant" return 1 fi - echo "PASS: set ifalias $namewant for $devdummy" + end_test "PASS: set ifalias $namewant for $devdummy" } kci_test_vrf() { vrfname="test-vrf" local ret=0 - - ip link show type vrf 2>/dev/null + run_cmd ip link show type vrf if [ $? -ne 0 ]; then - echo "SKIP: vrf: iproute2 too old" + end_test "SKIP: vrf: iproute2 too old" return $ksft_skip fi - - ip link add "$vrfname" type vrf table 10 - check_err $? + run_cmd ip link add "$vrfname" type vrf table 10 if [ $ret -ne 0 ];then - echo "FAIL: can't add vrf interface, skipping test" + end_test "FAIL: can't add vrf interface, skipping test" return 0 fi - - ip -br link show type vrf | grep -q "$vrfname" - check_err $? + run_cmd_grep "$vrfname" ip -br link show type vrf if [ $ret -ne 0 ];then - echo "FAIL: created vrf device not found" + end_test "FAIL: created vrf device not found" return 1 fi - ip link set dev "$vrfname" up - check_err $? - - ip link set dev "$devdummy" master "$vrfname" - check_err $? - ip link del dev "$vrfname" - check_err $? + run_cmd ip link set dev "$vrfname" up + run_cmd ip link set dev "$devdummy" master "$vrfname" + run_cmd ip link del dev "$vrfname" if [ $ret -ne 0 ];then - echo "FAIL: vrf" + end_test "FAIL: vrf" return 1 fi - echo "PASS: vrf" + end_test "PASS: vrf" } kci_test_encap_vxlan() @@ -454,84 +441,44 @@ kci_test_encap_vxlan() vxlan="test-vxlan0" vlan="test-vlan0" testns="$1" - - ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \ - dev "$devdummy" dstport 4789 2>/dev/null + run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \ + dev "$devdummy" dstport 4789 if [ $? -ne 0 ]; then - echo "FAIL: can't add vxlan interface, skipping test" + end_test "FAIL: can't add vxlan interface, skipping test" return 0 fi - check_err $? - ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan" - check_err $? - - ip -netns "$testns" link set up dev "$vxlan" - check_err $? - - ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1 - check_err $? + run_cmd ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan" + run_cmd ip -netns "$testns" link set up dev "$vxlan" + run_cmd ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1 # changelink testcases - ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy" 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64 - check_err $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning - check_err $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan proxy 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan norsc 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan external 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan gbp 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan gpe 2>/dev/null - check_fail $? - - ip -netns "$testns" link del "$vxlan" - check_err $? + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43 + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy" + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit + + run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64 + run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning + + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan proxy + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan norsc + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan external + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gbp + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gpe + run_cmd ip -netns "$testns" link del "$vxlan" if [ $ret -ne 0 ]; then - echo "FAIL: vxlan" + end_test "FAIL: vxlan" return 1 fi - echo "PASS: vxlan" + end_test "PASS: vxlan" } kci_test_encap_fou() @@ -539,39 +486,32 @@ kci_test_encap_fou() local ret=0 name="test-fou" testns="$1" - - ip fou help 2>&1 |grep -q 'Usage: ip fou' + run_cmd_grep 'Usage: ip fou' ip fou help if [ $? -ne 0 ];then - echo "SKIP: fou: iproute2 too old" + end_test "SKIP: fou: iproute2 too old" return $ksft_skip fi if ! /sbin/modprobe -q -n fou; then - echo "SKIP: module fou is not found" + end_test "SKIP: module fou is not found" return $ksft_skip fi /sbin/modprobe -q fou - ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null + + run_cmd ip -netns "$testns" fou add port 7777 ipproto 47 if [ $? -ne 0 ];then - echo "FAIL: can't add fou port 7777, skipping test" + end_test "FAIL: can't add fou port 7777, skipping test" return 1 fi - - ip -netns "$testns" fou add port 8888 ipproto 4 - check_err $? - - ip -netns "$testns" fou del port 9999 2>/dev/null - check_fail $? - - ip -netns "$testns" fou del port 7777 - check_err $? - + run_cmd ip -netns "$testns" fou add port 8888 ipproto 4 + run_cmd_fail ip -netns "$testns" fou del port 9999 + run_cmd ip -netns "$testns" fou del port 7777 if [ $ret -ne 0 ]; then - echo "FAIL: fou" + end_test "FAIL: fou"s return 1 fi - echo "PASS: fou" + end_test "PASS: fou" } # test various encap methods, use netns to avoid unwanted interference @@ -579,25 +519,16 @@ kci_test_encap() { testns="testns" local ret=0 - - ip netns add "$testns" + run_cmd ip netns add "$testns" if [ $? -ne 0 ]; then - echo "SKIP encap tests: cannot add net namespace $testns" + end_test "SKIP encap tests: cannot add net namespace $testns" return $ksft_skip fi - - ip -netns "$testns" link set lo up - check_err $? - - ip -netns "$testns" link add name "$devdummy" type dummy - check_err $? - ip -netns "$testns" link set "$devdummy" up - check_err $? - - kci_test_encap_vxlan "$testns" - check_err $? - kci_test_encap_fou "$testns" - check_err $? + run_cmd ip -netns "$testns" link set lo up + run_cmd ip -netns "$testns" link add name "$devdummy" type dummy + run_cmd ip -netns "$testns" link set "$devdummy" up + run_cmd kci_test_encap_vxlan "$testns" + run_cmd kci_test_encap_fou "$testns" ip netns del "$testns" return $ret @@ -607,41 +538,28 @@ kci_test_macsec() { msname="test_macsec0" local ret=0 - - ip macsec help 2>&1 | grep -q "^Usage: ip macsec" + run_cmd_grep "^Usage: ip macsec" ip macsec help if [ $? -ne 0 ]; then - echo "SKIP: macsec: iproute2 too old" + end_test "SKIP: macsec: iproute2 too old" return $ksft_skip fi - - ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on - check_err $? + run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on if [ $ret -ne 0 ];then - echo "FAIL: can't add macsec interface, skipping test" + end_test "FAIL: can't add macsec interface, skipping test" return 1 fi - - ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 - check_err $? - - ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" - check_err $? - - ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef - check_err $? - - ip macsec show > /dev/null - check_err $? - - ip link del dev "$msname" - check_err $? + run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 + run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" + run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef + run_cmd ip macsec show + run_cmd ip link del dev "$msname" if [ $ret -ne 0 ];then - echo "FAIL: macsec" + end_test "FAIL: macsec" return 1 fi - echo "PASS: macsec" + end_test "PASS: macsec" } kci_test_macsec_offload() @@ -650,19 +568,18 @@ kci_test_macsec_offload() sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false local ret=0 - - ip macsec help 2>&1 | grep -q "^Usage: ip macsec" + run_cmd_grep "^Usage: ip macsec" ip macsec help if [ $? -ne 0 ]; then - echo "SKIP: macsec: iproute2 too old" + end_test "SKIP: macsec: iproute2 too old" return $ksft_skip fi # setup netdevsim since dummydev doesn't have offload support if [ ! -w /sys/bus/netdevsim/new_device ] ; then - modprobe -q netdevsim - check_err $? + run_cmd modprobe -q netdevsim + if [ $ret -ne 0 ]; then - echo "SKIP: macsec_offload can't load netdevsim" + end_test "SKIP: macsec_offload can't load netdevsim" return $ksft_skip fi probed=true @@ -675,43 +592,25 @@ kci_test_macsec_offload() ip link set $dev up if [ ! -d $sysfsd ] ; then - echo "FAIL: macsec_offload can't create device $dev" + end_test "FAIL: macsec_offload can't create device $dev" return 1 fi - - ethtool -k $dev | grep -q 'macsec-hw-offload: on' + run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev if [ $? -eq 1 ] ; then - echo "FAIL: macsec_offload netdevsim doesn't support MACsec offload" + end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload" return 1 fi - - ip link add link $dev kci_macsec1 type macsec port 4 offload mac - check_err $? - - ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac - check_err $? - - ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac - check_err $? - - ip link add link $dev kci_macsec4 type macsec port 8 offload mac 2> /dev/null - check_fail $? + run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac + run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac + run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac + run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac msname=kci_macsec1 - - ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 - check_err $? - - ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" - check_err $? - - ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ + run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 + run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" + run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ key 00 0123456789abcdef0123456789abcdef - check_err $? - - ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null - check_fail $? - + run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" # clean up any leftovers for msdev in kci_macsec{1,2,3,4} ; do ip link del $msdev 2> /dev/null @@ -720,10 +619,10 @@ kci_test_macsec_offload() $probed && rmmod netdevsim if [ $ret -ne 0 ]; then - echo "FAIL: macsec_offload" + end_test "FAIL: macsec_offload" return 1 fi - echo "PASS: macsec_offload" + end_test "PASS: macsec_offload" } #------------------------------------------------------------------- @@ -755,8 +654,7 @@ kci_test_ipsec() ip addr add $srcip dev $devdummy # flush to be sure there's nothing configured - ip x s flush ; ip x p flush - check_err $? + run_cmd ip x s flush ; ip x p flush # start the monitor in the background tmpfile=`mktemp /var/run/ipsectestXXX` @@ -764,72 +662,57 @@ kci_test_ipsec() sleep 0.2 ipsecid="proto esp src $srcip dst $dstip spi 0x07" - ip x s add $ipsecid \ + run_cmd ip x s add $ipsecid \ mode transport reqid 0x07 replay-window 32 \ $algo sel src $srcip/24 dst $dstip/24 - check_err $? - lines=`ip x s list | grep $srcip | grep $dstip | wc -l` - test $lines -eq 2 - check_err $? - ip x s count | grep -q "SAD count 1" - check_err $? + lines=`ip x s list | grep $srcip | grep $dstip | wc -l` + run_cmd test $lines -eq 2 + run_cmd_grep "SAD count 1" ip x s count lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l` - test $lines -eq 2 - check_err $? - - ip x s delete $ipsecid - check_err $? + run_cmd test $lines -eq 2 + run_cmd ip x s delete $ipsecid lines=`ip x s list | wc -l` - test $lines -eq 0 - check_err $? + run_cmd test $lines -eq 0 ipsecsel="dir out src $srcip/24 dst $dstip/24" - ip x p add $ipsecsel \ + run_cmd ip x p add $ipsecsel \ tmpl proto esp src $srcip dst $dstip \ spi 0x07 mode transport reqid 0x07 - check_err $? + lines=`ip x p list | grep $srcip | grep $dstip | wc -l` - test $lines -eq 2 - check_err $? + run_cmd test $lines -eq 2 - ip x p count | grep -q "SPD IN 0 OUT 1 FWD 0" - check_err $? + run_cmd_grep "SPD IN 0 OUT 1 FWD 0" ip x p count lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l` - test $lines -eq 2 - check_err $? + run_cmd test $lines -eq 2 - ip x p delete $ipsecsel - check_err $? + run_cmd ip x p delete $ipsecsel lines=`ip x p list | wc -l` - test $lines -eq 0 - check_err $? + run_cmd test $lines -eq 0 # check the monitor results kill $mpid lines=`wc -l $tmpfile | cut "-d " -f1` - test $lines -eq 20 - check_err $? + run_cmd test $lines -eq 20 rm -rf $tmpfile # clean up any leftovers - ip x s flush - check_err $? - ip x p flush - check_err $? + run_cmd ip x s flush + run_cmd ip x p flush ip addr del $srcip/32 dev $devdummy if [ $ret -ne 0 ]; then - echo "FAIL: ipsec" + end_test "FAIL: ipsec" return 1 fi - echo "PASS: ipsec" + end_test "PASS: ipsec" } #------------------------------------------------------------------- @@ -857,10 +740,9 @@ kci_test_ipsec_offload() # setup netdevsim since dummydev doesn't have offload support if [ ! -w /sys/bus/netdevsim/new_device ] ; then - modprobe -q netdevsim - check_err $? + run_cmd modprobe -q netdevsim if [ $ret -ne 0 ]; then - echo "SKIP: ipsec_offload can't load netdevsim" + end_test "SKIP: ipsec_offload can't load netdevsim" return $ksft_skip fi probed=true @@ -874,11 +756,11 @@ kci_test_ipsec_offload() ip addr add $srcip dev $dev ip link set $dev up if [ ! -d $sysfsd ] ; then - echo "FAIL: ipsec_offload can't create device $dev" + end_test "FAIL: ipsec_offload can't create device $dev" return 1 fi if [ ! -f $sysfsf ] ; then - echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload" + end_test "FAIL: ipsec_offload netdevsim doesn't support IPsec offload" return 1 fi @@ -886,40 +768,39 @@ kci_test_ipsec_offload() ip x s flush ; ip x p flush # create offloaded SAs, both in and out - ip x p add dir out src $srcip/24 dst $dstip/24 \ + run_cmd ip x p add dir out src $srcip/24 dst $dstip/24 \ tmpl proto esp src $srcip dst $dstip spi 9 \ mode transport reqid 42 - check_err $? - ip x p add dir in src $dstip/24 dst $srcip/24 \ + + run_cmd ip x p add dir in src $dstip/24 dst $srcip/24 \ tmpl proto esp src $dstip dst $srcip spi 9 \ mode transport reqid 42 - check_err $? - ip x s add proto esp src $srcip dst $dstip spi 9 \ + run_cmd ip x s add proto esp src $srcip dst $dstip spi 9 \ mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \ offload dev $dev dir out - check_err $? - ip x s add proto esp src $dstip dst $srcip spi 9 \ + + run_cmd ip x s add proto esp src $dstip dst $srcip spi 9 \ mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \ offload dev $dev dir in - check_err $? + if [ $ret -ne 0 ]; then - echo "FAIL: ipsec_offload can't create SA" + end_test "FAIL: ipsec_offload can't create SA" return 1 fi # does offload show up in ip output lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"` if [ $lines -ne 2 ] ; then - echo "FAIL: ipsec_offload SA offload missing from list output" check_err 1 + end_test "FAIL: ipsec_offload SA offload missing from list output" fi # use ping to exercise the Tx path ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null # does driver have correct offload info - diff $sysfsf - << EOF + run_cmd diff $sysfsf - << EOF SA count=2 tx=3 sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000 sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 @@ -929,7 +810,7 @@ sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[1] key=0x34333231 38373635 32313039 36353433 EOF if [ $? -ne 0 ] ; then - echo "FAIL: ipsec_offload incorrect driver data" + end_test "FAIL: ipsec_offload incorrect driver data" check_err 1 fi @@ -938,8 +819,8 @@ EOF ip x p flush lines=`grep -c "SA count=0" $sysfsf` if [ $lines -ne 1 ] ; then - echo "FAIL: ipsec_offload SA not removed from driver" check_err 1 + end_test "FAIL: ipsec_offload SA not removed from driver" fi # clean up any leftovers @@ -947,10 +828,10 @@ EOF $probed && rmmod netdevsim if [ $ret -ne 0 ]; then - echo "FAIL: ipsec_offload" + end_test "FAIL: ipsec_offload" return 1 fi - echo "PASS: ipsec_offload" + end_test "PASS: ipsec_offload" } kci_test_gretap() @@ -959,46 +840,38 @@ kci_test_gretap() DEV_NS=gretap00 local ret=0 - ip netns add "$testns" + run_cmd ip netns add "$testns" if [ $? -ne 0 ]; then - echo "SKIP gretap tests: cannot add net namespace $testns" + end_test "SKIP gretap tests: cannot add net namespace $testns" return $ksft_skip fi - ip link help gretap 2>&1 | grep -q "^Usage:" + run_cmd_grep "^Usage:" ip link help gretap if [ $? -ne 0 ];then - echo "SKIP: gretap: iproute2 too old" + end_test "SKIP: gretap: iproute2 too old" ip netns del "$testns" return $ksft_skip fi # test native tunnel - ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \ key 102 local 172.16.1.100 remote 172.16.1.200 - check_err $? - - ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 + run_cmd ip -netns "$testns" link set dev $DEV_NS ups + run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode - ip -netns "$testns" link add dev "$DEV_NS" type gretap external - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap external + run_cmd ip -netns "$testns" link del "$DEV_NS" if [ $ret -ne 0 ]; then - echo "FAIL: gretap" + end_test "FAIL: gretap" ip netns del "$testns" return 1 fi - echo "PASS: gretap" + end_test "PASS: gretap" ip netns del "$testns" } @@ -1009,46 +882,38 @@ kci_test_ip6gretap() DEV_NS=ip6gretap00 local ret=0 - ip netns add "$testns" + run_cmd ip netns add "$testns" if [ $? -ne 0 ]; then - echo "SKIP ip6gretap tests: cannot add net namespace $testns" + end_test "SKIP ip6gretap tests: cannot add net namespace $testns" return $ksft_skip fi - ip link help ip6gretap 2>&1 | grep -q "^Usage:" + run_cmd_grep "^Usage:" ip link help ip6gretap if [ $? -ne 0 ];then - echo "SKIP: ip6gretap: iproute2 too old" + end_test "SKIP: ip6gretap: iproute2 too old" ip netns del "$testns" return $ksft_skip fi # test native tunnel - ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \ key 102 local fc00:100::1 remote fc00:100::2 - check_err $? - ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode - ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external + run_cmd ip -netns "$testns" link del "$DEV_NS" if [ $ret -ne 0 ]; then - echo "FAIL: ip6gretap" + end_test "FAIL: ip6gretap" ip netns del "$testns" return 1 fi - echo "PASS: ip6gretap" + end_test "PASS: ip6gretap" ip netns del "$testns" } @@ -1058,62 +923,47 @@ kci_test_erspan() testns="testns" DEV_NS=erspan00 local ret=0 - - ip link help erspan 2>&1 | grep -q "^Usage:" + run_cmd_grep "^Usage:" ip link help erspan if [ $? -ne 0 ];then - echo "SKIP: erspan: iproute2 too old" + end_test "SKIP: erspan: iproute2 too old" return $ksft_skip fi - - ip netns add "$testns" + run_cmd ip netns add "$testns" if [ $? -ne 0 ]; then - echo "SKIP erspan tests: cannot add net namespace $testns" + end_test "SKIP erspan tests: cannot add net namespace $testns" return $ksft_skip fi # test native tunnel erspan v1 - ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \ key 102 local 172.16.1.100 remote 172.16.1.200 \ erspan_ver 1 erspan 488 - check_err $? - ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test native tunnel erspan v2 - ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \ key 102 local 172.16.1.100 remote 172.16.1.200 \ erspan_ver 2 erspan_dir ingress erspan_hwid 7 - check_err $? - - ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode - ip -netns "$testns" link add dev "$DEV_NS" type erspan external - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan external + run_cmd ip -netns "$testns" link del "$DEV_NS" if [ $ret -ne 0 ]; then - echo "FAIL: erspan" + end_test "FAIL: erspan" ip netns del "$testns" return 1 fi - echo "PASS: erspan" + end_test "PASS: erspan" ip netns del "$testns" } @@ -1123,63 +973,49 @@ kci_test_ip6erspan() testns="testns" DEV_NS=ip6erspan00 local ret=0 - - ip link help ip6erspan 2>&1 | grep -q "^Usage:" + run_cmd_grep "^Usage:" ip link help ip6erspan if [ $? -ne 0 ];then - echo "SKIP: ip6erspan: iproute2 too old" + end_test "SKIP: ip6erspan: iproute2 too old" return $ksft_skip fi - - ip netns add "$testns" + run_cmd ip netns add "$testns" if [ $? -ne 0 ]; then - echo "SKIP ip6erspan tests: cannot add net namespace $testns" + end_test "SKIP ip6erspan tests: cannot add net namespace $testns" return $ksft_skip fi # test native tunnel ip6erspan v1 - ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \ key 102 local fc00:100::1 remote fc00:100::2 \ erspan_ver 1 erspan 488 - check_err $? - ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test native tunnel ip6erspan v2 - ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \ key 102 local fc00:100::1 remote fc00:100::2 \ erspan_ver 2 erspan_dir ingress erspan_hwid 7 - check_err $? - ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode - ip -netns "$testns" link add dev "$DEV_NS" \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" \ type ip6erspan external - check_err $? - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" link del "$DEV_NS" if [ $ret -ne 0 ]; then - echo "FAIL: ip6erspan" + end_test "FAIL: ip6erspan" ip netns del "$testns" return 1 fi - echo "PASS: ip6erspan" + end_test "PASS: ip6erspan" ip netns del "$testns" } @@ -1195,45 +1031,35 @@ kci_test_fdb_get() dstip="10.0.2.3" local ret=0 - bridge fdb help 2>&1 |grep -q 'bridge fdb get' + run_cmd_grep 'bridge fdb get' bridge fdb help if [ $? -ne 0 ];then - echo "SKIP: fdb get tests: iproute2 too old" + end_test "SKIP: fdb get tests: iproute2 too old" return $ksft_skip fi - ip netns add testns + run_cmd ip netns add testns if [ $? -ne 0 ]; then - echo "SKIP fdb get tests: cannot add net namespace $testns" + end_test "SKIP fdb get tests: cannot add net namespace $testns" return $ksft_skip fi - - $IP link add "$vxlandev" type vxlan id 10 local $localip \ - dstport 4789 2>/dev/null - check_err $? - $IP link add name "$brdev" type bridge &>/dev/null - check_err $? - $IP link set dev "$vxlandev" master "$brdev" &>/dev/null - check_err $? - $BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null - check_err $? - $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null - check_err $? - - $BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" - check_err $? - $BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" - check_err $? - $BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip" - check_err $? + run_cmd $IP link add "$vxlandev" type vxlan id 10 local $localip \ + dstport 4789 + run_cmd $IP link add name "$brdev" type bridge + run_cmd $IP link set dev "$vxlandev" master "$brdev" + run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" master + run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self + run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac brport "$vxlandev" + run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac br "$brdev" + run_cmd_grep "dev $vxlandev dst $dstip" $BRIDGE fdb get $test_mac dev "$vxlandev" self ip netns del testns &>/dev/null if [ $ret -ne 0 ]; then - echo "FAIL: bridge fdb get" + end_test "FAIL: bridge fdb get" return 1 fi - echo "PASS: bridge fdb get" + end_test "PASS: bridge fdb get" } kci_test_neigh_get() @@ -1243,50 +1069,38 @@ kci_test_neigh_get() dstip6=dead::2 local ret=0 - ip neigh help 2>&1 |grep -q 'ip neigh get' + run_cmd_grep 'ip neigh get' ip neigh help if [ $? -ne 0 ];then - echo "SKIP: fdb get tests: iproute2 too old" + end_test "SKIP: fdb get tests: iproute2 too old" return $ksft_skip fi # ipv4 - ip neigh add $dstip lladdr $dstmac dev "$devdummy" > /dev/null - check_err $? - ip neigh get $dstip dev "$devdummy" 2> /dev/null | grep -q "$dstmac" - check_err $? - ip neigh del $dstip lladdr $dstmac dev "$devdummy" > /dev/null - check_err $? + run_cmd ip neigh add $dstip lladdr $dstmac dev "$devdummy" + run_cmd_grep "$dstmac" ip neigh get $dstip dev "$devdummy" + run_cmd ip neigh del $dstip lladdr $dstmac dev "$devdummy" # ipv4 proxy - ip neigh add proxy $dstip dev "$devdummy" > /dev/null - check_err $? - ip neigh get proxy $dstip dev "$devdummy" 2>/dev/null | grep -q "$dstip" - check_err $? - ip neigh del proxy $dstip dev "$devdummy" > /dev/null - check_err $? + run_cmd ip neigh add proxy $dstip dev "$devdummy" + run_cmd_grep "$dstip" ip neigh get proxy $dstip dev "$devdummy" + run_cmd ip neigh del proxy $dstip dev "$devdummy" # ipv6 - ip neigh add $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null - check_err $? - ip neigh get $dstip6 dev "$devdummy" 2> /dev/null | grep -q "$dstmac" - check_err $? - ip neigh del $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null - check_err $? + run_cmd ip neigh add $dstip6 lladdr $dstmac dev "$devdummy" + run_cmd_grep "$dstmac" ip neigh get $dstip6 dev "$devdummy" + run_cmd ip neigh del $dstip6 lladdr $dstmac dev "$devdummy" # ipv6 proxy - ip neigh add proxy $dstip6 dev "$devdummy" > /dev/null - check_err $? - ip neigh get proxy $dstip6 dev "$devdummy" 2>/dev/null | grep -q "$dstip6" - check_err $? - ip neigh del proxy $dstip6 dev "$devdummy" > /dev/null - check_err $? + run_cmd ip neigh add proxy $dstip6 dev "$devdummy" + run_cmd_grep "$dstip6" ip neigh get proxy $dstip6 dev "$devdummy" + run_cmd ip neigh del proxy $dstip6 dev "$devdummy" if [ $ret -ne 0 ];then - echo "FAIL: neigh get" + end_test "FAIL: neigh get" return 1 fi - echo "PASS: neigh get" + end_test "PASS: neigh get" } kci_test_bridge_parent_id() @@ -1296,10 +1110,9 @@ kci_test_bridge_parent_id() probed=false if [ ! -w /sys/bus/netdevsim/new_device ] ; then - modprobe -q netdevsim - check_err $? + run_cmd modprobe -q netdevsim if [ $ret -ne 0 ]; then - echo "SKIP: bridge_parent_id can't load netdevsim" + end_test "SKIP: bridge_parent_id can't load netdevsim" return $ksft_skip fi probed=true @@ -1312,13 +1125,11 @@ kci_test_bridge_parent_id() udevadm settle dev10=`ls ${sysfsnet}10/net/` dev20=`ls ${sysfsnet}20/net/` - - ip link add name test-bond0 type bond mode 802.3ad - ip link set dev $dev10 master test-bond0 - ip link set dev $dev20 master test-bond0 - ip link add name test-br0 type bridge - ip link set dev test-bond0 master test-br0 - check_err $? + run_cmd ip link add name test-bond0 type bond mode 802.3ad + run_cmd ip link set dev $dev10 master test-bond0 + run_cmd ip link set dev $dev20 master test-bond0 + run_cmd ip link add name test-br0 type bridge + run_cmd ip link set dev test-bond0 master test-br0 # clean up any leftovers ip link del dev test-br0 @@ -1328,10 +1139,10 @@ kci_test_bridge_parent_id() $probed && rmmod netdevsim if [ $ret -ne 0 ]; then - echo "FAIL: bridge_parent_id" + end_test "FAIL: bridge_parent_id" return 1 fi - echo "PASS: bridge_parent_id" + end_test "PASS: bridge_parent_id" } address_get_proto() @@ -1409,10 +1220,10 @@ do_test_address_proto() ip address del dev "$devdummy" "$addr3" if [ $ret -ne 0 ]; then - echo "FAIL: address proto $what" + end_test "FAIL: address proto $what" return 1 fi - echo "PASS: address proto $what" + end_test "PASS: address proto $what" } kci_test_address_proto() @@ -1435,7 +1246,7 @@ kci_test_rtnl() kci_add_dummy if [ $ret -ne 0 ];then - echo "FAIL: cannot add dummy interface" + end_test "FAIL: cannot add dummy interface" return 1 fi @@ -1455,31 +1266,39 @@ usage: ${0##*/} OPTS -t <test> Test(s) to run (default: all) (options: $(echo $ALL_TESTS)) + -v Verbose mode (show commands and output) + -P Pause after every test + -p Pause after every failing test before cleanup (for debugging) EOF } #check for needed privileges if [ "$(id -u)" -ne 0 ];then - echo "SKIP: Need root privileges" + end_test "SKIP: Need root privileges" exit $ksft_skip fi for x in ip tc;do $x -Version 2>/dev/null >/dev/null if [ $? -ne 0 ];then - echo "SKIP: Could not run test without the $x tool" + end_test "SKIP: Could not run test without the $x tool" exit $ksft_skip fi done -while getopts t:h o; do +while getopts t:hvpP o; do case $o in t) TESTS=$OPTARG;; + v) VERBOSE=1;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; h) usage; exit 0;; *) usage; exit 1;; esac done +[ $PAUSE = "yes" ] && PAUSE_ON_FAIL="no" + kci_test_rtnl exit $? diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json index 5272049566d6..a4a83fb3e96f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json @@ -1351,5 +1351,54 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "e470", + "name": "Try to delete class referenced by fw after a replace", + "category": [ + "filter", + "fw" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC class add dev $DEV1 parent root classid 1 drr", + "$TC filter add dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action ok", + "$TC filter replace dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action drop" + ], + "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DEV1", + "matchPattern": "class drr 10:1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] + }, + { + "id": "ec1a", + "name": "Replace fw classid with nil", + "category": [ + "filter", + "fw" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC class add dev $DEV1 parent root classid 1 drr", + "$TC filter add dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action ok" + ], + "cmdUnderTest": "$TC filter replace dev $DEV1 parent 10: handle 1 prio 1 fw action drop", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 parent 10:", + "matchPattern": "fw chain 0 handle 0x1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json index 1f6f19f02997..8d8de8f65aef 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json @@ -177,5 +177,30 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "b042", + "name": "Try to delete class referenced by route after a replace", + "category": [ + "filter", + "route" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC class add dev $DEV1 parent root classid 1 drr", + "$TC filter add dev $DEV1 parent 10: prio 1 route from 10 classid 10:1 action ok", + "$TC filter replace dev $DEV1 parent 10: prio 1 route from 5 classid 10:1 action drop" + ], + "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DEV1", + "matchPattern": "class drr 10:1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index bd64a4bf11ab..ddc7c355be0a 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -247,5 +247,30 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "0c37", + "name": "Try to delete class referenced by u32 after a replace", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC class add dev $DEV1 parent root classid 1 drr", + "$TC filter add dev $DEV1 parent 10: prio 1 u32 match icmp type 1 0xff classid 10:1 action ok", + "$TC filter replace dev $DEV1 parent 10: prio 1 u32 match icmp type 1 0xff classid 10:1 action drop" + ], + "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DEV1", + "matchPattern": "class drr 10:1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 01b636d3039a..6779d5008b27 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -211,102 +211,138 @@ int vsock_seqpacket_accept(unsigned int cid, unsigned int port, return vsock_accept(cid, port, clientaddrp, SOCK_SEQPACKET); } -/* Transmit one byte and check the return value. +/* Transmit bytes from a buffer and check the return value. * * expected_ret: * <0 Negative errno (for testing errors) * 0 End-of-file - * 1 Success + * >0 Success (bytes successfully written) */ -void send_byte(int fd, int expected_ret, int flags) +void send_buf(int fd, const void *buf, size_t len, int flags, + ssize_t expected_ret) { - const uint8_t byte = 'A'; - ssize_t nwritten; + ssize_t nwritten = 0; + ssize_t ret; timeout_begin(TIMEOUT); do { - nwritten = send(fd, &byte, sizeof(byte), flags); - timeout_check("write"); - } while (nwritten < 0 && errno == EINTR); + ret = send(fd, buf + nwritten, len - nwritten, flags); + timeout_check("send"); + + if (ret == 0 || (ret < 0 && errno != EINTR)) + break; + + nwritten += ret; + } while (nwritten < len); timeout_end(); if (expected_ret < 0) { - if (nwritten != -1) { - fprintf(stderr, "bogus send(2) return value %zd\n", - nwritten); + if (ret != -1) { + fprintf(stderr, "bogus send(2) return value %zd (expected %zd)\n", + ret, expected_ret); exit(EXIT_FAILURE); } if (errno != -expected_ret) { - perror("write"); + perror("send"); exit(EXIT_FAILURE); } return; } - if (nwritten < 0) { - perror("write"); + if (ret < 0) { + perror("send"); exit(EXIT_FAILURE); } - if (nwritten == 0) { - if (expected_ret == 0) - return; - fprintf(stderr, "unexpected EOF while sending byte\n"); - exit(EXIT_FAILURE); - } - if (nwritten != sizeof(byte)) { - fprintf(stderr, "bogus send(2) return value %zd\n", nwritten); + if (nwritten != expected_ret) { + if (ret == 0) + fprintf(stderr, "unexpected EOF while sending bytes\n"); + + fprintf(stderr, "bogus send(2) bytes written %zd (expected %zd)\n", + nwritten, expected_ret); exit(EXIT_FAILURE); } } -/* Receive one byte and check the return value. +/* Receive bytes in a buffer and check the return value. * * expected_ret: * <0 Negative errno (for testing errors) * 0 End-of-file - * 1 Success + * >0 Success (bytes successfully read) */ -void recv_byte(int fd, int expected_ret, int flags) +void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret) { - uint8_t byte; - ssize_t nread; + ssize_t nread = 0; + ssize_t ret; timeout_begin(TIMEOUT); do { - nread = recv(fd, &byte, sizeof(byte), flags); - timeout_check("read"); - } while (nread < 0 && errno == EINTR); + ret = recv(fd, buf + nread, len - nread, flags); + timeout_check("recv"); + + if (ret == 0 || (ret < 0 && errno != EINTR)) + break; + + nread += ret; + } while (nread < len); timeout_end(); if (expected_ret < 0) { - if (nread != -1) { - fprintf(stderr, "bogus recv(2) return value %zd\n", - nread); + if (ret != -1) { + fprintf(stderr, "bogus recv(2) return value %zd (expected %zd)\n", + ret, expected_ret); exit(EXIT_FAILURE); } if (errno != -expected_ret) { - perror("read"); + perror("recv"); exit(EXIT_FAILURE); } return; } - if (nread < 0) { - perror("read"); + if (ret < 0) { + perror("recv"); exit(EXIT_FAILURE); } - if (nread == 0) { - if (expected_ret == 0) - return; - fprintf(stderr, "unexpected EOF while receiving byte\n"); - exit(EXIT_FAILURE); - } - if (nread != sizeof(byte)) { - fprintf(stderr, "bogus recv(2) return value %zd\n", nread); + if (nread != expected_ret) { + if (ret == 0) + fprintf(stderr, "unexpected EOF while receiving bytes\n"); + + fprintf(stderr, "bogus recv(2) bytes read %zd (expected %zd)\n", + nread, expected_ret); exit(EXIT_FAILURE); } +} + +/* Transmit one byte and check the return value. + * + * expected_ret: + * <0 Negative errno (for testing errors) + * 0 End-of-file + * 1 Success + */ +void send_byte(int fd, int expected_ret, int flags) +{ + const uint8_t byte = 'A'; + + send_buf(fd, &byte, sizeof(byte), flags, expected_ret); +} + +/* Receive one byte and check the return value. + * + * expected_ret: + * <0 Negative errno (for testing errors) + * 0 End-of-file + * 1 Success + */ +void recv_byte(int fd, int expected_ret, int flags) +{ + uint8_t byte; + + recv_buf(fd, &byte, sizeof(byte), flags, expected_ret); + if (byte != 'A') { fprintf(stderr, "unexpected byte read %c\n", byte); exit(EXIT_FAILURE); diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index fb99208a95ea..e5407677ce05 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -42,6 +42,9 @@ int vsock_stream_accept(unsigned int cid, unsigned int port, int vsock_seqpacket_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp); void vsock_wait_remote_close(int fd); +void send_buf(int fd, const void *buf, size_t len, int flags, + ssize_t expected_ret); +void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret); void send_byte(int fd, int expected_ret, int flags); void recv_byte(int fd, int expected_ret, int flags); void run_tests(const struct test_case *test_cases, diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 90718c2fd4ea..da4cb819a183 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -19,6 +19,7 @@ #include <time.h> #include <sys/mman.h> #include <poll.h> +#include <signal.h> #include "timeout.h" #include "control.h" @@ -261,7 +262,6 @@ static void test_msg_peek_client(const struct test_opts *opts, bool seqpacket) { unsigned char buf[MSG_PEEK_BUF_LEN]; - ssize_t send_size; int fd; int i; @@ -280,17 +280,7 @@ static void test_msg_peek_client(const struct test_opts *opts, control_expectln("SRVREADY"); - send_size = send(fd, buf, sizeof(buf), 0); - - if (send_size < 0) { - perror("send"); - exit(EXIT_FAILURE); - } - - if (send_size != sizeof(buf)) { - fprintf(stderr, "Invalid send size %zi\n", send_size); - exit(EXIT_FAILURE); - } + send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); close(fd); } @@ -301,7 +291,6 @@ static void test_msg_peek_server(const struct test_opts *opts, unsigned char buf_half[MSG_PEEK_BUF_LEN / 2]; unsigned char buf_normal[MSG_PEEK_BUF_LEN]; unsigned char buf_peek[MSG_PEEK_BUF_LEN]; - ssize_t res; int fd; if (seqpacket) @@ -315,34 +304,16 @@ static void test_msg_peek_server(const struct test_opts *opts, } /* Peek from empty socket. */ - res = recv(fd, buf_peek, sizeof(buf_peek), MSG_PEEK | MSG_DONTWAIT); - if (res != -1) { - fprintf(stderr, "expected recv(2) failure, got %zi\n", res); - exit(EXIT_FAILURE); - } - - if (errno != EAGAIN) { - perror("EAGAIN expected"); - exit(EXIT_FAILURE); - } + recv_buf(fd, buf_peek, sizeof(buf_peek), MSG_PEEK | MSG_DONTWAIT, + -EAGAIN); control_writeln("SRVREADY"); /* Peek part of data. */ - res = recv(fd, buf_half, sizeof(buf_half), MSG_PEEK); - if (res != sizeof(buf_half)) { - fprintf(stderr, "recv(2) + MSG_PEEK, expected %zu, got %zi\n", - sizeof(buf_half), res); - exit(EXIT_FAILURE); - } + recv_buf(fd, buf_half, sizeof(buf_half), MSG_PEEK, sizeof(buf_half)); /* Peek whole data. */ - res = recv(fd, buf_peek, sizeof(buf_peek), MSG_PEEK); - if (res != sizeof(buf_peek)) { - fprintf(stderr, "recv(2) + MSG_PEEK, expected %zu, got %zi\n", - sizeof(buf_peek), res); - exit(EXIT_FAILURE); - } + recv_buf(fd, buf_peek, sizeof(buf_peek), MSG_PEEK, sizeof(buf_peek)); /* Compare partial and full peek. */ if (memcmp(buf_half, buf_peek, sizeof(buf_half))) { @@ -355,22 +326,11 @@ static void test_msg_peek_server(const struct test_opts *opts, * so check it with MSG_PEEK. We must get length * of the message. */ - res = recv(fd, buf_half, sizeof(buf_half), MSG_PEEK | - MSG_TRUNC); - if (res != sizeof(buf_peek)) { - fprintf(stderr, - "recv(2) + MSG_PEEK | MSG_TRUNC, exp %zu, got %zi\n", - sizeof(buf_half), res); - exit(EXIT_FAILURE); - } + recv_buf(fd, buf_half, sizeof(buf_half), MSG_PEEK | MSG_TRUNC, + sizeof(buf_peek)); } - res = recv(fd, buf_normal, sizeof(buf_normal), 0); - if (res != sizeof(buf_normal)) { - fprintf(stderr, "recv(2), expected %zu, got %zi\n", - sizeof(buf_normal), res); - exit(EXIT_FAILURE); - } + recv_buf(fd, buf_normal, sizeof(buf_normal), 0, sizeof(buf_normal)); /* Compare full peek and normal read. */ if (memcmp(buf_peek, buf_normal, sizeof(buf_peek))) { @@ -415,7 +375,6 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE; for (int i = 0; i < msg_count; i++) { - ssize_t send_size; size_t buf_size; int flags; void *buf; @@ -443,17 +402,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) flags = 0; } - send_size = send(fd, buf, buf_size, flags); - - if (send_size < 0) { - perror("send"); - exit(EXIT_FAILURE); - } - - if (send_size != buf_size) { - fprintf(stderr, "Invalid send size\n"); - exit(EXIT_FAILURE); - } + send_buf(fd, buf, buf_size, flags, buf_size); /* * Hash sum is computed at both client and server in @@ -554,10 +503,7 @@ static void test_seqpacket_msg_trunc_client(const struct test_opts *opts) exit(EXIT_FAILURE); } - if (send(fd, buf, sizeof(buf), 0) != sizeof(buf)) { - perror("send failed"); - exit(EXIT_FAILURE); - } + send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); control_writeln("SENDDONE"); close(fd); @@ -679,7 +625,6 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts) static void test_seqpacket_bigmsg_client(const struct test_opts *opts) { unsigned long sock_buf_size; - ssize_t send_size; socklen_t len; void *data; int fd; @@ -706,18 +651,7 @@ static void test_seqpacket_bigmsg_client(const struct test_opts *opts) exit(EXIT_FAILURE); } - send_size = send(fd, data, sock_buf_size, 0); - if (send_size != -1) { - fprintf(stderr, "expected 'send(2)' failure, got %zi\n", - send_size); - exit(EXIT_FAILURE); - } - - if (errno != EMSGSIZE) { - fprintf(stderr, "expected EMSGSIZE in 'errno', got %i\n", - errno); - exit(EXIT_FAILURE); - } + send_buf(fd, data, sock_buf_size, 0, -EMSGSIZE); control_writeln("CLISENT"); @@ -771,15 +705,9 @@ static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts *opt memset(buf1, BUF_PATTERN_1, buf_size); memset(buf2, BUF_PATTERN_2, buf_size); - if (send(fd, buf1, buf_size, 0) != buf_size) { - perror("send failed"); - exit(EXIT_FAILURE); - } + send_buf(fd, buf1, buf_size, 0, buf_size); - if (send(fd, buf2, buf_size, 0) != buf_size) { - perror("send failed"); - exit(EXIT_FAILURE); - } + send_buf(fd, buf2, buf_size, 0, buf_size); close(fd); } @@ -900,7 +828,6 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) unsigned long lowat_val = RCVLOWAT_BUF_SIZE; char buf[RCVLOWAT_BUF_SIZE]; struct pollfd fds; - ssize_t read_res; short poll_flags; int fd; @@ -955,12 +882,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) /* Use MSG_DONTWAIT, if call is going to wait, EAGAIN * will be returned. */ - read_res = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); - if (read_res != RCVLOWAT_BUF_SIZE) { - fprintf(stderr, "Unexpected recv result %zi\n", - read_res); - exit(EXIT_FAILURE); - } + recv_buf(fd, buf, sizeof(buf), MSG_DONTWAIT, RCVLOWAT_BUF_SIZE); control_writeln("POLLDONE"); @@ -972,7 +894,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) static void test_inv_buf_client(const struct test_opts *opts, bool stream) { unsigned char data[INV_BUF_TEST_DATA_LEN] = {0}; - ssize_t ret; + ssize_t expected_ret; int fd; if (stream) @@ -988,39 +910,18 @@ static void test_inv_buf_client(const struct test_opts *opts, bool stream) control_expectln("SENDDONE"); /* Use invalid buffer here. */ - ret = recv(fd, NULL, sizeof(data), 0); - if (ret != -1) { - fprintf(stderr, "expected recv(2) failure, got %zi\n", ret); - exit(EXIT_FAILURE); - } - - if (errno != EFAULT) { - fprintf(stderr, "unexpected recv(2) errno %d\n", errno); - exit(EXIT_FAILURE); - } - - ret = recv(fd, data, sizeof(data), MSG_DONTWAIT); + recv_buf(fd, NULL, sizeof(data), 0, -EFAULT); if (stream) { /* For SOCK_STREAM we must continue reading. */ - if (ret != sizeof(data)) { - fprintf(stderr, "expected recv(2) success, got %zi\n", ret); - exit(EXIT_FAILURE); - } - /* Don't check errno in case of success. */ + expected_ret = sizeof(data); } else { /* For SOCK_SEQPACKET socket's queue must be empty. */ - if (ret != -1) { - fprintf(stderr, "expected recv(2) failure, got %zi\n", ret); - exit(EXIT_FAILURE); - } - - if (errno != EAGAIN) { - fprintf(stderr, "unexpected recv(2) errno %d\n", errno); - exit(EXIT_FAILURE); - } + expected_ret = -EAGAIN; } + recv_buf(fd, data, sizeof(data), MSG_DONTWAIT, expected_ret); + control_writeln("DONE"); close(fd); @@ -1029,7 +930,6 @@ static void test_inv_buf_client(const struct test_opts *opts, bool stream) static void test_inv_buf_server(const struct test_opts *opts, bool stream) { unsigned char data[INV_BUF_TEST_DATA_LEN] = {0}; - ssize_t res; int fd; if (stream) @@ -1042,11 +942,7 @@ static void test_inv_buf_server(const struct test_opts *opts, bool stream) exit(EXIT_FAILURE); } - res = send(fd, data, sizeof(data), 0); - if (res != sizeof(data)) { - fprintf(stderr, "unexpected send(2) result %zi\n", res); - exit(EXIT_FAILURE); - } + send_buf(fd, data, sizeof(data), 0, sizeof(data)); control_writeln("SENDDONE"); @@ -1080,7 +976,6 @@ static void test_seqpacket_inv_buf_server(const struct test_opts *opts) static void test_stream_virtio_skb_merge_client(const struct test_opts *opts) { - ssize_t res; int fd; fd = vsock_stream_connect(opts->peer_cid, 1234); @@ -1090,22 +985,14 @@ static void test_stream_virtio_skb_merge_client(const struct test_opts *opts) } /* Send first skbuff. */ - res = send(fd, HELLO_STR, strlen(HELLO_STR), 0); - if (res != strlen(HELLO_STR)) { - fprintf(stderr, "unexpected send(2) result %zi\n", res); - exit(EXIT_FAILURE); - } + send_buf(fd, HELLO_STR, strlen(HELLO_STR), 0, strlen(HELLO_STR)); control_writeln("SEND0"); /* Peer reads part of first skbuff. */ control_expectln("REPLY0"); /* Send second skbuff, it will be appended to the first. */ - res = send(fd, WORLD_STR, strlen(WORLD_STR), 0); - if (res != strlen(WORLD_STR)) { - fprintf(stderr, "unexpected send(2) result %zi\n", res); - exit(EXIT_FAILURE); - } + send_buf(fd, WORLD_STR, strlen(WORLD_STR), 0, strlen(WORLD_STR)); control_writeln("SEND1"); /* Peer reads merged skbuff packet. */ @@ -1116,8 +1003,8 @@ static void test_stream_virtio_skb_merge_client(const struct test_opts *opts) static void test_stream_virtio_skb_merge_server(const struct test_opts *opts) { + size_t read = 0, to_read; unsigned char buf[64]; - ssize_t res; int fd; fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); @@ -1129,26 +1016,21 @@ static void test_stream_virtio_skb_merge_server(const struct test_opts *opts) control_expectln("SEND0"); /* Read skbuff partially. */ - res = recv(fd, buf, 2, 0); - if (res != 2) { - fprintf(stderr, "expected recv(2) returns 2 bytes, got %zi\n", res); - exit(EXIT_FAILURE); - } + to_read = 2; + recv_buf(fd, buf + read, to_read, 0, to_read); + read += to_read; control_writeln("REPLY0"); control_expectln("SEND1"); - res = recv(fd, buf + 2, sizeof(buf) - 2, 0); - if (res != 8) { - fprintf(stderr, "expected recv(2) returns 8 bytes, got %zi\n", res); - exit(EXIT_FAILURE); - } + /* Read the rest of both buffers */ + to_read = strlen(HELLO_STR WORLD_STR) - read; + recv_buf(fd, buf + read, to_read, 0, to_read); + read += to_read; - res = recv(fd, buf, sizeof(buf) - 8 - 2, MSG_DONTWAIT); - if (res != -1) { - fprintf(stderr, "expected recv(2) failure, got %zi\n", res); - exit(EXIT_FAILURE); - } + /* No more bytes should be there */ + to_read = sizeof(buf) - read; + recv_buf(fd, buf + read, to_read, MSG_DONTWAIT, -EAGAIN); if (memcmp(buf, HELLO_STR WORLD_STR, strlen(HELLO_STR WORLD_STR))) { fprintf(stderr, "pattern mismatch\n"); @@ -1170,6 +1052,133 @@ static void test_seqpacket_msg_peek_server(const struct test_opts *opts) return test_msg_peek_server(opts, true); } +static sig_atomic_t have_sigpipe; + +static void sigpipe(int signo) +{ + have_sigpipe = 1; +} + +static void test_stream_check_sigpipe(int fd) +{ + ssize_t res; + + have_sigpipe = 0; + + res = send(fd, "A", 1, 0); + if (res != -1) { + fprintf(stderr, "expected send(2) failure, got %zi\n", res); + exit(EXIT_FAILURE); + } + + if (!have_sigpipe) { + fprintf(stderr, "SIGPIPE expected\n"); + exit(EXIT_FAILURE); + } + + have_sigpipe = 0; + + res = send(fd, "A", 1, MSG_NOSIGNAL); + if (res != -1) { + fprintf(stderr, "expected send(2) failure, got %zi\n", res); + exit(EXIT_FAILURE); + } + + if (have_sigpipe) { + fprintf(stderr, "SIGPIPE not expected\n"); + exit(EXIT_FAILURE); + } +} + +static void test_stream_shutwr_client(const struct test_opts *opts) +{ + int fd; + + struct sigaction act = { + .sa_handler = sigpipe, + }; + + sigaction(SIGPIPE, &act, NULL); + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (shutdown(fd, SHUT_WR)) { + perror("shutdown"); + exit(EXIT_FAILURE); + } + + test_stream_check_sigpipe(fd); + + control_writeln("CLIENTDONE"); + + close(fd); +} + +static void test_stream_shutwr_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("CLIENTDONE"); + + close(fd); +} + +static void test_stream_shutrd_client(const struct test_opts *opts) +{ + int fd; + + struct sigaction act = { + .sa_handler = sigpipe, + }; + + sigaction(SIGPIPE, &act, NULL); + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + control_expectln("SHUTRDDONE"); + + test_stream_check_sigpipe(fd); + + control_writeln("CLIENTDONE"); + + close(fd); +} + +static void test_stream_shutrd_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + if (shutdown(fd, SHUT_RD)) { + perror("shutdown"); + exit(EXIT_FAILURE); + } + + control_writeln("SHUTRDDONE"); + control_expectln("CLIENTDONE"); + + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1250,6 +1259,16 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_msg_peek_client, .run_server = test_seqpacket_msg_peek_server, }, + { + .name = "SOCK_STREAM SHUT_WR", + .run_client = test_stream_shutwr_client, + .run_server = test_stream_shutwr_server, + }, + { + .name = "SOCK_STREAM SHUT_RD", + .run_client = test_stream_shutrd_client, + .run_server = test_stream_shutrd_server, + }, {}, }; |