diff options
363 files changed, 12648 insertions, 5390 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-drivers-janz-cmodio b/Documentation/ABI/testing/sysfs-bus-pci-drivers-janz-cmodio new file mode 100644 index 000000000000..4d08f28dc871 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-pci-drivers-janz-cmodio @@ -0,0 +1,8 @@ +What: /sys/bus/pci/drivers/janz-cmodio/.../modulbus_number +Date: May 2010 +KernelVersion: 2.6.35 +Contact: Ira W. Snyder <ira.snyder@gmail.com> +Description: + Value representing the HEX switch S2 of the janz carrier board CMOD-IO or CAN-PCI2 + + Read-only: value of the configuration switch (0..15) diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index 5ecfd72ba684..668604fc8e06 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -39,6 +39,25 @@ Description: Format is a string, e.g: 00:11:22:33:44:55 for an Ethernet MAC address. +What: /sys/class/net/<bridge iface>/bridge/group_fwd_mask +Date: January 2012 +KernelVersion: 3.2 +Contact: netdev@vger.kernel.org +Description: + Bitmask to allow forwarding of link local frames with address + 01-80-C2-00-00-0X on a bridge device. Only values that set bits + not matching BR_GROUPFWD_RESTRICTED in net/bridge/br_private.h + allowed. + Default value 0 does not forward any link local frames. + + Restricted bits: + 0: 01-80-C2-00-00-00 Bridge Group Address used for STP + 1: 01-80-C2-00-00-01 (MAC Control) 802.3 used for MAC PAUSE + 2: 01-80-C2-00-00-02 (Link Aggregation) 802.3ad + + Any values not setting these bits can be used. Take special + care when forwarding control frames e.g. 802.1X-PAE or LLDP. + What: /sys/class/net/<iface>/broadcast Date: April 2005 KernelVersion: 2.6.12 diff --git a/Documentation/ABI/testing/sysfs-class-net-janz-ican3 b/Documentation/ABI/testing/sysfs-class-net-janz-ican3 new file mode 100644 index 000000000000..fdbc03a2b8f8 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-net-janz-ican3 @@ -0,0 +1,19 @@ +What: /sys/class/net/<iface>/termination +Date: May 2010 +KernelVersion: 2.6.35 +Contact: Ira W. Snyder <ira.snyder@gmail.com> +Description: + Value representing the can bus termination + + Default: 1 (termination active) + Reading: get actual termination state + Writing: set actual termination state (0=no termination, 1=termination active) + +What: /sys/class/net/<iface>/fwinfo +Date: May 2015 +KernelVersion: 3.19 +Contact: Andreas Gröger <andreas24groeger@gmail.com> +Description: + Firmware stamp of ican3 module + Read-only: 32 byte string identification of the ICAN3 module + (known values: "JANZ-ICAN3 ICANOS 1.xx", "JANZ-ICAN3 CAL/CANopen 1.xx") diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index ba19d671e808..8ec5fdf444e9 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -9,6 +9,7 @@ Required properties: the Cadence GEM, or the generic form: "cdns,gem". Use "cdns,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs. Use "cdns,sama5d4-gem" for the Gigabit IP available on Atmel sama5d4 SoCs. + Use "cdns,zynqmp-gem" for Zynq Ultrascale+ MPSoC. - reg: Address and length of the register set for the device - interrupts: Should contain macb interrupt - phy-mode: See ethernet.txt file in the same directory. diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 83bf4986baea..334b49ef02d1 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -51,6 +51,7 @@ Table of Contents 3.4 Configuring Bonding Manually via Sysfs 3.5 Configuration with Interfaces Support 3.6 Overriding Configuration for Special Cases +3.7 Configuring LACP for 802.3ad mode in a more secure way 4. Querying Bonding Configuration 4.1 Bonding Configuration @@ -178,6 +179,27 @@ active_slave active slave, or the empty string if there is no active slave or the current mode does not use an active slave. +ad_actor_sys_prio + + In an AD system, this specifies the system priority. The allowed range + is 1 - 65535. If the value is not specified, it takes 65535 as the + default value. + + This parameter has effect only in 802.3ad mode and is available through + SysFs interface. + +ad_actor_system + + In an AD system, this specifies the mac-address for the actor in + protocol packet exchanges (LACPDUs). The value cannot be NULL or + multicast. It is preferred to have the local-admin bit set for this + mac but driver does not enforce it. If the value is not given then + system defaults to using the masters' mac address as actors' system + address. + + This parameter has effect only in 802.3ad mode and is available through + SysFs interface. + ad_select Specifies the 802.3ad aggregation selection logic to use. The @@ -220,6 +242,21 @@ ad_select This option was added in bonding version 3.4.0. +ad_user_port_key + + In an AD system, the port-key has three parts as shown below - + + Bits Use + 00 Duplex + 01-05 Speed + 06-15 User-defined + + This defines the upper 10 bits of the port key. The values can be + from 0 - 1023. If not given, the system defaults to 0. + + This parameter has effect only in 802.3ad mode and is available through + SysFs interface. + all_slaves_active Specifies that duplicate frames (received on inactive ports) should be @@ -1622,6 +1659,53 @@ output port selection. This feature first appeared in bonding driver version 3.7.0 and support for output slave selection was limited to round-robin and active-backup modes. +3.7 Configuring LACP for 802.3ad mode in a more secure way +---------------------------------------------------------- + +When using 802.3ad bonding mode, the Actor (host) and Partner (switch) +exchange LACPDUs. These LACPDUs cannot be sniffed, because they are +destined to link local mac addresses (which switches/bridges are not +supposed to forward). However, most of the values are easily predictable +or are simply the machine's MAC address (which is trivially known to all +other hosts in the same L2). This implies that other machines in the L2 +domain can spoof LACPDU packets from other hosts to the switch and potentially +cause mayhem by joining (from the point of view of the switch) another +machine's aggregate, thus receiving a portion of that hosts incoming +traffic and / or spoofing traffic from that machine themselves (potentially +even successfully terminating some portion of flows). Though this is not +a likely scenario, one could avoid this possibility by simply configuring +few bonding parameters: + + (a) ad_actor_system : You can set a random mac-address that can be used for + these LACPDU exchanges. The value can not be either NULL or Multicast. + Also it's preferable to set the local-admin bit. Following shell code + generates a random mac-address as described above. + + # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \ + $(( (RANDOM & 0xFE) | 0x02 )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF )) \ + $(( RANDOM & 0xFF ))) + # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system + + (b) ad_actor_sys_prio : Randomize the system priority. The default value + is 65535, but system can take the value from 1 - 65535. Following shell + code generates random priority and sets it. + + # sys_prio=$(( 1 + RANDOM + RANDOM )) + # echo $sys_prio > /sys/class/net/bond0/bonding/ad_actor_sys_prio + + (c) ad_user_port_key : Use the user portion of the port-key. The default + keeps this empty. These are the upper 10 bits of the port-key and value + ranges from 0 - 1023. Following shell code generates these 10 bits and + sets it. + + # usr_port_key=$(( RANDOM & 0x3FF )) + # echo $usr_port_key > /sys/class/net/bond0/bonding/ad_user_port_key + + 4 Querying Bonding Configuration ================================= diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index 5abad1e921ca..b48d4a149411 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt @@ -268,6 +268,9 @@ solution for a couple of reasons: struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ __u8 can_dlc; /* frame payload length in byte (0 .. 8) */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ __u8 data[8] __attribute__((aligned(8))); }; diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 071fb18dc57c..5095c63e50ed 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1213,6 +1213,14 @@ auto_flowlabels - BOOLEAN FALSE: disabled Default: false +flowlabel_state_ranges - BOOLEAN + Split the flow label number space into two ranges. 0-0x7FFFF is + reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF + is reserved for stateless flow labels as described in RFC6437. + TRUE: enabled + FALSE: disabled + Default: true + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index 0344f1d45b37..747faccc4120 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt @@ -145,6 +145,7 @@ Examples: UDPCSUM, IPSEC # IPsec encapsulation (needs CONFIG_XFRM) NODE_ALLOC # node specific memory allocation + NO_TIMESTAMP # disable timestamping pgset spi SPI_VALUE Set specific SA used to transform packet. @@ -192,6 +193,10 @@ Examples: pgset "rate 300M" set rate to 300 Mb/s pgset "ratep 1000000" set rate to 1Mpps + pgset "xmit_mode netif_receive" RX inject into stack netif_receive_skb() + Works with "burst" but not with "clone_skb". + Default xmit_mode is "start_xmit". + Sample scripts ============== @@ -287,6 +292,7 @@ flag UDPCSUM IPSEC NODE_ALLOC + NO_TIMESTAMP dst_min dst_max @@ -308,6 +314,9 @@ flowlen rate ratep +xmit_mode <start_xmit|netif_receive> + + References: ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/ ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/ diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index f981a9295a39..616f89267d23 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -1,59 +1,358 @@ -Switch (and switch-ish) device drivers HOWTO -=========================== - -Please note that the word "switch" is here used in very generic meaning. -This include devices supporting L2/L3 but also various flow offloading chips, -including switches embedded into SR-IOV NICs. - -Lets describe a topology a bit. Imagine the following example: - - +----------------------------+ +---------------+ - | SOME switch chip | | CPU | - +----------------------------+ +---------------+ - port1 port2 port3 port4 MNGMNT | PCI-E | - | | | | | +---------------+ - PHY PHY | | | | NIC0 NIC1 - | | | | | | - | | +- PCI-E -+ | | - | +------- MII -------+ | - +------------- MII ------------+ - -In this example, there are two independent lines between the switch silicon -and CPU. NIC0 and NIC1 drivers are not aware of a switch presence. They are -separate from the switch driver. SOME switch chip is by managed by a driver -via PCI-E device MNGMNT. Note that MNGMNT device, NIC0 and NIC1 may be -connected to some other type of bus. - -Now, for the previous example show the representation in kernel: - - +----------------------------+ +---------------+ - | SOME switch chip | | CPU | - +----------------------------+ +---------------+ - sw0p0 sw0p1 sw0p2 sw0p3 MNGMNT | PCI-E | - | | | | | +---------------+ - PHY PHY | | | | eth0 eth1 - | | | | | | - | | +- PCI-E -+ | | - | +------- MII -------+ | - +------------- MII ------------+ - -Lets call the example switch driver for SOME switch chip "SOMEswitch". This -driver takes care of PCI-E device MNGMNT. There is a netdevice instance sw0pX -created for each port of a switch. These netdevices are instances -of "SOMEswitch" driver. sw0pX netdevices serve as a "representation" -of the switch chip. eth0 and eth1 are instances of some other existing driver. - -The only difference of the switch-port netdevice from the ordinary netdevice -is that is implements couple more NDOs: - - ndo_switch_parent_id_get - This returns the same ID for two port netdevices - of the same physical switch chip. This is - mandatory to be implemented by all switch drivers - and serves the caller for recognition of a port - netdevice. - ndo_switch_parent_* - Functions that serve for a manipulation of the switch - chip itself (it can be though of as a "parent" of the - port, therefore the name). They are not port-specific. - Caller might use arbitrary port netdevice of the same - switch and it will make no difference. - ndo_switch_port_* - Functions that serve for a port-specific manipulation. +Ethernet switch device driver model (switchdev) +=============================================== +Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> +Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> + + +The Ethernet switch device driver model (switchdev) is an in-kernel driver +model for switch devices which offload the forwarding (data) plane from the +kernel. + +Figure 1 is a block diagram showing the components of the switchdev model for +an example setup using a data-center-class switch ASIC chip. Other setups +with SR-IOV or soft switches, such as OVS, are possible. + + +                             User-space tools                                  +                                                                               +       user space                   |                                          +      +-------------------------------------------------------------------+    +       kernel                       | Netlink                                  +                                    |                                          +                     +--------------+-------------------------------+          +                     |         Network stack                        |          +                     |           (Linux)                            |          +                     |                                              |          +                     +----------------------------------------------+          +                                                                               + sw1p2 sw1p4 sw1p6 +                      sw1p1  + sw1p3 +  sw1p5 +         eth1              +                        +    |    +    |    +    |            +                +                        |    |    |    |    |    |            |                +                     +--+----+----+----+-+--+----+---+  +-----+-----+          +                     |         Switch driver         |  |    mgmt   |          +                     |        (this document)        |  |   driver  |          +                     |                               |  |           |          +                     +--------------+----------------+  +-----------+          +                                    |                                          +       kernel                       | HW bus (eg PCI)                          +      +-------------------------------------------------------------------+    +       hardware                     |                                          +                     +--------------+---+------------+                         +                     |         Switch device (sw1)   |                         +                     |  +----+                       +--------+                +                     |  |    v offloaded data path   | mgmt port               +                     |  |    |                       |                         +                     +--|----|----+----+----+----+---+                         +                        |    |    |    |    |    |                             +                        +    +    +    +    +    +                             +                       p1   p2   p3   p4   p5   p6 +                                        +                             front-panel ports                                 +                                                                               + + Fig 1. + + +Include Files +------------- + +#include <linux/netdevice.h> +#include <net/switchdev.h> + + +Configuration +------------- + +Use "depends NET_SWITCHDEV" in driver's Kconfig to ensure switchdev model +support is built for driver. + + +Switch Ports +------------ + +On switchdev driver initialization, the driver will allocate and register a +struct net_device (using register_netdev()) for each enumerated physical switch +port, called the port netdev. A port netdev is the software representation of +the physical port and provides a conduit for control traffic to/from the +controller (the kernel) and the network, as well as an anchor point for higher +level constructs such as bridges, bonds, VLANs, tunnels, and L3 routers. Using +standard netdev tools (iproute2, ethtool, etc), the port netdev can also +provide to the user access to the physical properties of the switch port such +as PHY link state and I/O statistics. + +There is (currently) no higher-level kernel object for the switch beyond the +port netdevs. All of the switchdev driver ops are netdev ops or switchdev ops. + +A switch management port is outside the scope of the switchdev driver model. +Typically, the management port is not participating in offloaded data plane and +is loaded with a different driver, such as a NIC driver, on the management port +device. + +Port Netdev Naming +^^^^^^^^^^^^^^^^^^ + +Udev rules should be used for port netdev naming, using some unique attribute +of the port as a key, for example the port MAC address or the port PHYS name. +Hard-coding of kernel netdev names within the driver is discouraged; let the +kernel pick the default netdev name, and let udev set the final name based on a +port attribute. + +Using port PHYS name (ndo_get_phys_port_name) for the key is particularly +useful for dynamically-named ports where the device names its ports based on +external configuration. For example, if a physical 40G port is split logically +into 4 10G ports, resulting in 4 port netdevs, the device can give a unique +name for each port using port PHYS name. The udev rule would be: + +SUBSYSTEM=="net", ACTION=="add", DRIVER="<driver>", ATTR{phys_port_name}!="", \ + NAME="$attr{phys_port_name}" + +Suggested naming convention is "swXpYsZ", where X is the switch name or ID, Y +is the port name or ID, and Z is the sub-port name or ID. For example, sw1p1s0 +would be sub-port 0 on port 1 on switch 1. + +Switch ID +^^^^^^^^^ + +The switchdev driver must implement the switchdev op switchdev_port_attr_get for +SWITCHDEV_ATTR_PORT_PARENT_ID for each port netdev, returning the same physical ID +for each port of a switch. The ID must be unique between switches on the same +system. The ID does not need to be unique between switches on different +systems. + +The switch ID is used to locate ports on a switch and to know if aggregated +ports belong to the same switch. + +Port Features +^^^^^^^^^^^^^ + +NETIF_F_NETNS_LOCAL + +If the switchdev driver (and device) only supports offloading of the default +network namespace (netns), the driver should set this feature flag to prevent +the port netdev from being moved out of the default netns. A netns-aware +driver/device would not set this flag and be responsible for partitioning +hardware to preserve netns containment. This means hardware cannot forward +traffic from a port in one namespace to another port in another namespace. + +Port Topology +^^^^^^^^^^^^^ + +The port netdevs representing the physical switch ports can be organized into +higher-level switching constructs. The default construct is a standalone +router port, used to offload L3 forwarding. Two or more ports can be bonded +together to form a LAG. Two or more ports (or LAGs) can be bridged to bridge +to L2 networks. VLANs can be applied to sub-divide L2 networks. L2-over-L3 +tunnels can be built on ports. These constructs are built using standard Linux +tools such as the bridge driver, the bonding/team drivers, and netlink-based +tools such as iproute2. + +The switchdev driver can know a particular port's position in the topology by +monitoring NETDEV_CHANGEUPPER notifications. For example, a port moved into a +bond will see it's upper master change. If that bond is moved into a bridge, +the bond's upper master will change. And so on. The driver will track such +movements to know what position a port is in in the overall topology by +registering for netdevice events and acting on NETDEV_CHANGEUPPER. + +L2 Forwarding Offload +--------------------- + +The idea is to offload the L2 data forwarding (switching) path from the kernel +to the switchdev device by mirroring bridge FDB entries down to the device. An +FDB entry is the {port, MAC, VLAN} tuple forwarding destination. + +To offloading L2 bridging, the switchdev driver/device should support: + + - Static FDB entries installed on a bridge port + - Notification of learned/forgotten src mac/vlans from device + - STP state changes on the port + - VLAN flooding of multicast/broadcast and unknown unicast packets + +Static FDB Entries +^^^^^^^^^^^^^^^^^^ + +The switchdev driver should implement ndo_fdb_add, ndo_fdb_del and ndo_fdb_dump +to support static FDB entries installed to the device. Static bridge FDB +entries are installed, for example, using iproute2 bridge cmd: + + bridge fdb add ADDR dev DEV [vlan VID] [self] + +XXX: what should be done if offloading this rule to hardware fails (for +example, due to full capacity in hardware tables) ? + +Note: by default, the bridge does not filter on VLAN and only bridges untagged +traffic. To enable VLAN support, turn on VLAN filtering: + + echo 1 >/sys/class/net/<bridge>/bridge/vlan_filtering + +Notification of Learned/Forgotten Source MAC/VLANs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The switch device will learn/forget source MAC address/VLAN on ingress packets +and notify the switch driver of the mac/vlan/port tuples. The switch driver, +in turn, will notify the bridge driver using the switchdev notifier call: + + err = call_switchdev_notifiers(val, dev, info); + +Where val is SWITCHDEV_FDB_ADD when learning and SWITCHDEV_FDB_DEL when forgetting, and +info points to a struct switchdev_notifier_fdb_info. On SWITCHDEV_FDB_ADD, the bridge +driver will install the FDB entry into the bridge's FDB and mark the entry as +NTF_EXT_LEARNED. The iproute2 bridge command will label these entries +"offload": + + $ bridge fdb + 52:54:00:12:35:01 dev sw1p1 master br0 permanent + 00:02:00:00:02:00 dev sw1p1 master br0 offload + 00:02:00:00:02:00 dev sw1p1 self + 52:54:00:12:35:02 dev sw1p2 master br0 permanent + 00:02:00:00:03:00 dev sw1p2 master br0 offload + 00:02:00:00:03:00 dev sw1p2 self + 33:33:00:00:00:01 dev eth0 self permanent + 01:00:5e:00:00:01 dev eth0 self permanent + 33:33:ff:00:00:00 dev eth0 self permanent + 01:80:c2:00:00:0e dev eth0 self permanent + 33:33:00:00:00:01 dev br0 self permanent + 01:00:5e:00:00:01 dev br0 self permanent + 33:33:ff:12:35:01 dev br0 self permanent + +Learning on the port should be disabled on the bridge using the bridge command: + + bridge link set dev DEV learning off + +Learning on the device port should be enabled, as well as learning_sync: + + bridge link set dev DEV learning on self + bridge link set dev DEV learning_sync on self + +Learning_sync attribute enables syncing of the learned/forgotton FDB entry to +the bridge's FDB. It's possible, but not optimal, to enable learning on the +device port and on the bridge port, and disable learning_sync. + +To support learning and learning_sync port attributes, the driver implements +switchdev op switchdev_port_attr_get/set for SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS. The driver +should initialize the attributes to the hardware defaults. + +FDB Ageing +^^^^^^^^^^ + +There are two FDB ageing models supported: 1) ageing by the device, and 2) +ageing by the kernel. Ageing by the device is preferred if many FDB entries +are supported. The driver calls call_switchdev_notifiers(SWITCHDEV_FDB_DEL, ...) to +age out the FDB entry. In this model, ageing by the kernel should be turned +off. XXX: how to turn off ageing in kernel on a per-port basis or otherwise +prevent the kernel from ageing out the FDB entry? + +In the kernel ageing model, the standard bridge ageing mechanism is used to age +out stale FDB entries. To keep an FDB entry "alive", the driver should refresh +the FDB entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...). The +notification will reset the FDB entry's last-used time to now. The driver +should rate limit refresh notifications, for example, no more than once a +second. If the FDB entry expires, ndo_fdb_del is called to remove entry from +the device. XXX: this last part isn't currently correct: ndo_fdb_del isn't +called, so the stale entry remains in device...this need to get fixed. + +FDB Flush +^^^^^^^^^ + +XXX: Unimplemented. Need to support FDB flush by bridge driver for port and +remove both static and learned FDB entries. + +STP State Change on Port +^^^^^^^^^^^^^^^^^^^^^^^^ + +Internally or with a third-party STP protocol implementation (e.g. mstpd), the +bridge driver maintains the STP state for ports, and will notify the switch +driver of STP state change on a port using the switchdev op switchdev_attr_port_set for +SWITCHDEV_ATTR_PORT_STP_UPDATE. + +State is one of BR_STATE_*. The switch driver can use STP state updates to +update ingress packet filter list for the port. For example, if port is +DISABLED, no packets should pass, but if port moves to BLOCKED, then STP BPDUs +and other IEEE 01:80:c2:xx:xx:xx link-local multicast packets can pass. + +Note that STP BDPUs are untagged and STP state applies to all VLANs on the port +so packet filters should be applied consistently across untagged and tagged +VLANs on the port. + +Flooding L2 domain +^^^^^^^^^^^^^^^^^^ + +For a given L2 VLAN domain, the switch device should flood multicast/broadcast +and unknown unicast packets to all ports in domain, if allowed by port's +current STP state. The switch driver, knowing which ports are within which +vlan L2 domain, can program the switch device for flooding. The packet should +also be sent to the port netdev for processing by the bridge driver. The +bridge should not reflood the packet to the same ports the device flooded. +XXX: the mechanism to avoid duplicate flood packets is being discuseed. + +It is possible for the switch device to not handle flooding and push the +packets up to the bridge driver for flooding. This is not ideal as the number +of ports scale in the L2 domain as the device is much more efficient at +flooding packets that software. + +IGMP Snooping +^^^^^^^^^^^^^ + +XXX: complete this section + + +L3 routing +---------- + +Offloading L3 routing requires that device be programmed with FIB entries from +the kernel, with the device doing the FIB lookup and forwarding. The device +does a longest prefix match (LPM) on FIB entries matching route prefix and +forwards the packet to the matching FIB entry's nexthop(s) egress ports. To +program the device, the switchdev driver is called with add/delete ops for IPv4 +and IPv6 FIB entries. For IPv4, the driver implements switchdev ops: + + int (*switchdev_fib_ipv4_add)(struct net_device *dev, + __be32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 nlflags, u32 tb_id); + + int (*switchdev_fib_ipv4_del)(struct net_device *dev, + __be32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 tb_id); + +to add/delete IPv4 dst/dest_len prefix on table tb_id. The *fi structure holds +details on the route and route's nexthops. *dev is one of the port netdevs +mentioned in the routes next hop list. If the output port netdevs referenced +in the route's nexthop list don't all have the same switch ID, the driver is +not called to add/delete the FIB entry. + +Routes offloaded to the device are labeled with "offload" in the ip route +listing: + + $ ip route show + default via 192.168.0.2 dev eth0 + 11.0.0.0/30 dev sw1p1 proto kernel scope link src 11.0.0.2 offload + 11.0.0.4/30 via 11.0.0.1 dev sw1p1 proto zebra metric 20 offload + 11.0.0.8/30 dev sw1p2 proto kernel scope link src 11.0.0.10 offload + 11.0.0.12/30 via 11.0.0.9 dev sw1p2 proto zebra metric 20 offload + 12.0.0.2 proto zebra metric 30 offload + nexthop via 11.0.0.1 dev sw1p1 weight 1 + nexthop via 11.0.0.9 dev sw1p2 weight 1 + 12.0.0.3 via 11.0.0.1 dev sw1p1 proto zebra metric 20 offload + 12.0.0.4 via 11.0.0.9 dev sw1p2 proto zebra metric 20 offload + 192.168.0.0/24 dev eth0 proto kernel scope link src 192.168.0.15 + +XXX: add/del IPv6 FIB API + +Nexthop Resolution +^^^^^^^^^^^^^^^^^^ + +The FIB entry's nexthop list contains the nexthop tuple (gateway, dev), but for +the switch device to forward the packet with the correct dst mac address, the +nexthop gateways must be resolved to the neighbor's mac address. Neighbor mac +address discovery comes via the ARP (or ND) process and is available via the +arp_tbl neighbor table. To resolve the routes nexthop gateways, the driver +should trigger the kernel's neighbor resolution process. See the rocker +driver's rocker_port_ipv4_resolve() for an example. + +The driver can monitor for updates to arp_tbl using the netevent notifier +NETEVENT_NEIGH_UPDATE. The device can be programmed with resolved nexthops +for the routes as arp_tbl updates. diff --git a/Documentation/networking/tc-actions-env-rules.txt b/Documentation/networking/tc-actions-env-rules.txt index 70d6cf608251..95c71716b2e2 100644 --- a/Documentation/networking/tc-actions-env-rules.txt +++ b/Documentation/networking/tc-actions-env-rules.txt @@ -14,8 +14,6 @@ resets them for you, so invoke skb_act_clone() rather than skb_clone(). 2) If you munge any packet thou shalt call pskb_expand_head in the case someone else is referencing the skb. After that you "own" the skb. -You must also tell us if it is ok to munge the packet (TC_OK2MUNGE), -this way any action downstream can stomp on the packet. 3) Dropping packets you don't own is a no-no. You simply return TC_ACT_SHOT to the caller and they will drop it. diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e0e23582c8b4..4550d247e308 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -873,6 +873,16 @@ b_epilogue: off = offsetof(struct sk_buff, queue_mapping); emit(ARM_LDRH_I(r_A, r_skb, off), ctx); break; + case BPF_LDX | BPF_W | BPF_ABS: + /* + * load a 32bit word from struct seccomp_data. + * seccomp_check_filter() will already have checked + * that k is 32bit aligned and lies within the + * struct seccomp_data. + */ + ctx->seen |= SEEN_SKB; + emit(ARM_LDR_I(r_A, r_skb, k), ctx); + break; default: return -1; } diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f22cc56fd1b3..5ad0d5354535 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -244,7 +244,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) if (!type) goto unlock; - sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto); + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0); err = -ENOMEM; if (!sk2) goto unlock; @@ -324,7 +324,7 @@ static int alg_create(struct net *net, struct socket *sock, int protocol, return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto); + sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto, kern); if (!sk) goto out; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cee20354ac37..c097909c589c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -598,7 +598,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection) memcpy(&peer_in6, &connection->peer_addr, peer_addr_len); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, + err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (err < 0) { sock = NULL; @@ -693,7 +693,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce memcpy(&my_addr, &connection->my_addr, my_addr_len); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, + err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 8dc7290089bb..0d29b5a6356d 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -601,14 +601,14 @@ static const struct proto_ops data_sock_ops = { }; static int -data_sock_create(struct net *net, struct socket *sock, int protocol) +data_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; if (sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto); + sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern); if (!sk) return -ENOMEM; @@ -756,14 +756,14 @@ static const struct proto_ops base_sock_ops = { static int -base_sock_create(struct net *net, struct socket *sock, int protocol) +base_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto); + sk = sk_alloc(net, PF_ISDN, GFP_KERNEL, &mISDN_proto, kern); if (!sk) return -ENOMEM; @@ -785,7 +785,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern) switch (proto) { case ISDN_P_BASE: - err = base_sock_create(net, sock, proto); + err = base_sock_create(net, sock, proto, kern); break; case ISDN_P_TE_S0: case ISDN_P_NT_S0: @@ -799,7 +799,7 @@ mISDN_sock_create(struct net *net, struct socket *sock, int proto, int kern) case ISDN_P_B_L2DTMF: case ISDN_P_B_L2DSP: case ISDN_P_B_L2DSPHDLC: - err = data_sock_create(net, sock, proto); + err = data_sock_create(net, sock, proto, kern); break; default: return err; diff --git a/drivers/mfd/janz-cmodio.c b/drivers/mfd/janz-cmodio.c index 433f823037dd..ec1f46a6be3a 100644 --- a/drivers/mfd/janz-cmodio.c +++ b/drivers/mfd/janz-cmodio.c @@ -267,6 +267,10 @@ static void cmodio_pci_remove(struct pci_dev *dev) static const struct pci_device_id cmodio_pci_ids[] = { { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_JANZ, 0x0101 }, { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, PCI_VENDOR_ID_JANZ, 0x0100 }, + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_JANZ, 0x0201 }, + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030, PCI_VENDOR_ID_JANZ, 0x0202 }, + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, PCI_VENDOR_ID_JANZ, 0x0201 }, + { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, PCI_VENDOR_ID_JANZ, 0x0202 }, { 0, } }; MODULE_DEVICE_TABLE(pci, cmodio_pci_ids); diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index fbd54f0e32e8..7fde4d5c2b28 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -75,10 +75,10 @@ /* Port Key definitions * key is determined according to the link speed, duplex and * user key (which is yet not supported) - * -------------------------------------------------------------- - * Port key : | User key | Speed | Duplex | - * -------------------------------------------------------------- - * 16 6 1 0 + * -------------------------------------------------------------- + * Port key | User key (10 bits) | Speed (5 bits) | Duplex| + * -------------------------------------------------------------- + * |15 6|5 1|0 */ #define AD_DUPLEX_KEY_MASKS 0x1 #define AD_SPEED_KEY_MASKS 0x3E @@ -1908,8 +1908,14 @@ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) BOND_AD_INFO(bond).aggregator_identifier = 0; - BOND_AD_INFO(bond).system.sys_priority = 0xFFFF; - BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); + BOND_AD_INFO(bond).system.sys_priority = + bond->params.ad_actor_sys_prio; + if (is_zero_ether_addr(bond->params.ad_actor_system)) + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->dev->dev_addr); + else + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->params.ad_actor_system); /* initialize how many times this module is called in one * second (should be about every 100ms) @@ -1945,10 +1951,10 @@ void bond_3ad_bind_slave(struct slave *slave) port->slave = slave; port->actor_port_number = SLAVE_AD_INFO(slave)->id; - /* key is determined according to the link speed, duplex and user key(which - * is yet not supported) + /* key is determined according to the link speed, duplex and + * user key */ - port->actor_admin_port_key = 0; + port->actor_admin_port_key = bond->params.ad_user_port_key << 6; port->actor_admin_port_key |= __get_duplex(port); port->actor_admin_port_key |= (__get_link_speed(port) << 1); port->actor_oper_port_key = port->actor_admin_port_key; @@ -1959,6 +1965,8 @@ void bond_3ad_bind_slave(struct slave *slave) port->sm_vars &= ~AD_PORT_LACP_ENABLED; /* actor system is the bond's system */ port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; + port->actor_system_priority = + BOND_AD_INFO(bond).system.sys_priority; /* tx timer(to verify that no more than MAX_TX_IN_SECOND * lacpdu's are sent in one second) */ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d5fe5d5f490f..a2e25de98bde 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1015,10 +1015,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, netdev_features_t mask; struct slave *slave; - /* If any slave has the offload feature flag set, - * set the offload flag on the bond. - */ - mask = features | NETIF_F_HW_SWITCH_OFFLOAD; + mask = features; features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; @@ -4039,8 +4036,9 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_add_slave = bond_enslave, .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, - .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, - .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_features_check = passthru_features_check, }; @@ -4140,6 +4138,8 @@ static int bond_check_params(struct bond_params *params) struct bond_opt_value newval; const struct bond_opt_value *valptr; int arp_all_targets_value; + u16 ad_actor_sys_prio = 0; + u16 ad_user_port_key = 0; /* Convert string parameters. */ if (mode) { @@ -4434,6 +4434,24 @@ static int bond_check_params(struct bond_params *params) fail_over_mac_value = BOND_FOM_NONE; } + bond_opt_initstr(&newval, "default"); + valptr = bond_opt_parse( + bond_opt_get(BOND_OPT_AD_ACTOR_SYS_PRIO), + &newval); + if (!valptr) { + pr_err("Error: No ad_actor_sys_prio default value"); + return -EINVAL; + } + ad_actor_sys_prio = valptr->value; + + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY), + &newval); + if (!valptr) { + pr_err("Error: No ad_user_port_key default value"); + return -EINVAL; + } + ad_user_port_key = valptr->value; + if (lp_interval == 0) { pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL); @@ -4462,6 +4480,9 @@ static int bond_check_params(struct bond_params *params) params->lp_interval = lp_interval; params->packets_per_slave = packets_per_slave; params->tlb_dynamic_lb = 1; /* Default value */ + params->ad_actor_sys_prio = ad_actor_sys_prio; + eth_zero_addr(params->ad_actor_system); + params->ad_user_port_key = ad_user_port_key; if (packets_per_slave > 0) { params->reciprocal_packets_per_slave = reciprocal_value(packets_per_slave); diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 7b1124366011..f7015eb4f8db 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -94,6 +94,10 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, + [IFLA_BOND_AD_ACTOR_SYS_PRIO] = { .type = NLA_U16 }, + [IFLA_BOND_AD_USER_PORT_KEY] = { .type = NLA_U16 }, + [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY, + .len = ETH_ALEN }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { @@ -379,6 +383,36 @@ static int bond_changelink(struct net_device *bond_dev, if (err) return err; } + if (data[IFLA_BOND_AD_ACTOR_SYS_PRIO]) { + int actor_sys_prio = + nla_get_u16(data[IFLA_BOND_AD_ACTOR_SYS_PRIO]); + + bond_opt_initval(&newval, actor_sys_prio); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYS_PRIO, &newval); + if (err) + return err; + } + + if (data[IFLA_BOND_AD_USER_PORT_KEY]) { + int port_key = + nla_get_u16(data[IFLA_BOND_AD_USER_PORT_KEY]); + + bond_opt_initval(&newval, port_key); + err = __bond_opt_set(bond, BOND_OPT_AD_USER_PORT_KEY, &newval); + if (err) + return err; + } + + if (data[IFLA_BOND_AD_ACTOR_SYSTEM]) { + if (nla_len(data[IFLA_BOND_AD_ACTOR_SYSTEM]) != ETH_ALEN) + return -EINVAL; + + bond_opt_initval(&newval, + nla_get_be64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval); + if (err) + return err; + } return 0; } @@ -426,6 +460,9 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_ACTOR_KEY */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_PARTNER_KEY*/ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_INFO_PARTNER_MAC*/ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_ACTOR_SYS_PRIO */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_USER_PORT_KEY */ + nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */ 0; } @@ -551,6 +588,19 @@ static int bond_fill_info(struct sk_buff *skb, if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info info; + if (nla_put_u16(skb, IFLA_BOND_AD_ACTOR_SYS_PRIO, + bond->params.ad_actor_sys_prio)) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_BOND_AD_USER_PORT_KEY, + bond->params.ad_user_port_key)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, + sizeof(bond->params.ad_actor_system), + &bond->params.ad_actor_system)) + goto nla_put_failure; + if (!bond_3ad_get_active_agg_info(bond, &info)) { struct nlattr *nest; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 4df28943d222..9a32bbd7724e 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -70,6 +70,12 @@ static int bond_option_slaves_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, + const struct bond_opt_value *newval); +static int bond_option_ad_actor_system_set(struct bonding *bond, + const struct bond_opt_value *newval); +static int bond_option_ad_user_port_key_set(struct bonding *bond, + const struct bond_opt_value *newval); static const struct bond_opt_value bond_mode_tbl[] = { @@ -186,6 +192,18 @@ static const struct bond_opt_value bond_tlb_dynamic_lb_tbl[] = { { NULL, -1, 0} }; +static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = { + { "minval", 1, BOND_VALFLAG_MIN}, + { "maxval", 65535, BOND_VALFLAG_MAX | BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0}, +}; + +static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { + { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, + { "maxval", 1023, BOND_VALFLAG_MAX}, + { NULL, -1, 0}, +}; + static const struct bond_option bond_opts[BOND_OPT_LAST] = { [BOND_OPT_MODE] = { .id = BOND_OPT_MODE, @@ -379,6 +397,29 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_tlb_dynamic_lb_tbl, .flags = BOND_OPTFLAG_IFDOWN, .set = bond_option_tlb_dynamic_lb_set, + }, + [BOND_OPT_AD_ACTOR_SYS_PRIO] = { + .id = BOND_OPT_AD_ACTOR_SYS_PRIO, + .name = "ad_actor_sys_prio", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_IFDOWN, + .values = bond_ad_actor_sys_prio_tbl, + .set = bond_option_ad_actor_sys_prio_set, + }, + [BOND_OPT_AD_ACTOR_SYSTEM] = { + .id = BOND_OPT_AD_ACTOR_SYSTEM, + .name = "ad_actor_system", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_RAWVAL | BOND_OPTFLAG_IFDOWN, + .set = bond_option_ad_actor_system_set, + }, + [BOND_OPT_AD_USER_PORT_KEY] = { + .id = BOND_OPT_AD_USER_PORT_KEY, + .name = "ad_user_port_key", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .flags = BOND_OPTFLAG_IFDOWN, + .values = bond_ad_user_port_key_tbl, + .set = bond_option_ad_user_port_key_set, } }; @@ -1349,3 +1390,53 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, return 0; } + +static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_info(bond->dev, "Setting ad_actor_sys_prio to %llu\n", + newval->value); + + bond->params.ad_actor_sys_prio = newval->value; + return 0; +} + +static int bond_option_ad_actor_system_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + u8 macaddr[ETH_ALEN]; + u8 *mac; + int i; + + if (newval->string) { + i = sscanf(newval->string, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &macaddr[0], &macaddr[1], &macaddr[2], + &macaddr[3], &macaddr[4], &macaddr[5]); + if (i != ETH_ALEN) + goto err; + mac = macaddr; + } else { + mac = (u8 *)&newval->value; + } + + if (!is_valid_ether_addr(mac)) + goto err; + + netdev_info(bond->dev, "Setting ad_actor_system to %pM\n", mac); + ether_addr_copy(bond->params.ad_actor_system, mac); + return 0; + +err: + netdev_err(bond->dev, "Invalid MAC address.\n"); + return -EINVAL; +} + +static int bond_option_ad_user_port_key_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_info(bond->dev, "Setting ad_user_port_key to %llu\n", + newval->value); + + bond->params.ad_user_port_key = newval->value; + return 0; +} diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index b20b35acb47d..e7f3047a26df 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -135,6 +135,10 @@ static void bond_info_show_master(struct seq_file *seq) bond->params.ad_select); seq_printf(seq, "Aggregator selection policy (ad_select): %s\n", optval->string); + seq_printf(seq, "System priority: %d\n", + BOND_AD_INFO(bond).system.sys_priority); + seq_printf(seq, "System MAC address: %pM\n", + &BOND_AD_INFO(bond).system.sys_mac_addr); if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { seq_printf(seq, "bond %s has no active aggregator\n", @@ -198,6 +202,8 @@ static void bond_info_show_slave(struct seq_file *seq, seq_puts(seq, "details actor lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->actor_system_priority); + seq_printf(seq, " system mac address: %pM\n", + &port->actor_system); seq_printf(seq, " port key: %d\n", port->actor_oper_port_key); seq_printf(seq, " port priority: %d\n", @@ -210,6 +216,8 @@ static void bond_info_show_slave(struct seq_file *seq, seq_puts(seq, "details partner lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->partner_oper.system_priority); + seq_printf(seq, " system mac address: %pM\n", + &port->partner_oper.system); seq_printf(seq, " oper key: %d\n", port->partner_oper.key); seq_printf(seq, " port priority: %d\n", diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 7e9e151d4d61..143a2abd1c1c 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -692,6 +692,49 @@ static ssize_t bonding_show_packets_per_slave(struct device *d, static DEVICE_ATTR(packets_per_slave, S_IRUGO | S_IWUSR, bonding_show_packets_per_slave, bonding_sysfs_store_option); +static ssize_t bonding_show_ad_actor_sys_prio(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) + return sprintf(buf, "%hu\n", bond->params.ad_actor_sys_prio); + + return 0; +} +static DEVICE_ATTR(ad_actor_sys_prio, S_IRUGO | S_IWUSR, + bonding_show_ad_actor_sys_prio, bonding_sysfs_store_option); + +static ssize_t bonding_show_ad_actor_system(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) + return sprintf(buf, "%pM\n", bond->params.ad_actor_system); + + return 0; +} + +static DEVICE_ATTR(ad_actor_system, S_IRUGO | S_IWUSR, + bonding_show_ad_actor_system, bonding_sysfs_store_option); + +static ssize_t bonding_show_ad_user_port_key(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) + return sprintf(buf, "%hu\n", bond->params.ad_user_port_key); + + return 0; +} +static DEVICE_ATTR(ad_user_port_key, S_IRUGO | S_IWUSR, + bonding_show_ad_user_port_key, bonding_sysfs_store_option); + static struct attribute *per_bond_attrs[] = { &dev_attr_slaves.attr, &dev_attr_mode.attr, @@ -725,6 +768,9 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_lp_interval.attr, &dev_attr_packets_per_slave.attr, &dev_attr_tlb_dynamic_lb.attr, + &dev_attr_ad_actor_sys_prio.attr, + &dev_attr_ad_actor_system.attr, + &dev_attr_ad_user_port_key.attr, NULL, }; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index ad0a7e8c2c2b..6201c5a1a884 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -93,13 +93,13 @@ (FLEXCAN_CTRL_ERR_BUS | FLEXCAN_CTRL_ERR_STATE) /* FLEXCAN control register 2 (CTRL2) bits */ -#define FLEXCAN_CRL2_ECRWRE BIT(29) -#define FLEXCAN_CRL2_WRMFRZ BIT(28) -#define FLEXCAN_CRL2_RFFN(x) (((x) & 0x0f) << 24) -#define FLEXCAN_CRL2_TASD(x) (((x) & 0x1f) << 19) -#define FLEXCAN_CRL2_MRP BIT(18) -#define FLEXCAN_CRL2_RRS BIT(17) -#define FLEXCAN_CRL2_EACEN BIT(16) +#define FLEXCAN_CTRL2_ECRWRE BIT(29) +#define FLEXCAN_CTRL2_WRMFRZ BIT(28) +#define FLEXCAN_CTRL2_RFFN(x) (((x) & 0x0f) << 24) +#define FLEXCAN_CTRL2_TASD(x) (((x) & 0x1f) << 19) +#define FLEXCAN_CTRL2_MRP BIT(18) +#define FLEXCAN_CTRL2_RRS BIT(17) +#define FLEXCAN_CTRL2_EACEN BIT(16) /* FLEXCAN memory error control register (MECR) bits */ #define FLEXCAN_MECR_ECRWRDIS BIT(31) @@ -158,7 +158,6 @@ FLEXCAN_IFLAG_BUF(FLEXCAN_TX_BUF_ID)) /* FLEXCAN message buffers */ -#define FLEXCAN_MB_CNT_CODE(x) (((x) & 0xf) << 24) #define FLEXCAN_MB_CODE_RX_INACTIVE (0x0 << 24) #define FLEXCAN_MB_CODE_RX_EMPTY (0x4 << 24) #define FLEXCAN_MB_CODE_RX_FULL (0x2 << 24) @@ -184,14 +183,14 @@ * FLEXCAN hardware feature flags * * Below is some version info we got: - * SOC Version IP-Version Glitch- [TR]WRN_INT Memory err - * Filter? connected? detection - * MX25 FlexCAN2 03.00.00.00 no no no - * MX28 FlexCAN2 03.00.04.00 yes yes no - * MX35 FlexCAN2 03.00.00.00 no no no - * MX53 FlexCAN2 03.00.00.00 yes no no - * MX6s FlexCAN3 10.00.12.00 yes yes no - * VF610 FlexCAN3 ? no yes yes + * SOC Version IP-Version Glitch- [TR]WRN_INT Memory err RTR re- + * Filter? connected? detection ception in MB + * MX25 FlexCAN2 03.00.00.00 no no no no + * MX28 FlexCAN2 03.00.04.00 yes yes no no + * MX35 FlexCAN2 03.00.00.00 no no no no + * MX53 FlexCAN2 03.00.00.00 yes no no no + * MX6s FlexCAN3 10.00.12.00 yes yes no yes + * VF610 FlexCAN3 ? no yes yes yes? * * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. */ @@ -221,7 +220,7 @@ struct flexcan_regs { u32 imask1; /* 0x28 */ u32 iflag2; /* 0x2c */ u32 iflag1; /* 0x30 */ - u32 crl2; /* 0x34 */ + u32 ctrl2; /* 0x34 */ u32 esr2; /* 0x38 */ u32 imeur; /* 0x3c */ u32 lrfr; /* 0x40 */ @@ -230,6 +229,16 @@ struct flexcan_regs { u32 rxfir; /* 0x4c */ u32 _reserved3[12]; /* 0x50 */ struct flexcan_mb cantxfg[64]; /* 0x80 */ + /* FIFO-mode: + * MB + * 0x080...0x08f 0 RX message buffer + * 0x090...0x0df 1-5 reserverd + * 0x0e0...0x0ff 6-7 8 entry ID table + * (mx25, mx28, mx35, mx53) + * 0x0e0...0x2df 6-7..37 8..128 entry ID table + * size conf'ed via ctrl2::RFFN + * (mx6, vf610) + */ u32 _reserved4[408]; u32 mecr; /* 0xae0 */ u32 erriar; /* 0xae4 */ @@ -468,7 +477,7 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev) struct flexcan_regs __iomem *regs = priv->base; struct can_frame *cf = (struct can_frame *)skb->data; u32 can_id; - u32 ctrl = FLEXCAN_MB_CNT_CODE(0xc) | (cf->can_dlc << 16); + u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | (cf->can_dlc << 16); if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; @@ -815,7 +824,7 @@ static int flexcan_chip_start(struct net_device *dev) { struct flexcan_priv *priv = netdev_priv(dev); struct flexcan_regs __iomem *regs = priv->base; - u32 reg_mcr, reg_ctrl, reg_crl2, reg_mecr; + u32 reg_mcr, reg_ctrl, reg_ctrl2, reg_mecr; int err, i; /* enable module */ @@ -918,9 +927,9 @@ static int flexcan_chip_start(struct net_device *dev) * and Correction of Memory Errors" to write to * MECR register */ - reg_crl2 = flexcan_read(®s->crl2); - reg_crl2 |= FLEXCAN_CRL2_ECRWRE; - flexcan_write(reg_crl2, ®s->crl2); + reg_ctrl2 = flexcan_read(®s->ctrl2); + reg_ctrl2 |= FLEXCAN_CTRL2_ECRWRE; + flexcan_write(reg_ctrl2, ®s->ctrl2); reg_mecr = flexcan_read(®s->mecr); reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS; diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index 4dd183a3643a..c1e85368a198 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -40,6 +40,7 @@ #define MSYNC_PEER 0x00 /* ICAN only */ #define MSYNC_LOCL 0x01 /* host only */ #define TARGET_RUNNING 0x02 +#define FIRMWARE_STAMP 0x60 /* big endian firmware stamp */ #define MSYNC_RB0 0x01 #define MSYNC_RB1 0x02 @@ -83,6 +84,7 @@ #define MSG_COFFREQ 0x42 #define MSG_CONREQ 0x43 #define MSG_CCONFREQ 0x47 +#define MSG_LMTS 0xb4 /* * Janz ICAN3 CAN Inquiry Message Types @@ -165,6 +167,12 @@ /* SJA1000 Clock Input */ #define ICAN3_CAN_CLOCK 8000000 +/* Janz ICAN3 firmware types */ +enum ican3_fwtype { + ICAN3_FWTYPE_ICANOS, + ICAN3_FWTYPE_CAL_CANOPEN, +}; + /* Driver Name */ #define DRV_NAME "janz-ican3" @@ -215,6 +223,10 @@ struct ican3_dev { struct completion buserror_comp; struct can_berr_counter bec; + /* firmware type */ + enum ican3_fwtype fwtype; + char fwinfo[32]; + /* old and new style host interface */ unsigned int iftype; @@ -750,13 +762,61 @@ static int ican3_set_id_filter(struct ican3_dev *mod, bool accept) */ static int ican3_set_bus_state(struct ican3_dev *mod, bool on) { + struct can_bittiming *bt = &mod->can.bittiming; struct ican3_msg msg; + u8 btr0, btr1; + int res; - memset(&msg, 0, sizeof(msg)); - msg.spec = on ? MSG_CONREQ : MSG_COFFREQ; - msg.len = cpu_to_le16(0); + /* This algorithm was stolen from drivers/net/can/sja1000/sja1000.c */ + /* The bittiming register command for the ICAN3 just sets the bit timing */ + /* registers on the SJA1000 chip directly */ + btr0 = ((bt->brp - 1) & 0x3f) | (((bt->sjw - 1) & 0x3) << 6); + btr1 = ((bt->prop_seg + bt->phase_seg1 - 1) & 0xf) | + (((bt->phase_seg2 - 1) & 0x7) << 4); + if (mod->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) + btr1 |= 0x80; - return ican3_send_msg(mod, &msg); + if (mod->fwtype == ICAN3_FWTYPE_ICANOS) { + if (on) { + /* set bittiming */ + memset(&msg, 0, sizeof(msg)); + msg.spec = MSG_CBTRREQ; + msg.len = cpu_to_le16(4); + msg.data[0] = 0x00; + msg.data[1] = 0x00; + msg.data[2] = btr0; + msg.data[3] = btr1; + + res = ican3_send_msg(mod, &msg); + if (res) + return res; + } + + /* can-on/off request */ + memset(&msg, 0, sizeof(msg)); + msg.spec = on ? MSG_CONREQ : MSG_COFFREQ; + msg.len = cpu_to_le16(0); + + return ican3_send_msg(mod, &msg); + + } else if (mod->fwtype == ICAN3_FWTYPE_CAL_CANOPEN) { + memset(&msg, 0, sizeof(msg)); + msg.spec = MSG_LMTS; + if (on) { + msg.len = cpu_to_le16(4); + msg.data[0] = 0; + msg.data[1] = 0; + msg.data[2] = btr0; + msg.data[3] = btr1; + } else { + msg.len = cpu_to_le16(2); + msg.data[0] = 1; + msg.data[1] = 0; + } + + return ican3_send_msg(mod, &msg); + } + return -ENOTSUPP; } static int ican3_set_termination(struct ican3_dev *mod, bool on) @@ -1402,7 +1462,7 @@ static int ican3_reset_module(struct ican3_dev *mod) return 0; msleep(10); - } while (time_before(jiffies, start + HZ / 4)); + } while (time_before(jiffies, start + HZ / 2)); netdev_err(mod->ndev, "failed to reset CAN module\n"); return -ETIMEDOUT; @@ -1427,6 +1487,17 @@ static int ican3_startup_module(struct ican3_dev *mod) return ret; } + /* detect firmware */ + memcpy_fromio(mod->fwinfo, mod->dpm + FIRMWARE_STAMP, sizeof(mod->fwinfo) - 1); + if (strncmp(mod->fwinfo, "JANZ-ICAN3", 10)) { + netdev_err(mod->ndev, "ICAN3 not detected (found %s)\n", mod->fwinfo); + return -ENODEV; + } + if (strstr(mod->fwinfo, "CAL/CANopen")) + mod->fwtype = ICAN3_FWTYPE_CAL_CANOPEN; + else + mod->fwtype = ICAN3_FWTYPE_ICANOS; + /* re-enable interrupts so we can send messages */ iowrite8(1 << mod->num, &mod->ctrl->int_enable); @@ -1615,36 +1686,6 @@ static const struct can_bittiming_const ican3_bittiming_const = { .brp_inc = 1, }; -/* - * This routine was stolen from drivers/net/can/sja1000/sja1000.c - * - * The bittiming register command for the ICAN3 just sets the bit timing - * registers on the SJA1000 chip directly - */ -static int ican3_set_bittiming(struct net_device *ndev) -{ - struct ican3_dev *mod = netdev_priv(ndev); - struct can_bittiming *bt = &mod->can.bittiming; - struct ican3_msg msg; - u8 btr0, btr1; - - btr0 = ((bt->brp - 1) & 0x3f) | (((bt->sjw - 1) & 0x3) << 6); - btr1 = ((bt->prop_seg + bt->phase_seg1 - 1) & 0xf) | - (((bt->phase_seg2 - 1) & 0x7) << 4); - if (mod->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) - btr1 |= 0x80; - - memset(&msg, 0, sizeof(msg)); - msg.spec = MSG_CBTRREQ; - msg.len = cpu_to_le16(4); - msg.data[0] = 0x00; - msg.data[1] = 0x00; - msg.data[2] = btr0; - msg.data[3] = btr1; - - return ican3_send_msg(mod, &msg); -} - static int ican3_set_mode(struct net_device *ndev, enum can_mode mode) { struct ican3_dev *mod = netdev_priv(ndev); @@ -1730,11 +1771,22 @@ static ssize_t ican3_sysfs_set_term(struct device *dev, return count; } +static ssize_t ican3_sysfs_show_fwinfo(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ican3_dev *mod = netdev_priv(to_net_dev(dev)); + + return scnprintf(buf, PAGE_SIZE, "%s\n", mod->fwinfo); +} + static DEVICE_ATTR(termination, S_IWUSR | S_IRUGO, ican3_sysfs_show_term, ican3_sysfs_set_term); +static DEVICE_ATTR(fwinfo, S_IRUSR | S_IRUGO, ican3_sysfs_show_fwinfo, NULL); static struct attribute *ican3_sysfs_attrs[] = { &dev_attr_termination.attr, + &dev_attr_fwinfo.attr, NULL, }; @@ -1794,7 +1846,6 @@ static int ican3_probe(struct platform_device *pdev) mod->can.clock.freq = ICAN3_CAN_CLOCK; mod->can.bittiming_const = &ican3_bittiming_const; - mod->can.do_set_bittiming = ican3_set_bittiming; mod->can.do_set_mode = ican3_set_mode; mod->can.do_get_berr_counter = ican3_get_berr_counter; mod->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES @@ -1866,7 +1917,7 @@ static int ican3_probe(struct platform_device *pdev) goto out_free_irq; } - dev_info(dev, "module %d: registered CAN device\n", pdata->modno); + netdev_info(mod->ndev, "module %d: registered CAN device\n", pdata->modno); return 0; out_free_irq: diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 18550c7ebe6f..7ad0a4d8e475 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -37,22 +37,22 @@ config NET_DSA_MV88E6123_61_65 ethernet switch chips. config NET_DSA_MV88E6171 - tristate "Marvell 88E6171/6172 ethernet switch chip support" + tristate "Marvell 88E6171/6175/6350/6351 ethernet switch chip support" depends on NET_DSA select NET_DSA_MV88E6XXX select NET_DSA_TAG_EDSA ---help--- - This enables support for the Marvell 88E6171/6172 ethernet switch - chips. + This enables support for the Marvell 88E6171/6175/6350/6351 + ethernet switches chips. config NET_DSA_MV88E6352 - tristate "Marvell 88E6176/88E6352 ethernet switch chip support" + tristate "Marvell 88E6172/88E6176/88E6352 ethernet switch chip support" depends on NET_DSA select NET_DSA_MV88E6XXX select NET_DSA_TAG_EDSA ---help--- - This enables support for the Marvell 88E6176 and 88E6352 ethernet - switch chips. + This enables support for the Marvell 88E6172, 88E6176 and 88E6352 + ethernet switch chips. config NET_DSA_BCM_SF2 tristate "Broadcom Starfighter 2 Ethernet switch support" diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c index b4af6d5aff7c..71a29a7ce538 100644 --- a/drivers/net/dsa/mv88e6123_61_65.c +++ b/drivers/net/dsa/mv88e6123_61_65.c @@ -54,192 +54,40 @@ static char *mv88e6123_61_65_probe(struct device *host_dev, int sw_addr) static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) { + u32 upstream_port = dsa_upstream_port(ds); int ret; - int i; + u32 reg; + + ret = mv88e6xxx_setup_global(ds); + if (ret) + return ret; /* Disable the PHY polling unit (since there won't be any * external PHYs to poll), don't discard packets with * excessive collisions, and mask all interrupt sources. */ - REG_WRITE(REG_GLOBAL, 0x04, 0x0000); - - /* Set the default address aging time to 5 minutes, and - * enable address learn messages to be sent to all message - * ports. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); - - /* Configure the priority mapping registers. */ - ret = mv88e6xxx_config_prio(ds); - if (ret < 0) - return ret; + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, 0x0000); /* Configure the upstream port, and configure the upstream * port as the port to which ingress and egress monitor frames * are to be sent. */ - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110)); + reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; + REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); /* Disable remote management for now, and set the switch's * DSA device number. */ - REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:2x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); - - /* Disable the loopback filter, disable flow control - * messages, disable flood broadcast override, disable - * removing of provider tags, disable ATU age violation - * interrupts, disable tag flow control, force flow - * control priority to the highest, and send all special - * multicast frames to the CPU at the highest priority. - */ - REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); - - /* Program the DSA routing table. */ - for (i = 0; i < 32; i++) { - int nexthop; - - nexthop = 0x1f; - if (i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; - - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); - } - - /* Clear all trunk masks. */ - for (i = 0; i < 8; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); - - /* Clear all trunk mappings. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); - - /* Disable ingress rate limiting by resetting all ingress - * rate limit registers to their initial state. - */ - for (i = 0; i < 6; i++) - REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); - - /* Initialise cross-chip port VLAN table to reset defaults. */ - REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); - - /* Clear the priority override table. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); - - /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f); return 0; } -static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) -{ - int addr = REG_PORT(p); - u16 val; - - /* MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values on physical - * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * full duplex. - */ - if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - REG_WRITE(addr, 0x01, 0x003e); - else - REG_WRITE(addr, 0x01, 0x0003); - - /* Do not limit the period of time that this port can be - * paused for by the remote end or the period of time that - * this port can pause the remote end. - */ - REG_WRITE(addr, 0x02, 0x0000); - - /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, - * disable Header mode, enable IGMP/MLD snooping, disable VLAN - * tunneling, determine priority by looking at 802.1p and IP - * priority fields (IP prio has precedence), and set STP state - * to Forwarding. - * - * If this is the CPU link, use DSA or EDSA tagging depending - * on which tagging mode was configured. - * - * If this is a link to another switch, use DSA tagging mode. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown unicasts and multicasts. - */ - val = 0x0433; - if (dsa_is_cpu_port(ds, p)) { - if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) - val |= 0x3300; - else - val |= 0x0100; - } - if (ds->dsa_port_mask & (1 << p)) - val |= 0x0100; - if (p == dsa_upstream_port(ds)) - val |= 0x000c; - REG_WRITE(addr, 0x04, val); - - /* Port Control 2: don't force a good FCS, set the maximum - * frame size to 10240 bytes, don't let the switch add or - * strip 802.1q tags, don't discard tagged or untagged frames - * on this port, do a destination address lookup on all - * received packets as usual, disable ARP mirroring and don't - * send a copy of all transmitted/received frames on this port - * to the CPU. - */ - REG_WRITE(addr, 0x08, 0x2080); - - /* Egress rate control: disable egress rate control. */ - REG_WRITE(addr, 0x09, 0x0001); - - /* Egress rate control 2: disable egress rate control. */ - REG_WRITE(addr, 0x0a, 0x0000); - - /* Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - /* Port ATU control: disable limiting the number of address - * database entries that this port is allowed to use. - */ - REG_WRITE(addr, 0x0c, 0x0000); - - /* Priority Override: disable DA, SA and VTU priority override. */ - REG_WRITE(addr, 0x0d, 0x0000); - - /* Port Ethertype: use the Ethertype DSA Ethertype value. */ - REG_WRITE(addr, 0x0f, ETH_P_EDSA); - - /* Tag Remap: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x18, 0x3210); - - /* Tag Remap 2: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x19, 0x7654); - - return mv88e6xxx_setup_port_common(ds, p); -} - static int mv88e6123_61_65_setup(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int i; int ret; ret = mv88e6xxx_setup_common(ds); @@ -262,19 +110,11 @@ static int mv88e6123_61_65_setup(struct dsa_switch *ds) if (ret < 0) return ret; - /* @@@ initialise vtu and atu */ - ret = mv88e6123_61_65_setup_global(ds); if (ret < 0) return ret; - for (i = 0; i < ps->num_ports; i++) { - ret = mv88e6123_61_65_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; + return mv88e6xxx_setup_ports(ds); } struct dsa_switch_driver mv88e6123_61_65_switch_driver = { diff --git a/drivers/net/dsa/mv88e6131.c b/drivers/net/dsa/mv88e6131.c index e54824fa0d95..32f4a08e9bc9 100644 --- a/drivers/net/dsa/mv88e6131.c +++ b/drivers/net/dsa/mv88e6131.c @@ -37,6 +37,8 @@ static char *mv88e6131_probe(struct device *host_dev, int sw_addr) return "Marvell 88E6131 (B2)"; if (ret_masked == PORT_SWITCH_ID_6131) return "Marvell 88E6131"; + if (ret_masked == PORT_SWITCH_ID_6185) + return "Marvell 88E6185"; } return NULL; @@ -44,186 +46,62 @@ static char *mv88e6131_probe(struct device *host_dev, int sw_addr) static int mv88e6131_setup_global(struct dsa_switch *ds) { + u32 upstream_port = dsa_upstream_port(ds); int ret; - int i; + u32 reg; + + ret = mv88e6xxx_setup_global(ds); + if (ret) + return ret; /* Enable the PHY polling unit, don't discard packets with * excessive collisions, use a weighted fair queueing scheme * to arbitrate between packet queues, set the maximum frame * size to 1632, and mask all interrupt sources. */ - REG_WRITE(REG_GLOBAL, 0x04, 0x4400); - - /* Set the default address aging time to 5 minutes, and - * enable address learn messages to be sent to all message - * ports. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); - - /* Configure the priority mapping registers. */ - ret = mv88e6xxx_config_prio(ds); - if (ret < 0) - return ret; + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, + GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_MAX_FRAME_1632); /* Set the VLAN ethertype to 0x8100. */ - REG_WRITE(REG_GLOBAL, 0x19, 0x8100); + REG_WRITE(REG_GLOBAL, GLOBAL_CORE_TAG_TYPE, 0x8100); /* Disable ARP mirroring, and configure the upstream port as * the port to which ingress and egress monitor frames are to * be sent. */ - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0); + reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | + GLOBAL_MONITOR_CONTROL_ARP_DISABLED; + REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); /* Disable cascade port functionality unless this device * is used in a cascade configuration, and set the switch's * DSA device number. */ if (ds->dst->pd->nr_chips > 1) - REG_WRITE(REG_GLOBAL, 0x1c, 0xf000 | (ds->index & 0x1f)); + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, + GLOBAL_CONTROL_2_MULTIPLE_CASCADE | + (ds->index & 0x1f)); else - REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f)); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); - - /* Ignore removed tag data on doubly tagged packets, disable - * flow control messages, force flow control priority to the - * highest, and send all special multicast frames to the CPU - * port at the highest priority. - */ - REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); - - /* Program the DSA routing table. */ - for (i = 0; i < 32; i++) { - int nexthop; - - nexthop = 0x1f; - if (ds->pd->rtable && - i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; - - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); - } - - /* Clear all trunk masks. */ - for (i = 0; i < 8; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7ff); - - /* Clear all trunk mappings. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, + GLOBAL_CONTROL_2_NO_CASCADE | + (ds->index & 0x1f)); /* Force the priority of IGMP/MLD snoop frames and ARP frames * to the highest setting. */ - REG_WRITE(REG_GLOBAL2, 0x0f, 0x00ff); + REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, + GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP | + 7 << GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT | + GLOBAL2_PRIO_OVERRIDE_FORCE_ARP | + 7 << GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT); return 0; } -static int mv88e6131_setup_port(struct dsa_switch *ds, int p) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int addr = REG_PORT(p); - u16 val; - - /* MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values on physical - * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * (100 Mb/s on 6085) full duplex. - */ - if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - if (ps->id == PORT_SWITCH_ID_6085) - REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */ - else - REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */ - else - REG_WRITE(addr, 0x01, 0x0003); - - /* Port Control: disable Core Tag, disable Drop-on-Lock, - * transmit frames unmodified, disable Header mode, - * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN - * tunneling, determine priority by looking at 802.1p and - * IP priority fields (IP prio has precedence), and set STP - * state to Forwarding. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown unicasts, and enable DSA tagging - * mode. - * - * If this is the link to another switch, use DSA tagging - * mode, but do not enable forwarding of unknown unicasts. - */ - val = 0x0433; - if (p == dsa_upstream_port(ds)) { - val |= 0x0104; - /* On 6085, unknown multicast forward is controlled - * here rather than in Port Control 2 register. - */ - if (ps->id == PORT_SWITCH_ID_6085) - val |= 0x0008; - } - if (ds->dsa_port_mask & (1 << p)) - val |= 0x0100; - REG_WRITE(addr, 0x04, val); - - /* Port Control 2: don't force a good FCS, don't use - * VLAN-based, source address-based or destination - * address-based priority overrides, don't let the switch - * add or strip 802.1q tags, don't discard tagged or - * untagged frames on this port, do a destination address - * lookup on received packets as usual, don't send a copy - * of all transmitted/received frames on this port to the - * CPU, and configure the upstream port number. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown multicast addresses. - */ - if (ps->id == PORT_SWITCH_ID_6085) - /* on 6085, bits 3:0 are reserved, bit 6 control ARP - * mirroring, and multicast forward is handled in - * Port Control register. - */ - REG_WRITE(addr, 0x08, 0x0080); - else { - val = 0x0080 | dsa_upstream_port(ds); - if (p == dsa_upstream_port(ds)) - val |= 0x0040; - REG_WRITE(addr, 0x08, val); - } - - /* Rate Control: disable ingress rate limiting. */ - REG_WRITE(addr, 0x09, 0x0000); - - /* Rate Control 2: disable egress rate limiting. */ - REG_WRITE(addr, 0x0a, 0x0000); - - /* Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - /* Tag Remap: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x18, 0x3210); - - /* Tag Remap 2: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x19, 0x7654); - - return mv88e6xxx_setup_port_common(ds, p); -} - static int mv88e6131_setup(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int i; int ret; ret = mv88e6xxx_setup_common(ds); @@ -234,6 +112,7 @@ static int mv88e6131_setup(struct dsa_switch *ds) switch (ps->id) { case PORT_SWITCH_ID_6085: + case PORT_SWITCH_ID_6185: ps->num_ports = 10; break; case PORT_SWITCH_ID_6095: @@ -251,19 +130,11 @@ static int mv88e6131_setup(struct dsa_switch *ds) if (ret < 0) return ret; - /* @@@ initialise vtu and atu */ - ret = mv88e6131_setup_global(ds); if (ret < 0) return ret; - for (i = 0; i < ps->num_ports; i++) { - ret = mv88e6131_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; + return mv88e6xxx_setup_ports(ds); } static int mv88e6131_port_to_phy_addr(struct dsa_switch *ds, int port) diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 9104efea0e3e..1c7808495a9d 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -1,4 +1,4 @@ -/* net/dsa/mv88e6171.c - Marvell 88e6171/8826172 switch chip support +/* net/dsa/mv88e6171.c - Marvell 88e6171 switch chip support * Copyright (c) 2008-2009 Marvell Semiconductor * Copyright (c) 2014 Claudio Leite <leitec@staticky.com> * @@ -29,8 +29,12 @@ static char *mv88e6171_probe(struct device *host_dev, int sw_addr) if (ret >= 0) { if ((ret & 0xfff0) == PORT_SWITCH_ID_6171) return "Marvell 88E6171"; - if ((ret & 0xfff0) == PORT_SWITCH_ID_6172) - return "Marvell 88E6172"; + if ((ret & 0xfff0) == PORT_SWITCH_ID_6175) + return "Marvell 88E6175"; + if ((ret & 0xfff0) == PORT_SWITCH_ID_6350) + return "Marvell 88E6350"; + if ((ret & 0xfff0) == PORT_SWITCH_ID_6351) + return "Marvell 88E6351"; } return NULL; @@ -38,196 +42,41 @@ static char *mv88e6171_probe(struct device *host_dev, int sw_addr) static int mv88e6171_setup_global(struct dsa_switch *ds) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + u32 upstream_port = dsa_upstream_port(ds); int ret; - int i; + u32 reg; + + ret = mv88e6xxx_setup_global(ds); + if (ret) + return ret; /* Discard packets with excessive collisions, mask all * interrupt sources, enable PPU. */ - REG_WRITE(REG_GLOBAL, 0x04, 0x6000); - - /* Set the default address aging time to 5 minutes, and - * enable address learn messages to be sent to all message - * ports. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); - - /* Configure the priority mapping registers. */ - ret = mv88e6xxx_config_prio(ds); - if (ret < 0) - return ret; + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, + GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_DISCARD_EXCESS); /* Configure the upstream port, and configure the upstream * port as the port to which ingress and egress monitor frames * are to be sent. */ - if (REG_READ(REG_PORT(0), 0x03) == 0x1710) - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1111)); - else - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110)); + reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_MIRROR_SHIFT; + REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); /* Disable remote management for now, and set the switch's * DSA device number. */ - REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:2x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); - - /* Disable the loopback filter, disable flow control - * messages, disable flood broadcast override, disable - * removing of provider tags, disable ATU age violation - * interrupts, disable tag flow control, force flow - * control priority to the highest, and send all special - * multicast frames to the CPU at the highest priority. - */ - REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); - - /* Program the DSA routing table. */ - for (i = 0; i < 32; i++) { - int nexthop; - - nexthop = 0x1f; - if (i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; - - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); - } - - /* Clear all trunk masks. */ - for (i = 0; i < ps->num_ports; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); - - /* Clear all trunk mappings. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); - - /* Disable ingress rate limiting by resetting all ingress - * rate limit registers to their initial state. - */ - for (i = 0; i < 6; i++) - REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); - - /* Initialise cross-chip port VLAN table to reset defaults. */ - REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); - - /* Clear the priority override table. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); - - /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL_2, ds->index & 0x1f); return 0; } -static int mv88e6171_setup_port(struct dsa_switch *ds, int p) -{ - int addr = REG_PORT(p); - u16 val; - - /* MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values on physical - * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * full duplex. - */ - val = REG_READ(addr, 0x01); - if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - REG_WRITE(addr, 0x01, val | 0x003e); - else - REG_WRITE(addr, 0x01, val | 0x0003); - - /* Do not limit the period of time that this port can be - * paused for by the remote end or the period of time that - * this port can pause the remote end. - */ - REG_WRITE(addr, 0x02, 0x0000); - - /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, - * disable Header mode, enable IGMP/MLD snooping, disable VLAN - * tunneling, determine priority by looking at 802.1p and IP - * priority fields (IP prio has precedence), and set STP state - * to Forwarding. - * - * If this is the CPU link, use DSA or EDSA tagging depending - * on which tagging mode was configured. - * - * If this is a link to another switch, use DSA tagging mode. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown unicasts and multicasts. - */ - val = 0x0433; - if (dsa_is_cpu_port(ds, p)) { - if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) - val |= 0x3300; - else - val |= 0x0100; - } - if (ds->dsa_port_mask & (1 << p)) - val |= 0x0100; - if (p == dsa_upstream_port(ds)) - val |= 0x000c; - REG_WRITE(addr, 0x04, val); - - /* Port Control 2: don't force a good FCS, set the maximum - * frame size to 10240 bytes, don't let the switch add or - * strip 802.1q tags, don't discard tagged or untagged frames - * on this port, do a destination address lookup on all - * received packets as usual, disable ARP mirroring and don't - * send a copy of all transmitted/received frames on this port - * to the CPU. - */ - REG_WRITE(addr, 0x08, 0x2080); - - /* Egress rate control: disable egress rate control. */ - REG_WRITE(addr, 0x09, 0x0001); - - /* Egress rate control 2: disable egress rate control. */ - REG_WRITE(addr, 0x0a, 0x0000); - - /* Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - /* Port ATU control: disable limiting the number of address - * database entries that this port is allowed to use. - */ - REG_WRITE(addr, 0x0c, 0x0000); - - /* Priority Override: disable DA, SA and VTU priority override. */ - REG_WRITE(addr, 0x0d, 0x0000); - - /* Port Ethertype: use the Ethertype DSA Ethertype value. */ - REG_WRITE(addr, 0x0f, ETH_P_EDSA); - - /* Tag Remap: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x18, 0x3210); - - /* Tag Remap 2: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x19, 0x7654); - - return mv88e6xxx_setup_port_common(ds, p); -} - static int mv88e6171_setup(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int i; int ret; ret = mv88e6xxx_setup_common(ds); @@ -240,44 +89,11 @@ static int mv88e6171_setup(struct dsa_switch *ds) if (ret < 0) return ret; - /* @@@ initialise vtu and atu */ - ret = mv88e6171_setup_global(ds); if (ret < 0) return ret; - for (i = 0; i < ps->num_ports; i++) { - if (!(dsa_is_cpu_port(ds, i) || ds->phys_port_mask & (1 << i))) - continue; - - ret = mv88e6171_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; -} - -static int mv88e6171_get_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - if (ps->id == PORT_SWITCH_ID_6172) - return mv88e6xxx_get_eee(ds, port, e); - - return -EOPNOTSUPP; -} - -static int mv88e6171_set_eee(struct dsa_switch *ds, int port, - struct phy_device *phydev, struct ethtool_eee *e) -{ - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - - if (ps->id == PORT_SWITCH_ID_6172) - return mv88e6xxx_set_eee(ds, port, phydev, e); - - return -EOPNOTSUPP; + return mv88e6xxx_setup_ports(ds); } struct dsa_switch_driver mv88e6171_switch_driver = { @@ -292,8 +108,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = { .get_strings = mv88e6xxx_get_strings, .get_ethtool_stats = mv88e6xxx_get_ethtool_stats, .get_sset_count = mv88e6xxx_get_sset_count, - .set_eee = mv88e6171_set_eee, - .get_eee = mv88e6171_get_eee, #ifdef CONFIG_NET_DSA_HWMON .get_temp = mv88e6xxx_get_temp, #endif @@ -308,4 +122,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = { }; MODULE_ALIAS("platform:mv88e6171"); -MODULE_ALIAS("platform:mv88e6172"); +MODULE_ALIAS("platform:mv88e6175"); +MODULE_ALIAS("platform:mv88e6350"); +MODULE_ALIAS("platform:mv88e6351"); diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index 126c11b81e75..632815c10a40 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -32,6 +32,8 @@ static char *mv88e6352_probe(struct device *host_dev, int sw_addr) ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), PORT_SWITCH_ID); if (ret >= 0) { + if ((ret & 0xfff0) == PORT_SWITCH_ID_6172) + return "Marvell 88E6172"; if ((ret & 0xfff0) == PORT_SWITCH_ID_6176) return "Marvell 88E6176"; if (ret == PORT_SWITCH_ID_6352_A0) @@ -47,187 +49,37 @@ static char *mv88e6352_probe(struct device *host_dev, int sw_addr) static int mv88e6352_setup_global(struct dsa_switch *ds) { - struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + u32 upstream_port = dsa_upstream_port(ds); int ret; - int i; + u32 reg; + + ret = mv88e6xxx_setup_global(ds); + if (ret) + return ret; /* Discard packets with excessive collisions, * mask all interrupt sources, enable PPU (bit 14, undocumented). */ - REG_WRITE(REG_GLOBAL, 0x04, 0x6000); - - /* Set the default address aging time to 5 minutes, and - * enable address learn messages to be sent to all message - * ports. - */ - REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); - - /* Configure the priority mapping registers. */ - ret = mv88e6xxx_config_prio(ds); - if (ret < 0) - return ret; + REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, + GLOBAL_CONTROL_PPU_ENABLE | GLOBAL_CONTROL_DISCARD_EXCESS); /* Configure the upstream port, and configure the upstream * port as the port to which ingress and egress monitor frames * are to be sent. */ - REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110)); + reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | + upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; + REG_WRITE(REG_GLOBAL, GLOBAL_MONITOR_CONTROL, reg); /* Disable remote management for now, and set the switch's * DSA device number. */ REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f); - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:2x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); - - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); - - /* Disable the loopback filter, disable flow control - * messages, disable flood broadcast override, disable - * removing of provider tags, disable ATU age violation - * interrupts, disable tag flow control, force flow - * control priority to the highest, and send all special - * multicast frames to the CPU at the highest priority. - */ - REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); - - /* Program the DSA routing table. */ - for (i = 0; i < 32; i++) { - int nexthop = 0x1f; - - if (i != ds->index && i < ds->dst->pd->nr_chips) - nexthop = ds->pd->rtable[i] & 0x1f; - - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); - } - - /* Clear all trunk masks. */ - for (i = 0; i < 8; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7f); - - /* Clear all trunk mappings. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); - - /* Disable ingress rate limiting by resetting all ingress - * rate limit registers to their initial state. - */ - for (i = 0; i < ps->num_ports; i++) - REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); - - /* Initialise cross-chip port VLAN table to reset defaults. */ - REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); - - /* Clear the priority override table. */ - for (i = 0; i < 16; i++) - REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); - - /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ - return 0; } -static int mv88e6352_setup_port(struct dsa_switch *ds, int p) -{ - int addr = REG_PORT(p); - u16 val; - - /* MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values on physical - * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * full duplex. - */ - if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - REG_WRITE(addr, 0x01, 0x003e); - else - REG_WRITE(addr, 0x01, 0x0003); - - /* Do not limit the period of time that this port can be - * paused for by the remote end or the period of time that - * this port can pause the remote end. - */ - REG_WRITE(addr, 0x02, 0x0000); - - /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, - * disable Header mode, enable IGMP/MLD snooping, disable VLAN - * tunneling, determine priority by looking at 802.1p and IP - * priority fields (IP prio has precedence), and set STP state - * to Forwarding. - * - * If this is the CPU link, use DSA or EDSA tagging depending - * on which tagging mode was configured. - * - * If this is a link to another switch, use DSA tagging mode. - * - * If this is the upstream port for this switch, enable - * forwarding of unknown unicasts and multicasts. - */ - val = 0x0433; - if (dsa_is_cpu_port(ds, p)) { - if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) - val |= 0x3300; - else - val |= 0x0100; - } - if (ds->dsa_port_mask & (1 << p)) - val |= 0x0100; - if (p == dsa_upstream_port(ds)) - val |= 0x000c; - REG_WRITE(addr, 0x04, val); - - /* Port Control 2: don't force a good FCS, set the maximum - * frame size to 10240 bytes, don't let the switch add or - * strip 802.1q tags, don't discard tagged or untagged frames - * on this port, do a destination address lookup on all - * received packets as usual, disable ARP mirroring and don't - * send a copy of all transmitted/received frames on this port - * to the CPU. - */ - REG_WRITE(addr, 0x08, 0x2080); - - /* Egress rate control: disable egress rate control. */ - REG_WRITE(addr, 0x09, 0x0001); - - /* Egress rate control 2: disable egress rate control. */ - REG_WRITE(addr, 0x0a, 0x0000); - - /* Port Association Vector: when learning source addresses - * of packets, add the address to the address database using - * a port bitmap that has only the bit for this port set and - * the other bits clear. - */ - REG_WRITE(addr, 0x0b, 1 << p); - - /* Port ATU control: disable limiting the number of address - * database entries that this port is allowed to use. - */ - REG_WRITE(addr, 0x0c, 0x0000); - - /* Priority Override: disable DA, SA and VTU priority override. */ - REG_WRITE(addr, 0x0d, 0x0000); - - /* Port Ethertype: use the Ethertype DSA Ethertype value. */ - REG_WRITE(addr, 0x0f, ETH_P_EDSA); - - /* Tag Remap: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x18, 0x3210); - - /* Tag Remap 2: use an identity 802.1p prio -> switch prio - * mapping. - */ - REG_WRITE(addr, 0x19, 0x7654); - - return mv88e6xxx_setup_port_common(ds, p); -} - #ifdef CONFIG_NET_DSA_HWMON static int mv88e6352_get_temp(struct dsa_switch *ds, int *temp) @@ -292,7 +144,6 @@ static int mv88e6352_setup(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; - int i; ret = mv88e6xxx_setup_common(ds); if (ret < 0) @@ -306,19 +157,11 @@ static int mv88e6352_setup(struct dsa_switch *ds) if (ret < 0) return ret; - /* @@@ initialise vtu and atu */ - ret = mv88e6352_setup_global(ds); if (ret < 0) return ret; - for (i = 0; i < ps->num_ports; i++) { - ret = mv88e6352_setup_port(ds, i); - if (ret < 0) - return ret; - } - - return 0; + return mv88e6xxx_setup_ports(ds); } static int mv88e6352_read_eeprom_word(struct dsa_switch *ds, int addr) @@ -552,3 +395,4 @@ struct dsa_switch_driver mv88e6352_switch_driver = { }; MODULE_ALIAS("platform:mv88e6352"); +MODULE_ALIAS("platform:mv88e6172"); diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index cf309aa92802..7fba330ce702 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -19,6 +19,34 @@ #include <net/dsa.h> #include "mv88e6xxx.h" +/* MDIO bus access can be nested in the case of PHYs connected to the + * internal MDIO bus of the switch, which is accessed via MDIO bus of + * the Ethernet interface. Avoid lockdep false positives by using + * mutex_lock_nested(). + */ +static int mv88e6xxx_mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) +{ + int ret; + + mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING); + ret = bus->read(bus, addr, regnum); + mutex_unlock(&bus->mdio_lock); + + return ret; +} + +static int mv88e6xxx_mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, + u16 val) +{ + int ret; + + mutex_lock_nested(&bus->mdio_lock, SINGLE_DEPTH_NESTING); + ret = bus->write(bus, addr, regnum, val); + mutex_unlock(&bus->mdio_lock); + + return ret; +} + /* If the switch's ADDR[4:0] strap pins are strapped to zero, it will * use all 32 SMI bus addresses on its SMI bus, and all switch registers * will be directly accessible on some {device address,register address} @@ -33,7 +61,7 @@ static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr) int i; for (i = 0; i < 16; i++) { - ret = mdiobus_read(bus, sw_addr, SMI_CMD); + ret = mv88e6xxx_mdiobus_read(bus, sw_addr, SMI_CMD); if (ret < 0) return ret; @@ -49,7 +77,7 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) int ret; if (sw_addr == 0) - return mdiobus_read(bus, addr, reg); + return mv88e6xxx_mdiobus_read(bus, addr, reg); /* Wait for the bus to become free. */ ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); @@ -57,8 +85,8 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) return ret; /* Transmit the read command. */ - ret = mdiobus_write(bus, sw_addr, SMI_CMD, - SMI_CMD_OP_22_READ | (addr << 5) | reg); + ret = mv88e6xxx_mdiobus_write(bus, sw_addr, SMI_CMD, + SMI_CMD_OP_22_READ | (addr << 5) | reg); if (ret < 0) return ret; @@ -68,7 +96,7 @@ int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) return ret; /* Read the data. */ - ret = mdiobus_read(bus, sw_addr, SMI_DATA); + ret = mv88e6xxx_mdiobus_read(bus, sw_addr, SMI_DATA); if (ret < 0) return ret; @@ -112,7 +140,7 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, int ret; if (sw_addr == 0) - return mdiobus_write(bus, addr, reg, val); + return mv88e6xxx_mdiobus_write(bus, addr, reg, val); /* Wait for the bus to become free. */ ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); @@ -120,13 +148,13 @@ int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, return ret; /* Transmit the data to write. */ - ret = mdiobus_write(bus, sw_addr, SMI_DATA, val); + ret = mv88e6xxx_mdiobus_write(bus, sw_addr, SMI_DATA, val); if (ret < 0) return ret; /* Transmit the write command. */ - ret = mdiobus_write(bus, sw_addr, SMI_CMD, - SMI_CMD_OP_22_WRITE | (addr << 5) | reg); + ret = mv88e6xxx_mdiobus_write(bus, sw_addr, SMI_CMD, + SMI_CMD_OP_22_WRITE | (addr << 5) | reg); if (ret < 0) return ret; @@ -165,24 +193,6 @@ int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) return ret; } -int mv88e6xxx_config_prio(struct dsa_switch *ds) -{ - /* Configure the IP ToS mapping registers. */ - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff); - REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff); - - /* Configure the IEEE 802.1p priority mapping register. */ - REG_WRITE(REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41); - - return 0; -} - int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) { REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 8) | addr[1]); @@ -217,20 +227,20 @@ int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) return 0; } -/* Must be called with phy mutex held */ +/* Must be called with SMI mutex held */ static int _mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum) { if (addr >= 0) - return mv88e6xxx_reg_read(ds, addr, regnum); + return _mv88e6xxx_reg_read(ds, addr, regnum); return 0xffff; } -/* Must be called with phy mutex held */ +/* Must be called with SMI mutex held */ static int _mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val) { if (addr >= 0) - return mv88e6xxx_reg_write(ds, addr, regnum, val); + return _mv88e6xxx_reg_write(ds, addr, regnum, val); return 0; } @@ -434,26 +444,113 @@ void mv88e6xxx_poll_link(struct dsa_switch *ds) } } +static bool mv88e6xxx_6065_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6031: + case PORT_SWITCH_ID_6061: + case PORT_SWITCH_ID_6035: + case PORT_SWITCH_ID_6065: + return true; + } + return false; +} + +static bool mv88e6xxx_6095_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6092: + case PORT_SWITCH_ID_6095: + return true; + } + return false; +} + +static bool mv88e6xxx_6097_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6046: + case PORT_SWITCH_ID_6085: + case PORT_SWITCH_ID_6096: + case PORT_SWITCH_ID_6097: + return true; + } + return false; +} + +static bool mv88e6xxx_6165_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6123: + case PORT_SWITCH_ID_6161: + case PORT_SWITCH_ID_6165: + return true; + } + return false; +} + +static bool mv88e6xxx_6185_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6121: + case PORT_SWITCH_ID_6122: + case PORT_SWITCH_ID_6152: + case PORT_SWITCH_ID_6155: + case PORT_SWITCH_ID_6182: + case PORT_SWITCH_ID_6185: + case PORT_SWITCH_ID_6108: + case PORT_SWITCH_ID_6131: + return true; + } + return false; +} + +static bool mv88e6xxx_6351_family(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + + switch (ps->id) { + case PORT_SWITCH_ID_6171: + case PORT_SWITCH_ID_6175: + case PORT_SWITCH_ID_6350: + case PORT_SWITCH_ID_6351: + return true; + } + return false; +} + static bool mv88e6xxx_6352_family(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); switch (ps->id) { - case PORT_SWITCH_ID_6352: case PORT_SWITCH_ID_6172: case PORT_SWITCH_ID_6176: + case PORT_SWITCH_ID_6240: + case PORT_SWITCH_ID_6352: return true; } return false; } -static int mv88e6xxx_stats_wait(struct dsa_switch *ds) +/* Must be called with SMI mutex held */ +static int _mv88e6xxx_stats_wait(struct dsa_switch *ds) { int ret; int i; for (i = 0; i < 10; i++) { - ret = REG_READ(REG_GLOBAL, GLOBAL_STATS_OP); + ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_OP); if ((ret & GLOBAL_STATS_OP_BUSY) == 0) return 0; } @@ -461,7 +558,8 @@ static int mv88e6xxx_stats_wait(struct dsa_switch *ds) return -ETIMEDOUT; } -static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) +/* Must be called with SMI mutex held */ +static int _mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) { int ret; @@ -469,42 +567,45 @@ static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) port = (port + 1) << 5; /* Snapshot the hardware statistics counters for this port. */ - REG_WRITE(REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_CAPTURE_PORT | - GLOBAL_STATS_OP_HIST_RX_TX | port); + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_CAPTURE_PORT | + GLOBAL_STATS_OP_HIST_RX_TX | port); + if (ret < 0) + return ret; /* Wait for the snapshotting to complete. */ - ret = mv88e6xxx_stats_wait(ds); + ret = _mv88e6xxx_stats_wait(ds); if (ret < 0) return ret; return 0; } -static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) +/* Must be called with SMI mutex held */ +static void _mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) { u32 _val; int ret; *val = 0; - ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_READ_CAPTURED | - GLOBAL_STATS_OP_HIST_RX_TX | stat); + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_READ_CAPTURED | + GLOBAL_STATS_OP_HIST_RX_TX | stat); if (ret < 0) return; - ret = mv88e6xxx_stats_wait(ds); + ret = _mv88e6xxx_stats_wait(ds); if (ret < 0) return; - ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_COUNTER_32); + ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_COUNTER_32); if (ret < 0) return; _val = ret << 16; - ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_COUNTER_01); + ret = _mv88e6xxx_reg_read(ds, REG_GLOBAL, GLOBAL_STATS_COUNTER_01); if (ret < 0) return; @@ -587,11 +688,11 @@ static void _mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int ret; int i; - mutex_lock(&ps->stats_mutex); + mutex_lock(&ps->smi_mutex); - ret = mv88e6xxx_stats_snapshot(ds, port); + ret = _mv88e6xxx_stats_snapshot(ds, port); if (ret < 0) { - mutex_unlock(&ps->stats_mutex); + mutex_unlock(&ps->smi_mutex); return; } @@ -608,8 +709,8 @@ static void _mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, goto error; low = ret; if (s->sizeof_stat == 4) { - ret = mv88e6xxx_reg_read(ds, REG_PORT(port), - s->reg - 0x100 + 1); + ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), + s->reg - 0x100 + 1); if (ret < 0) goto error; high = ret; @@ -617,14 +718,14 @@ static void _mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, data[i] = (((u64)high) << 16) | low; continue; } - mv88e6xxx_stats_read(ds, s->reg, &low); + _mv88e6xxx_stats_read(ds, s->reg, &low); if (s->sizeof_stat == 8) - mv88e6xxx_stats_read(ds, s->reg + 1, &high); + _mv88e6xxx_stats_read(ds, s->reg + 1, &high); data[i] = (((u64)high) << 32) | low; } error: - mutex_unlock(&ps->stats_mutex); + mutex_unlock(&ps->smi_mutex); } /* All the statistics in the table */ @@ -694,7 +795,7 @@ int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) *temp = 0; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_write(ds, 0x0, 0x16, 0x6); if (ret < 0) @@ -727,19 +828,23 @@ int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) error: _mv88e6xxx_phy_write(ds, 0x0, 0x16, 0x0); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } #endif /* CONFIG_NET_DSA_HWMON */ -static int mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask) +/* Must be called with SMI lock held */ +static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, + u16 mask) { unsigned long timeout = jiffies + HZ / 10; while (time_before(jiffies, timeout)) { int ret; - ret = REG_READ(reg, offset); + ret = _mv88e6xxx_reg_read(ds, reg, offset); + if (ret < 0) + return ret; if (!(ret & mask)) return 0; @@ -748,10 +853,22 @@ static int mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask) return -ETIMEDOUT; } -int mv88e6xxx_phy_wait(struct dsa_switch *ds) +static int mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = _mv88e6xxx_wait(ds, reg, offset, mask); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +static int _mv88e6xxx_phy_wait(struct dsa_switch *ds) { - return mv88e6xxx_wait(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_BUSY); + return _mv88e6xxx_wait(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, + GLOBAL2_SMI_OP_BUSY); } int mv88e6xxx_eeprom_load_wait(struct dsa_switch *ds) @@ -767,56 +884,46 @@ int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds) } /* Must be called with SMI lock held */ -static int _mv88e6xxx_wait(struct dsa_switch *ds, int reg, int offset, u16 mask) -{ - unsigned long timeout = jiffies + HZ / 10; - - while (time_before(jiffies, timeout)) { - int ret; - - ret = _mv88e6xxx_reg_read(ds, reg, offset); - if (ret < 0) - return ret; - if (!(ret & mask)) - return 0; - - usleep_range(1000, 2000); - } - return -ETIMEDOUT; -} - -/* Must be called with SMI lock held */ static int _mv88e6xxx_atu_wait(struct dsa_switch *ds) { return _mv88e6xxx_wait(ds, REG_GLOBAL, GLOBAL_ATU_OP, GLOBAL_ATU_OP_BUSY); } -/* Must be called with phy mutex held */ +/* Must be called with SMI mutex held */ static int _mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int addr, int regnum) { int ret; - REG_WRITE(REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_22_READ | (addr << 5) | regnum); + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, + GLOBAL2_SMI_OP_22_READ | (addr << 5) | + regnum); + if (ret < 0) + return ret; - ret = mv88e6xxx_phy_wait(ds); + ret = _mv88e6xxx_phy_wait(ds); if (ret < 0) return ret; - return REG_READ(REG_GLOBAL2, GLOBAL2_SMI_DATA); + return _mv88e6xxx_reg_read(ds, REG_GLOBAL2, GLOBAL2_SMI_DATA); } -/* Must be called with phy mutex held */ +/* Must be called with SMI mutex held */ static int _mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int addr, int regnum, u16 val) { - REG_WRITE(REG_GLOBAL2, GLOBAL2_SMI_DATA, val); - REG_WRITE(REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_22_WRITE | (addr << 5) | regnum); + int ret; - return mv88e6xxx_phy_wait(ds); + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_DATA, val); + if (ret < 0) + return ret; + + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_SMI_OP, + GLOBAL2_SMI_OP_22_WRITE | (addr << 5) | + regnum); + + return _mv88e6xxx_phy_wait(ds); } int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) @@ -824,7 +931,7 @@ int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int reg; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); reg = _mv88e6xxx_phy_read_indirect(ds, port, 16); if (reg < 0) @@ -833,7 +940,7 @@ int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) e->eee_enabled = !!(reg & 0x0200); e->tx_lpi_enabled = !!(reg & 0x0100); - reg = mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS); + reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS); if (reg < 0) goto out; @@ -841,7 +948,7 @@ int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) reg = 0; out: - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return reg; } @@ -852,7 +959,7 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, int reg; int ret; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_read_indirect(ds, port, 16); if (ret < 0) @@ -866,7 +973,7 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, ret = _mv88e6xxx_phy_write_indirect(ds, port, 16, reg); out: - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1241,13 +1348,212 @@ static void mv88e6xxx_bridge_work(struct work_struct *work) } } -int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port) +static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret, fid; + u16 reg; mutex_lock(&ps->smi_mutex); + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) || + mv88e6xxx_6065_family(ds)) { + /* MAC Forcing register: don't force link, speed, + * duplex or flow control state to any particular + * values on physical ports, but force the CPU port + * and all DSA ports to their maximum bandwidth and + * full duplex. + */ + reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL); + if (dsa_is_cpu_port(ds, port) || + ds->dsa_port_mask & (1 << port)) { + reg |= PORT_PCS_CTRL_FORCE_LINK | + PORT_PCS_CTRL_LINK_UP | + PORT_PCS_CTRL_DUPLEX_FULL | + PORT_PCS_CTRL_FORCE_DUPLEX; + if (mv88e6xxx_6065_family(ds)) + reg |= PORT_PCS_CTRL_100; + else + reg |= PORT_PCS_CTRL_1000; + } else { + reg |= PORT_PCS_CTRL_UNFORCED; + } + + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_PCS_CTRL, reg); + if (ret) + goto abort; + } + + /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, + * disable Header mode, enable IGMP/MLD snooping, disable VLAN + * tunneling, determine priority by looking at 802.1p and IP + * priority fields (IP prio has precedence), and set STP state + * to Forwarding. + * + * If this is the CPU link, use DSA or EDSA tagging depending + * on which tagging mode was configured. + * + * If this is a link to another switch, use DSA tagging mode. + * + * If this is the upstream port for this switch, enable + * forwarding of unknown unicasts and multicasts. + */ + reg = 0; + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) || + mv88e6xxx_6185_family(ds)) + reg = PORT_CONTROL_IGMP_MLD_SNOOP | + PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP | + PORT_CONTROL_STATE_FORWARDING; + if (dsa_is_cpu_port(ds, port)) { + if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) + reg |= PORT_CONTROL_DSA_TAG; + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) { + if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) + reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA; + else + reg |= PORT_CONTROL_FRAME_MODE_DSA; + } + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) || + mv88e6xxx_6185_family(ds)) { + if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA) + reg |= PORT_CONTROL_EGRESS_ADD_TAG; + } + } + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds)) { + if (ds->dsa_port_mask & (1 << port)) + reg |= PORT_CONTROL_FRAME_MODE_DSA; + if (port == dsa_upstream_port(ds)) + reg |= PORT_CONTROL_FORWARD_UNKNOWN | + PORT_CONTROL_FORWARD_UNKNOWN_MC; + } + if (reg) { + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_CONTROL, reg); + if (ret) + goto abort; + } + + /* Port Control 2: don't force a good FCS, set the maximum + * frame size to 10240 bytes, don't let the switch add or + * strip 802.1q tags, don't discard tagged or untagged frames + * on this port, do a destination address lookup on all + * received packets as usual, disable ARP mirroring and don't + * send a copy of all transmitted/received frames on this port + * to the CPU. + */ + reg = 0; + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6095_family(ds)) + reg = PORT_CONTROL_2_MAP_DA; + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds)) + reg |= PORT_CONTROL_2_JUMBO_10240; + + if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) { + /* Set the upstream port this port should use */ + reg |= dsa_upstream_port(ds); + /* enable forwarding of unknown multicast addresses to + * the upstream port + */ + if (port == dsa_upstream_port(ds)) + reg |= PORT_CONTROL_2_FORWARD_UNKNOWN; + } + + if (reg) { + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_CONTROL_2, reg); + if (ret) + goto abort; + } + + /* Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_ASSOC_VECTOR, + 1 << port); + if (ret) + goto abort; + + /* Egress rate control 2: disable egress rate control. */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_RATE_CONTROL_2, + 0x0000); + if (ret) + goto abort; + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) { + /* Do not limit the period of time that this port can + * be paused for by the remote end or the period of + * time that this port can pause the remote end. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_PAUSE_CTRL, 0x0000); + if (ret) + goto abort; + + /* Port ATU control: disable limiting the number of + * address database entries that this port is allowed + * to use. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_ATU_CONTROL, 0x0000); + /* Priority Override: disable DA, SA and VTU priority + * override. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_PRI_OVERRIDE, 0x0000); + if (ret) + goto abort; + + /* Port Ethertype: use the Ethertype DSA Ethertype + * value. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_ETH_TYPE, ETH_P_EDSA); + if (ret) + goto abort; + /* Tag Remap: use an identity 802.1p prio -> switch + * prio mapping. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_TAG_REGMAP_0123, 0x3210); + if (ret) + goto abort; + + /* Tag Remap 2: use an identity 802.1p prio -> switch + * prio mapping. + */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_TAG_REGMAP_4567, 0x7654); + if (ret) + goto abort; + } + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds)) { + /* Rate Control: disable ingress rate limiting. */ + ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), + PORT_RATE_CONTROL, 0x0001); + if (ret) + goto abort; + } + /* Port Control 1: disable trunking, disable sending * learning messages to this port. */ @@ -1281,13 +1587,25 @@ abort: return ret; } +int mv88e6xxx_setup_ports(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int ret; + int i; + + for (i = 0; i < ps->num_ports; i++) { + ret = mv88e6xxx_setup_port(ds, i); + if (ret < 0) + return ret; + } + return 0; +} + int mv88e6xxx_setup_common(struct dsa_switch *ds) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); mutex_init(&ps->smi_mutex); - mutex_init(&ps->stats_mutex); - mutex_init(&ps->phy_mutex); ps->id = REG_READ(REG_PORT(0), PORT_SWITCH_ID) & 0xfff0; @@ -1298,6 +1616,104 @@ int mv88e6xxx_setup_common(struct dsa_switch *ds) return 0; } +int mv88e6xxx_setup_global(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); + int i; + + /* Set the default address aging time to 5 minutes, and + * enable address learn messages to be sent to all message + * ports. + */ + REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL, + 0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL); + + /* Configure the IP ToS mapping registers. */ + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff); + REG_WRITE(REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff); + + /* Configure the IEEE 802.1p priority mapping register. */ + REG_WRITE(REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41); + + /* Send all frames with destination addresses matching + * 01:80:c2:00:00:0x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, 0xffff); + + /* Ignore removed tag data on doubly tagged packets, disable + * flow control messages, force flow control priority to the + * highest, and send all special multicast frames to the CPU + * port at the highest priority. + */ + REG_WRITE(REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, + 0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 | + GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI); + + /* Program the DSA routing table. */ + for (i = 0; i < 32; i++) { + int nexthop = 0x1f; + + if (ds->pd->rtable && + i != ds->index && i < ds->dst->pd->nr_chips) + nexthop = ds->pd->rtable[i] & 0x1f; + + REG_WRITE(REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, + GLOBAL2_DEVICE_MAPPING_UPDATE | + (i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) | + nexthop); + } + + /* Clear all trunk masks. */ + for (i = 0; i < 8; i++) + REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MASK, + 0x8000 | (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) | + ((1 << ps->num_ports) - 1)); + + /* Clear all trunk mappings. */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, + GLOBAL2_TRUNK_MAPPING_UPDATE | + (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT)); + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) { + /* Send all frames with destination addresses matching + * 01:80:c2:00:00:2x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, 0xffff); + + /* Initialise cross-chip port VLAN table to reset + * defaults. + */ + REG_WRITE(REG_GLOBAL2, GLOBAL2_PVT_ADDR, 0x9000); + + /* Clear the priority override table. */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, + 0x8000 | (i << 8)); + } + + if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || + mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) || + mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds)) { + /* Disable ingress rate limiting by resetting all + * ingress rate limit registers to their initial + * state. + */ + for (i = 0; i < ps->num_ports; i++) + REG_WRITE(REG_GLOBAL2, GLOBAL2_INGRESS_OP, + 0x9000 | (i << 8)); + } + + return 0; +} + int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); @@ -1343,14 +1759,14 @@ int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg) struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page); if (ret < 0) goto error; ret = _mv88e6xxx_phy_read_indirect(ds, port, reg); error: _mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1360,7 +1776,7 @@ int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); int ret; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page); if (ret < 0) goto error; @@ -1368,7 +1784,7 @@ int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page, ret = _mv88e6xxx_phy_write_indirect(ds, port, reg, val); error: _mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1391,9 +1807,9 @@ mv88e6xxx_phy_read(struct dsa_switch *ds, int port, int regnum) if (addr < 0) return addr; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_read(ds, addr, regnum); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1407,9 +1823,9 @@ mv88e6xxx_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) if (addr < 0) return addr; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_write(ds, addr, regnum, val); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1423,9 +1839,9 @@ mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int port, int regnum) if (addr < 0) return addr; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_read_indirect(ds, addr, regnum); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } @@ -1440,9 +1856,9 @@ mv88e6xxx_phy_write_indirect(struct dsa_switch *ds, int port, int regnum, if (addr < 0) return addr; - mutex_lock(&ps->phy_mutex); + mutex_lock(&ps->smi_mutex); ret = _mv88e6xxx_phy_write_indirect(ds, addr, regnum, val); - mutex_unlock(&ps->phy_mutex); + mutex_unlock(&ps->smi_mutex); return ret; } diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h index e045154f3364..e10ccdb4ffbc 100644 --- a/drivers/net/dsa/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx.h @@ -40,9 +40,31 @@ #define PORT_STATUS_TX_PAUSED BIT(5) #define PORT_STATUS_FLOW_CTRL BIT(4) #define PORT_PCS_CTRL 0x01 +#define PORT_PCS_CTRL_FC BIT(7) +#define PORT_PCS_CTRL_FORCE_FC BIT(6) +#define PORT_PCS_CTRL_LINK_UP BIT(5) +#define PORT_PCS_CTRL_FORCE_LINK BIT(4) +#define PORT_PCS_CTRL_DUPLEX_FULL BIT(3) +#define PORT_PCS_CTRL_FORCE_DUPLEX BIT(2) +#define PORT_PCS_CTRL_10 0x00 +#define PORT_PCS_CTRL_100 0x01 +#define PORT_PCS_CTRL_1000 0x02 +#define PORT_PCS_CTRL_UNFORCED 0x03 +#define PORT_PAUSE_CTRL 0x02 #define PORT_SWITCH_ID 0x03 +#define PORT_SWITCH_ID_6031 0x0310 +#define PORT_SWITCH_ID_6035 0x0350 +#define PORT_SWITCH_ID_6046 0x0480 +#define PORT_SWITCH_ID_6061 0x0610 +#define PORT_SWITCH_ID_6065 0x0650 #define PORT_SWITCH_ID_6085 0x04a0 +#define PORT_SWITCH_ID_6092 0x0970 #define PORT_SWITCH_ID_6095 0x0950 +#define PORT_SWITCH_ID_6096 0x0980 +#define PORT_SWITCH_ID_6097 0x0990 +#define PORT_SWITCH_ID_6108 0x1070 +#define PORT_SWITCH_ID_6121 0x1040 +#define PORT_SWITCH_ID_6122 0x1050 #define PORT_SWITCH_ID_6123 0x1210 #define PORT_SWITCH_ID_6123_A1 0x1212 #define PORT_SWITCH_ID_6123_A2 0x1213 @@ -58,13 +80,38 @@ #define PORT_SWITCH_ID_6165_A2 0x1653 #define PORT_SWITCH_ID_6171 0x1710 #define PORT_SWITCH_ID_6172 0x1720 +#define PORT_SWITCH_ID_6175 0x1750 #define PORT_SWITCH_ID_6176 0x1760 #define PORT_SWITCH_ID_6182 0x1a60 #define PORT_SWITCH_ID_6185 0x1a70 +#define PORT_SWITCH_ID_6240 0x2400 +#define PORT_SWITCH_ID_6320 0x1250 +#define PORT_SWITCH_ID_6350 0x3710 +#define PORT_SWITCH_ID_6351 0x3750 #define PORT_SWITCH_ID_6352 0x3520 #define PORT_SWITCH_ID_6352_A0 0x3521 #define PORT_SWITCH_ID_6352_A1 0x3522 #define PORT_CONTROL 0x04 +#define PORT_CONTROL_USE_CORE_TAG BIT(15) +#define PORT_CONTROL_DROP_ON_LOCK BIT(14) +#define PORT_CONTROL_EGRESS_UNMODIFIED (0x0 << 12) +#define PORT_CONTROL_EGRESS_UNTAGGED (0x1 << 12) +#define PORT_CONTROL_EGRESS_TAGGED (0x2 << 12) +#define PORT_CONTROL_EGRESS_ADD_TAG (0x3 << 12) +#define PORT_CONTROL_HEADER BIT(11) +#define PORT_CONTROL_IGMP_MLD_SNOOP BIT(10) +#define PORT_CONTROL_DOUBLE_TAG BIT(9) +#define PORT_CONTROL_FRAME_MODE_NORMAL (0x0 << 8) +#define PORT_CONTROL_FRAME_MODE_DSA (0x1 << 8) +#define PORT_CONTROL_FRAME_MODE_PROVIDER (0x2 << 8) +#define PORT_CONTROL_FRAME_ETHER_TYPE_DSA (0x3 << 8) +#define PORT_CONTROL_DSA_TAG BIT(8) +#define PORT_CONTROL_VLAN_TUNNEL BIT(7) +#define PORT_CONTROL_TAG_IF_BOTH BIT(6) +#define PORT_CONTROL_USE_IP BIT(5) +#define PORT_CONTROL_USE_TAG BIT(4) +#define PORT_CONTROL_FORWARD_UNKNOWN_MC BIT(3) +#define PORT_CONTROL_FORWARD_UNKNOWN BIT(2) #define PORT_CONTROL_STATE_MASK 0x03 #define PORT_CONTROL_STATE_DISABLED 0x00 #define PORT_CONTROL_STATE_BLOCKING 0x01 @@ -74,15 +121,32 @@ #define PORT_BASE_VLAN 0x06 #define PORT_DEFAULT_VLAN 0x07 #define PORT_CONTROL_2 0x08 +#define PORT_CONTROL_2_IGNORE_FCS BIT(15) +#define PORT_CONTROL_2_VTU_PRI_OVERRIDE BIT(14) +#define PORT_CONTROL_2_SA_PRIO_OVERRIDE BIT(13) +#define PORT_CONTROL_2_DA_PRIO_OVERRIDE BIT(12) +#define PORT_CONTROL_2_JUMBO_1522 (0x00 << 12) +#define PORT_CONTROL_2_JUMBO_2048 (0x01 << 12) +#define PORT_CONTROL_2_JUMBO_10240 (0x02 << 12) +#define PORT_CONTROL_2_DISCARD_TAGGED BIT(9) +#define PORT_CONTROL_2_DISCARD_UNTAGGED BIT(8) +#define PORT_CONTROL_2_MAP_DA BIT(7) +#define PORT_CONTROL_2_DEFAULT_FORWARD BIT(6) +#define PORT_CONTROL_2_FORWARD_UNKNOWN BIT(6) +#define PORT_CONTROL_2_EGRESS_MONITOR BIT(5) +#define PORT_CONTROL_2_INGRESS_MONITOR BIT(4) #define PORT_RATE_CONTROL 0x09 #define PORT_RATE_CONTROL_2 0x0a #define PORT_ASSOC_VECTOR 0x0b +#define PORT_ATU_CONTROL 0x0c +#define PORT_PRI_OVERRIDE 0x0d +#define PORT_ETH_TYPE 0x0f #define PORT_IN_DISCARD_LO 0x10 #define PORT_IN_DISCARD_HI 0x11 #define PORT_IN_FILTERED 0x12 #define PORT_OUT_FILTERED 0x13 -#define PORT_TAG_REGMAP_0123 0x19 -#define PORT_TAG_REGMAP_4567 0x1a +#define PORT_TAG_REGMAP_0123 0x18 +#define PORT_TAG_REGMAP_4567 0x19 #define REG_GLOBAL 0x1b #define GLOBAL_STATUS 0x00 @@ -102,7 +166,7 @@ #define GLOBAL_CONTROL_DISCARD_EXCESS BIT(13) /* 6352 */ #define GLOBAL_CONTROL_SCHED_PRIO BIT(11) /* 6152 */ #define GLOBAL_CONTROL_MAX_FRAME_1632 BIT(10) /* 6152 */ -#define GLOBAL_CONTROL_RELOAD_EEPROM BIT(9) /* 6152 */ +#define GLOBAL_CONTROL_RELOAD_EEPROM BIT(9) /* 6152 */ #define GLOBAL_CONTROL_DEVICE_EN BIT(7) #define GLOBAL_CONTROL_STATS_DONE_EN BIT(6) #define GLOBAL_CONTROL_VTU_PROBLEM_EN BIT(5) @@ -117,6 +181,7 @@ #define GLOBAL_VTU_DATA_4_7 0x08 #define GLOBAL_VTU_DATA_8_11 0x09 #define GLOBAL_ATU_CONTROL 0x0a +#define GLOBAL_ATU_CONTROL_LEARN2ALL BIT(3) #define GLOBAL_ATU_OP 0x0b #define GLOBAL_ATU_OP_BUSY BIT(15) #define GLOBAL_ATU_OP_NOP (0 << 12) @@ -151,7 +216,15 @@ #define GLOBAL_IEEE_PRI 0x18 #define GLOBAL_CORE_TAG_TYPE 0x19 #define GLOBAL_MONITOR_CONTROL 0x1a +#define GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT 12 +#define GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT 8 +#define GLOBAL_MONITOR_CONTROL_ARP_SHIFT 4 +#define GLOBAL_MONITOR_CONTROL_MIRROR_SHIFT 0 +#define GLOBAL_MONITOR_CONTROL_ARP_DISABLED (0xf0) #define GLOBAL_CONTROL_2 0x1c +#define GLOBAL_CONTROL_2_NO_CASCADE 0xe000 +#define GLOBAL_CONTROL_2_MULTIPLE_CASCADE 0xf000 + #define GLOBAL_STATS_OP 0x1d #define GLOBAL_STATS_OP_BUSY BIT(15) #define GLOBAL_STATS_OP_NOP (0 << 12) @@ -172,9 +245,20 @@ #define GLOBAL2_MGMT_EN_0X 0x03 #define GLOBAL2_FLOW_CONTROL 0x04 #define GLOBAL2_SWITCH_MGMT 0x05 +#define GLOBAL2_SWITCH_MGMT_USE_DOUBLE_TAG_DATA BIT(15) +#define GLOBAL2_SWITCH_MGMT_PREVENT_LOOPS BIT(14) +#define GLOBAL2_SWITCH_MGMT_FLOW_CONTROL_MSG BIT(13) +#define GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI BIT(7) +#define GLOBAL2_SWITCH_MGMT_RSVD2CPU BIT(3) #define GLOBAL2_DEVICE_MAPPING 0x06 +#define GLOBAL2_DEVICE_MAPPING_UPDATE BIT(15) +#define GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT 8 #define GLOBAL2_TRUNK_MASK 0x07 +#define GLOBAL2_TRUNK_MASK_UPDATE BIT(15) +#define GLOBAL2_TRUNK_MASK_NUM_SHIFT 12 #define GLOBAL2_TRUNK_MAPPING 0x08 +#define GLOBAL2_TRUNK_MAPPING_UPDATE BIT(15) +#define GLOBAL2_TRUNK_MAPPING_ID_SHIFT 11 #define GLOBAL2_INGRESS_OP 0x09 #define GLOBAL2_INGRESS_DATA 0x0a #define GLOBAL2_PVT_ADDR 0x0b @@ -183,6 +267,10 @@ #define GLOBAL2_SWITCH_MAC_BUSY BIT(15) #define GLOBAL2_ATU_STATS 0x0e #define GLOBAL2_PRIO_OVERRIDE 0x0f +#define GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP BIT(7) +#define GLOBAL2_PRIO_OVERRIDE_SNOOP_SHIFT 4 +#define GLOBAL2_PRIO_OVERRIDE_FORCE_ARP BIT(3) +#define GLOBAL2_PRIO_OVERRIDE_ARP_SHIFT 0 #define GLOBAL2_EEPROM_OP 0x14 #define GLOBAL2_EEPROM_OP_BUSY BIT(15) #define GLOBAL2_EEPROM_OP_LOAD BIT(11) @@ -260,14 +348,14 @@ struct mv88e6xxx_hw_stat { }; int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active); -int mv88e6xxx_setup_port_common(struct dsa_switch *ds, int port); +int mv88e6xxx_setup_ports(struct dsa_switch *ds); int mv88e6xxx_setup_common(struct dsa_switch *ds); +int mv88e6xxx_setup_global(struct dsa_switch *ds); int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg); int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg); int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, int reg, u16 val); int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val); -int mv88e6xxx_config_prio(struct dsa_switch *ds); int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr); int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr); int mv88e6xxx_phy_read(struct dsa_switch *ds, int port, int regnum); @@ -289,7 +377,6 @@ int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port); void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *_p); int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp); -int mv88e6xxx_phy_wait(struct dsa_switch *ds); int mv88e6xxx_eeprom_load_wait(struct dsa_switch *ds); int mv88e6xxx_eeprom_busy_wait(struct dsa_switch *ds); int mv88e6xxx_phy_read_indirect(struct dsa_switch *ds, int addr, int regnum); diff --git a/drivers/net/ethernet/apm/xgene/Makefile b/drivers/net/ethernet/apm/xgene/Makefile index 68be565548c0..700b5abe5de5 100644 --- a/drivers/net/ethernet/apm/xgene/Makefile +++ b/drivers/net/ethernet/apm/xgene/Makefile @@ -3,5 +3,5 @@ # xgene-enet-objs := xgene_enet_hw.o xgene_enet_sgmac.o xgene_enet_xgmac.o \ - xgene_enet_main.o xgene_enet_ethtool.o + xgene_enet_main.o xgene_enet_ring2.o xgene_enet_ethtool.o obj-$(CONFIG_NET_XGENE) += xgene-enet.o diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index b927021c6c40..25873d142b95 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -87,10 +87,11 @@ static void xgene_enet_ring_rd32(struct xgene_enet_desc_ring *ring, static void xgene_enet_write_ring_state(struct xgene_enet_desc_ring *ring) { + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); int i; xgene_enet_ring_wr32(ring, CSR_RING_CONFIG, ring->num); - for (i = 0; i < NUM_RING_CONFIG; i++) { + for (i = 0; i < pdata->ring_ops->num_ring_config; i++) { xgene_enet_ring_wr32(ring, CSR_RING_WR_BASE + (i * 4), ring->state[i]); } @@ -98,7 +99,7 @@ static void xgene_enet_write_ring_state(struct xgene_enet_desc_ring *ring) static void xgene_enet_clr_ring_state(struct xgene_enet_desc_ring *ring) { - memset(ring->state, 0, sizeof(u32) * NUM_RING_CONFIG); + memset(ring->state, 0, sizeof(ring->state)); xgene_enet_write_ring_state(ring); } @@ -141,8 +142,8 @@ static void xgene_enet_clr_desc_ring_id(struct xgene_enet_desc_ring *ring) xgene_enet_ring_wr32(ring, CSR_RING_ID_BUF, 0); } -struct xgene_enet_desc_ring *xgene_enet_setup_ring( - struct xgene_enet_desc_ring *ring) +static struct xgene_enet_desc_ring *xgene_enet_setup_ring( + struct xgene_enet_desc_ring *ring) { u32 size = ring->size; u32 i, data; @@ -168,7 +169,7 @@ struct xgene_enet_desc_ring *xgene_enet_setup_ring( return ring; } -void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring) +static void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring) { u32 data; bool is_bufpool; @@ -186,6 +187,22 @@ out: xgene_enet_clr_ring_state(ring); } +static void xgene_enet_wr_cmd(struct xgene_enet_desc_ring *ring, int count) +{ + iowrite32(count, ring->cmd); +} + +static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) +{ + u32 __iomem *cmd_base = ring->cmd_base; + u32 ring_state, num_msgs; + + ring_state = ioread32(&cmd_base[1]); + num_msgs = GET_VAL(NUMMSGSINQ, ring_state); + + return num_msgs; +} + void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, struct xgene_enet_pdata *pdata, enum xgene_enet_err_code status) @@ -803,3 +820,12 @@ struct xgene_port_ops xgene_gport_ops = { .cle_bypass = xgene_enet_cle_bypass, .shutdown = xgene_gport_shutdown, }; + +struct xgene_ring_ops xgene_ring1_ops = { + .num_ring_config = NUM_RING_CONFIG, + .num_ring_id_shift = 6, + .setup = xgene_enet_setup_ring, + .clear = xgene_enet_clear_ring, + .wr_cmd = xgene_enet_wr_cmd, + .len = xgene_enet_ring_len, +}; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index d9bc89d69266..541bed056012 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -26,6 +26,7 @@ struct xgene_enet_pdata; struct xgene_enet_stats; +struct xgene_enet_desc_ring; /* clears and then set bits */ static inline void xgene_set_bits(u32 *dst, u32 val, u32 start, u32 len) @@ -101,8 +102,8 @@ enum xgene_enet_rm { #define BLOCK_ETH_CSR_OFFSET 0x2000 #define BLOCK_ETH_RING_IF_OFFSET 0x9000 +#define BLOCK_ETH_CLKRST_CSR_OFFSET 0xc000 #define BLOCK_ETH_DIAG_CSR_OFFSET 0xD000 - #define BLOCK_ETH_MAC_OFFSET 0x0000 #define BLOCK_ETH_MAC_CSR_OFFSET 0x2800 @@ -261,6 +262,7 @@ enum xgene_enet_ring_type { enum xgene_ring_owner { RING_OWNER_ETH0, + RING_OWNER_ETH1, RING_OWNER_CPU = 15, RING_OWNER_INVALID }; @@ -314,9 +316,6 @@ static inline u16 xgene_enet_get_numslots(u16 id, u32 size) size / WORK_DESC_SIZE; } -struct xgene_enet_desc_ring *xgene_enet_setup_ring( - struct xgene_enet_desc_ring *ring); -void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring); void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, struct xgene_enet_pdata *pdata, enum xgene_enet_err_code status); @@ -327,5 +326,6 @@ bool xgene_ring_mgr_init(struct xgene_enet_pdata *p); extern struct xgene_mac_ops xgene_gmac_ops; extern struct xgene_port_ops xgene_gport_ops; +extern struct xgene_ring_ops xgene_ring1_ops; #endif /* __XGENE_ENET_HW_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 40d3530d7f30..1bb317532f75 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -28,6 +28,8 @@ #define RES_RING_CSR 1 #define RES_RING_CMD 2 +static const struct of_device_id xgene_enet_of_match[]; + static void xgene_enet_init_bufpool(struct xgene_enet_desc_ring *buf_pool) { struct xgene_enet_raw_desc16 *raw_desc; @@ -48,6 +50,7 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, { struct sk_buff *skb; struct xgene_enet_raw_desc16 *raw_desc; + struct xgene_enet_pdata *pdata; struct net_device *ndev; struct device *dev; dma_addr_t dma_addr; @@ -58,6 +61,7 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, ndev = buf_pool->ndev; dev = ndev_to_dev(buf_pool->ndev); + pdata = netdev_priv(ndev); bufdatalen = BUF_LEN_CODE_2K | (SKB_BUFFER_SIZE & GENMASK(11, 0)); len = XGENE_ENET_MAX_MTU; @@ -82,7 +86,7 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, tail = (tail + 1) & slots; } - iowrite32(nbuf, buf_pool->cmd); + pdata->ring_ops->wr_cmd(buf_pool, nbuf); buf_pool->tail = tail; return 0; @@ -102,26 +106,16 @@ static u8 xgene_enet_hdr_len(const void *data) return (eth->h_proto == htons(ETH_P_8021Q)) ? VLAN_ETH_HLEN : ETH_HLEN; } -static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) -{ - u32 __iomem *cmd_base = ring->cmd_base; - u32 ring_state, num_msgs; - - ring_state = ioread32(&cmd_base[1]); - num_msgs = ring_state & CREATE_MASK(NUMMSGSINQ_POS, NUMMSGSINQ_LEN); - - return num_msgs >> NUMMSGSINQ_POS; -} - static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool) { + struct xgene_enet_pdata *pdata = netdev_priv(buf_pool->ndev); struct xgene_enet_raw_desc16 *raw_desc; u32 slots = buf_pool->slots - 1; u32 tail = buf_pool->tail; u32 userinfo; int i, len; - len = xgene_enet_ring_len(buf_pool); + len = pdata->ring_ops->len(buf_pool); for (i = 0; i < len; i++) { tail = (tail - 1) & slots; raw_desc = &buf_pool->raw_desc16[tail]; @@ -131,7 +125,7 @@ static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool) dev_kfree_skb_any(buf_pool->rx_skb[userinfo]); } - iowrite32(-len, buf_pool->cmd); + pdata->ring_ops->wr_cmd(buf_pool, -len); buf_pool->tail = tail; } @@ -263,8 +257,8 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, struct xgene_enet_desc_ring *cp_ring = tx_ring->cp_ring; u32 tx_level, cq_level; - tx_level = xgene_enet_ring_len(tx_ring); - cq_level = xgene_enet_ring_len(cp_ring); + tx_level = pdata->ring_ops->len(tx_ring); + cq_level = pdata->ring_ops->len(cp_ring); if (unlikely(tx_level > pdata->tx_qcnt_hi || cq_level > pdata->cp_qcnt_hi)) { netif_stop_queue(ndev); @@ -276,7 +270,7 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - iowrite32(1, tx_ring->cmd); + pdata->ring_ops->wr_cmd(tx_ring, 1); skb_tx_timestamp(skb); tx_ring->tail = (tx_ring->tail + 1) & (tx_ring->slots - 1); @@ -389,11 +383,11 @@ static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring, } while (--budget); if (likely(count)) { - iowrite32(-count, ring->cmd); + pdata->ring_ops->wr_cmd(ring, -count); ring->head = head; if (netif_queue_stopped(ring->ndev)) { - if (xgene_enet_ring_len(ring) < pdata->cp_qcnt_low) + if (pdata->ring_ops->len(ring) < pdata->cp_qcnt_low) netif_wake_queue(ring->ndev); } } @@ -510,6 +504,7 @@ static int xgene_enet_open(struct net_device *ndev) else schedule_delayed_work(&pdata->link_work, PHY_POLL_LINK_OFF); + netif_carrier_off(ndev); netif_start_queue(ndev); return ret; @@ -545,7 +540,7 @@ static void xgene_enet_delete_ring(struct xgene_enet_desc_ring *ring) pdata = netdev_priv(ring->ndev); dev = ndev_to_dev(ring->ndev); - xgene_enet_clear_ring(ring); + pdata->ring_ops->clear(ring); dma_free_coherent(dev, ring->size, ring->desc_addr, ring->dma); } @@ -598,15 +593,17 @@ static int xgene_enet_get_ring_size(struct device *dev, static void xgene_enet_free_desc_ring(struct xgene_enet_desc_ring *ring) { + struct xgene_enet_pdata *pdata; struct device *dev; if (!ring) return; dev = ndev_to_dev(ring->ndev); + pdata = netdev_priv(ring->ndev); if (ring->desc_addr) { - xgene_enet_clear_ring(ring); + pdata->ring_ops->clear(ring); dma_free_coherent(dev, ring->size, ring->desc_addr, ring->dma); } devm_kfree(dev, ring); @@ -637,6 +634,25 @@ static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata) } } +static bool is_irq_mbox_required(struct xgene_enet_pdata *pdata, + struct xgene_enet_desc_ring *ring) +{ + if ((pdata->enet_id == XGENE_ENET2) && + (xgene_enet_ring_owner(ring->id) == RING_OWNER_CPU)) { + return true; + } + + return false; +} + +static void __iomem *xgene_enet_ring_cmd_base(struct xgene_enet_pdata *pdata, + struct xgene_enet_desc_ring *ring) +{ + u8 num_ring_id_shift = pdata->ring_ops->num_ring_id_shift; + + return pdata->ring_cmd_addr + (ring->num << num_ring_id_shift); +} + static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring( struct net_device *ndev, u32 ring_num, enum xgene_enet_ring_cfgsize cfgsize, u32 ring_id) @@ -668,9 +684,20 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring( } ring->size = size; - ring->cmd_base = pdata->ring_cmd_addr + (ring->num << 6); + if (is_irq_mbox_required(pdata, ring)) { + ring->irq_mbox_addr = dma_zalloc_coherent(dev, INTR_MBOX_SIZE, + &ring->irq_mbox_dma, GFP_KERNEL); + if (!ring->irq_mbox_addr) { + dma_free_coherent(dev, size, ring->desc_addr, + ring->dma); + devm_kfree(dev, ring); + return NULL; + } + } + + ring->cmd_base = xgene_enet_ring_cmd_base(pdata, ring); ring->cmd = ring->cmd_base + INC_DEC_CMD_ADDR; - ring = xgene_enet_setup_ring(ring); + ring = pdata->ring_ops->setup(ring); netdev_dbg(ndev, "ring info: num=%d size=%d id=%d slots=%d\n", ring->num, ring->size, ring->id, ring->slots); @@ -682,12 +709,34 @@ static u16 xgene_enet_get_ring_id(enum xgene_ring_owner owner, u8 bufnum) return (owner << 6) | (bufnum & GENMASK(5, 0)); } +static enum xgene_ring_owner xgene_derive_ring_owner(struct xgene_enet_pdata *p) +{ + enum xgene_ring_owner owner; + + if (p->enet_id == XGENE_ENET1) { + switch (p->phy_mode) { + case PHY_INTERFACE_MODE_SGMII: + owner = RING_OWNER_ETH0; + break; + default: + owner = (!p->port_id) ? RING_OWNER_ETH0 : + RING_OWNER_ETH1; + break; + } + } else { + owner = (!p->port_id) ? RING_OWNER_ETH0 : RING_OWNER_ETH1; + } + + return owner; +} + static int xgene_enet_create_desc_rings(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct device *dev = ndev_to_dev(ndev); struct xgene_enet_desc_ring *rx_ring, *tx_ring, *cp_ring; struct xgene_enet_desc_ring *buf_pool = NULL; + enum xgene_ring_owner owner; u8 cpu_bufnum = pdata->cpu_bufnum; u8 eth_bufnum = pdata->eth_bufnum; u8 bp_bufnum = pdata->bp_bufnum; @@ -696,6 +745,7 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) int ret; /* allocate rx descriptor ring */ + owner = xgene_derive_ring_owner(pdata); ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++); rx_ring = xgene_enet_create_desc_ring(ndev, ring_num++, RING_CFGSIZE_16KB, ring_id); @@ -705,7 +755,8 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) } /* allocate buffer pool for receiving packets */ - ring_id = xgene_enet_get_ring_id(RING_OWNER_ETH0, bp_bufnum++); + owner = xgene_derive_ring_owner(pdata); + ring_id = xgene_enet_get_ring_id(owner, bp_bufnum++); buf_pool = xgene_enet_create_desc_ring(ndev, ring_num++, RING_CFGSIZE_2KB, ring_id); if (!buf_pool) { @@ -734,7 +785,8 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) pdata->rx_ring = rx_ring; /* allocate tx descriptor ring */ - ring_id = xgene_enet_get_ring_id(RING_OWNER_ETH0, eth_bufnum++); + owner = xgene_derive_ring_owner(pdata); + ring_id = xgene_enet_get_ring_id(owner, eth_bufnum++); tx_ring = xgene_enet_create_desc_ring(ndev, ring_num++, RING_CFGSIZE_16KB, ring_id); if (!tx_ring) { @@ -824,14 +876,21 @@ static int xgene_get_port_id(struct device *dev, struct xgene_enet_pdata *pdata) int ret; ret = device_property_read_u32(dev, "port-id", &id); - if (!ret && id > 1) { - dev_err(dev, "Incorrect port-id specified\n"); - return -ENODEV; - } - pdata->port_id = id; + switch (ret) { + case -EINVAL: + pdata->port_id = 0; + ret = 0; + break; + case 0: + pdata->port_id = id & BIT(0); + break; + default: + dev_err(dev, "Incorrect port-id specified: errno: %d\n", ret); + break; + } - return 0; + return ret; } static int xgene_get_mac_address(struct device *dev, @@ -876,6 +935,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) struct device *dev; struct resource *res; void __iomem *base_addr; + u32 offset; int ret; pdev = pdata->pdev; @@ -962,14 +1022,20 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) pdata->clk = NULL; } - base_addr = pdata->base_addr - (pdata->port_id * MAC_OFFSET); + if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII) + base_addr = pdata->base_addr - (pdata->port_id * MAC_OFFSET); + else + base_addr = pdata->base_addr; pdata->eth_csr_addr = base_addr + BLOCK_ETH_CSR_OFFSET; pdata->eth_ring_if_addr = base_addr + BLOCK_ETH_RING_IF_OFFSET; pdata->eth_diag_csr_addr = base_addr + BLOCK_ETH_DIAG_CSR_OFFSET; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII || pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) { pdata->mcx_mac_addr = pdata->base_addr + BLOCK_ETH_MAC_OFFSET; - pdata->mcx_mac_csr_addr = base_addr + BLOCK_ETH_MAC_CSR_OFFSET; + offset = (pdata->enet_id == XGENE_ENET1) ? + BLOCK_ETH_MAC_CSR_OFFSET : + X2_BLOCK_ETH_MAC_CSR_OFFSET; + pdata->mcx_mac_csr_addr = base_addr + offset; } else { pdata->mcx_mac_addr = base_addr + BLOCK_AXG_MAC_OFFSET; pdata->mcx_mac_csr_addr = base_addr + BLOCK_AXG_MAC_CSR_OFFSET; @@ -1034,23 +1100,44 @@ static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) break; } - switch (pdata->port_id) { - case 0: - pdata->cpu_bufnum = START_CPU_BUFNUM_0; - pdata->eth_bufnum = START_ETH_BUFNUM_0; - pdata->bp_bufnum = START_BP_BUFNUM_0; - pdata->ring_num = START_RING_NUM_0; - break; - case 1: - pdata->cpu_bufnum = START_CPU_BUFNUM_1; - pdata->eth_bufnum = START_ETH_BUFNUM_1; - pdata->bp_bufnum = START_BP_BUFNUM_1; - pdata->ring_num = START_RING_NUM_1; - break; - default: - break; + if (pdata->enet_id == XGENE_ENET1) { + switch (pdata->port_id) { + case 0: + pdata->cpu_bufnum = START_CPU_BUFNUM_0; + pdata->eth_bufnum = START_ETH_BUFNUM_0; + pdata->bp_bufnum = START_BP_BUFNUM_0; + pdata->ring_num = START_RING_NUM_0; + break; + case 1: + pdata->cpu_bufnum = START_CPU_BUFNUM_1; + pdata->eth_bufnum = START_ETH_BUFNUM_1; + pdata->bp_bufnum = START_BP_BUFNUM_1; + pdata->ring_num = START_RING_NUM_1; + break; + default: + break; + } + pdata->ring_ops = &xgene_ring1_ops; + } else { + switch (pdata->port_id) { + case 0: + pdata->cpu_bufnum = X2_START_CPU_BUFNUM_0; + pdata->eth_bufnum = X2_START_ETH_BUFNUM_0; + pdata->bp_bufnum = X2_START_BP_BUFNUM_0; + pdata->ring_num = X2_START_RING_NUM_0; + break; + case 1: + pdata->cpu_bufnum = X2_START_CPU_BUFNUM_1; + pdata->eth_bufnum = X2_START_ETH_BUFNUM_1; + pdata->bp_bufnum = X2_START_BP_BUFNUM_1; + pdata->ring_num = X2_START_RING_NUM_1; + break; + default: + break; + } + pdata->rm = RM0; + pdata->ring_ops = &xgene_ring2_ops; } - } static void xgene_enet_napi_add(struct xgene_enet_pdata *pdata) @@ -1086,6 +1173,9 @@ static int xgene_enet_probe(struct platform_device *pdev) struct xgene_enet_pdata *pdata; struct device *dev = &pdev->dev; struct xgene_mac_ops *mac_ops; +#ifdef CONFIG_OF + const struct of_device_id *of_id; +#endif int ret; ndev = alloc_etherdev(sizeof(struct xgene_enet_pdata)); @@ -1104,6 +1194,17 @@ static int xgene_enet_probe(struct platform_device *pdev) NETIF_F_GSO | NETIF_F_GRO; +#ifdef CONFIG_OF + of_id = of_match_device(xgene_enet_of_match, &pdev->dev); + if (of_id) { + pdata->enet_id = (enum xgene_enet_id)of_id->data; + if (!pdata->enet_id) { + free_netdev(ndev); + return -ENODEV; + } + } +#endif + ret = xgene_enet_get_resources(pdata); if (ret) goto err; @@ -1175,9 +1276,11 @@ MODULE_DEVICE_TABLE(acpi, xgene_enet_acpi_match); #ifdef CONFIG_OF static const struct of_device_id xgene_enet_of_match[] = { - {.compatible = "apm,xgene-enet",}, - {.compatible = "apm,xgene1-sgenet",}, - {.compatible = "apm,xgene1-xgenet",}, + {.compatible = "apm,xgene-enet", .data = (void *)XGENE_ENET1}, + {.compatible = "apm,xgene1-sgenet", .data = (void *)XGENE_ENET1}, + {.compatible = "apm,xgene1-xgenet", .data = (void *)XGENE_ENET1}, + {.compatible = "apm,xgene2-sgenet", .data = (void *)XGENE_ENET2}, + {.compatible = "apm,xgene2-xgenet", .data = (void *)XGENE_ENET2}, {}, }; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 8f3d232b09bc..1c85fc87703a 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -35,6 +35,7 @@ #include <linux/if_vlan.h> #include <linux/phy.h> #include "xgene_enet_hw.h" +#include "xgene_enet_ring2.h" #define XGENE_DRV_VERSION "v1.0" #define XGENE_ENET_MAX_MTU 1536 @@ -51,12 +52,26 @@ #define START_BP_BUFNUM_1 0x2A #define START_RING_NUM_1 264 +#define X2_START_CPU_BUFNUM_0 0 +#define X2_START_ETH_BUFNUM_0 0 +#define X2_START_BP_BUFNUM_0 0x20 +#define X2_START_RING_NUM_0 0 +#define X2_START_CPU_BUFNUM_1 0xc +#define X2_START_ETH_BUFNUM_1 0 +#define X2_START_BP_BUFNUM_1 0x20 +#define X2_START_RING_NUM_1 256 + #define IRQ_ID_SIZE 16 #define XGENE_MAX_TXC_RINGS 1 #define PHY_POLL_LINK_ON (10 * HZ) #define PHY_POLL_LINK_OFF (PHY_POLL_LINK_ON / 5) +enum xgene_enet_id { + XGENE_ENET1 = 1, + XGENE_ENET2 +}; + /* software context of a descriptor ring */ struct xgene_enet_desc_ring { struct net_device *ndev; @@ -68,10 +83,12 @@ struct xgene_enet_desc_ring { u16 irq; char irq_name[IRQ_ID_SIZE]; u32 size; - u32 state[NUM_RING_CONFIG]; + u32 state[X2_NUM_RING_CONFIG]; void __iomem *cmd_base; void __iomem *cmd; dma_addr_t dma; + dma_addr_t irq_mbox_dma; + void *irq_mbox_addr; u16 dst_ring_num; u8 nbufpool; struct sk_buff *(*rx_skb); @@ -105,6 +122,15 @@ struct xgene_port_ops { void (*shutdown)(struct xgene_enet_pdata *pdata); }; +struct xgene_ring_ops { + u8 num_ring_config; + u8 num_ring_id_shift; + struct xgene_enet_desc_ring * (*setup)(struct xgene_enet_desc_ring *); + void (*clear)(struct xgene_enet_desc_ring *); + void (*wr_cmd)(struct xgene_enet_desc_ring *, int); + u32 (*len)(struct xgene_enet_desc_ring *); +}; + /* ethernet private data */ struct xgene_enet_pdata { struct net_device *ndev; @@ -113,6 +139,7 @@ struct xgene_enet_pdata { int phy_speed; struct clk *clk; struct platform_device *pdev; + enum xgene_enet_id enet_id; struct xgene_enet_desc_ring *tx_ring; struct xgene_enet_desc_ring *rx_ring; char *dev_name; @@ -136,6 +163,7 @@ struct xgene_enet_pdata { struct rtnl_link_stats64 stats; struct xgene_mac_ops *mac_ops; struct xgene_port_ops *port_ops; + struct xgene_ring_ops *ring_ops; struct delayed_work link_work; u32 port_id; u8 cpu_bufnum; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c new file mode 100644 index 000000000000..0b6896bb351e --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c @@ -0,0 +1,200 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2015, Applied Micro Circuits Corporation + * Author: Iyappan Subramanian <isubramanian@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "xgene_enet_main.h" +#include "xgene_enet_hw.h" +#include "xgene_enet_ring2.h" + +static void xgene_enet_ring_init(struct xgene_enet_desc_ring *ring) +{ + u32 *ring_cfg = ring->state; + u64 addr = ring->dma; + + if (xgene_enet_ring_owner(ring->id) == RING_OWNER_CPU) { + ring_cfg[0] |= SET_VAL(X2_INTLINE, ring->id & RING_BUFNUM_MASK); + ring_cfg[3] |= SET_BIT(X2_DEQINTEN); + } + ring_cfg[0] |= SET_VAL(X2_CFGCRID, 1); + + addr >>= 8; + ring_cfg[2] |= QCOHERENT | SET_VAL(RINGADDRL, addr); + + addr >>= 27; + ring_cfg[3] |= SET_VAL(RINGSIZE, ring->cfgsize) + | ACCEPTLERR + | SET_VAL(RINGADDRH, addr); + ring_cfg[4] |= SET_VAL(X2_SELTHRSH, 1); + ring_cfg[5] |= SET_BIT(X2_QBASE_AM) | SET_BIT(X2_MSG_AM); +} + +static void xgene_enet_ring_set_type(struct xgene_enet_desc_ring *ring) +{ + u32 *ring_cfg = ring->state; + bool is_bufpool; + u32 val; + + is_bufpool = xgene_enet_is_bufpool(ring->id); + val = (is_bufpool) ? RING_BUFPOOL : RING_REGULAR; + ring_cfg[4] |= SET_VAL(X2_RINGTYPE, val); + if (is_bufpool) + ring_cfg[3] |= SET_VAL(RINGMODE, BUFPOOL_MODE); +} + +static void xgene_enet_ring_set_recombbuf(struct xgene_enet_desc_ring *ring) +{ + u32 *ring_cfg = ring->state; + + ring_cfg[3] |= RECOMBBUF; + ring_cfg[4] |= SET_VAL(X2_RECOMTIMEOUT, 0x7); +} + +static void xgene_enet_ring_wr32(struct xgene_enet_desc_ring *ring, + u32 offset, u32 data) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); + + iowrite32(data, pdata->ring_csr_addr + offset); +} + +static void xgene_enet_write_ring_state(struct xgene_enet_desc_ring *ring) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); + int i; + + xgene_enet_ring_wr32(ring, CSR_RING_CONFIG, ring->num); + for (i = 0; i < pdata->ring_ops->num_ring_config; i++) { + xgene_enet_ring_wr32(ring, CSR_RING_WR_BASE + (i * 4), + ring->state[i]); + } +} + +static void xgene_enet_clr_ring_state(struct xgene_enet_desc_ring *ring) +{ + memset(ring->state, 0, sizeof(ring->state)); + xgene_enet_write_ring_state(ring); +} + +static void xgene_enet_set_ring_state(struct xgene_enet_desc_ring *ring) +{ + enum xgene_ring_owner owner; + + xgene_enet_ring_set_type(ring); + + owner = xgene_enet_ring_owner(ring->id); + if (owner == RING_OWNER_ETH0 || owner == RING_OWNER_ETH1) + xgene_enet_ring_set_recombbuf(ring); + + xgene_enet_ring_init(ring); + xgene_enet_write_ring_state(ring); +} + +static void xgene_enet_set_ring_id(struct xgene_enet_desc_ring *ring) +{ + u32 ring_id_val, ring_id_buf; + bool is_bufpool; + + if (xgene_enet_ring_owner(ring->id) == RING_OWNER_CPU) + return; + + is_bufpool = xgene_enet_is_bufpool(ring->id); + + ring_id_val = ring->id & GENMASK(9, 0); + ring_id_val |= OVERWRITE; + + ring_id_buf = (ring->num << 9) & GENMASK(18, 9); + ring_id_buf |= PREFETCH_BUF_EN; + if (is_bufpool) + ring_id_buf |= IS_BUFFER_POOL; + + xgene_enet_ring_wr32(ring, CSR_RING_ID, ring_id_val); + xgene_enet_ring_wr32(ring, CSR_RING_ID_BUF, ring_id_buf); +} + +static void xgene_enet_clr_desc_ring_id(struct xgene_enet_desc_ring *ring) +{ + u32 ring_id; + + ring_id = ring->id | OVERWRITE; + xgene_enet_ring_wr32(ring, CSR_RING_ID, ring_id); + xgene_enet_ring_wr32(ring, CSR_RING_ID_BUF, 0); +} + +static struct xgene_enet_desc_ring *xgene_enet_setup_ring( + struct xgene_enet_desc_ring *ring) +{ + bool is_bufpool; + u32 addr, i; + + xgene_enet_clr_ring_state(ring); + xgene_enet_set_ring_state(ring); + xgene_enet_set_ring_id(ring); + + ring->slots = xgene_enet_get_numslots(ring->id, ring->size); + + is_bufpool = xgene_enet_is_bufpool(ring->id); + if (is_bufpool || xgene_enet_ring_owner(ring->id) != RING_OWNER_CPU) + return ring; + + addr = CSR_VMID0_INTR_MBOX + (4 * (ring->id & RING_BUFNUM_MASK)); + xgene_enet_ring_wr32(ring, addr, ring->irq_mbox_dma >> 10); + + for (i = 0; i < ring->slots; i++) + xgene_enet_mark_desc_slot_empty(&ring->raw_desc[i]); + + return ring; +} + +static void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring) +{ + xgene_enet_clr_desc_ring_id(ring); + xgene_enet_clr_ring_state(ring); +} + +static void xgene_enet_wr_cmd(struct xgene_enet_desc_ring *ring, int count) +{ + u32 data = 0; + + if (xgene_enet_ring_owner(ring->id) == RING_OWNER_CPU) { + data = SET_VAL(X2_INTLINE, ring->id & RING_BUFNUM_MASK) | + INTR_CLEAR; + } + data |= (count & GENMASK(16, 0)); + + iowrite32(data, ring->cmd); +} + +static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) +{ + u32 __iomem *cmd_base = ring->cmd_base; + u32 ring_state, num_msgs; + + ring_state = ioread32(&cmd_base[1]); + num_msgs = GET_VAL(X2_NUMMSGSINQ, ring_state); + + return num_msgs; +} + +struct xgene_ring_ops xgene_ring2_ops = { + .num_ring_config = X2_NUM_RING_CONFIG, + .num_ring_id_shift = 13, + .setup = xgene_enet_setup_ring, + .clear = xgene_enet_clear_ring, + .wr_cmd = xgene_enet_wr_cmd, + .len = xgene_enet_ring_len, +}; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.h b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.h new file mode 100644 index 000000000000..8b235db23c42 --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.h @@ -0,0 +1,49 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2015, Applied Micro Circuits Corporation + * Author: Iyappan Subramanian <isubramanian@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __XGENE_ENET_RING2_H__ +#define __XGENE_ENET_RING2_H__ + +#include "xgene_enet_main.h" + +#define X2_NUM_RING_CONFIG 6 + +#define INTR_MBOX_SIZE 1024 +#define CSR_VMID0_INTR_MBOX 0x0270 +#define INTR_CLEAR BIT(23) + +#define X2_MSG_AM_POS 10 +#define X2_QBASE_AM_POS 11 +#define X2_INTLINE_POS 24 +#define X2_INTLINE_LEN 5 +#define X2_CFGCRID_POS 29 +#define X2_CFGCRID_LEN 3 +#define X2_SELTHRSH_POS 7 +#define X2_SELTHRSH_LEN 3 +#define X2_RINGTYPE_POS 23 +#define X2_RINGTYPE_LEN 2 +#define X2_DEQINTEN_POS 29 +#define X2_RECOMTIMEOUT_POS 0 +#define X2_RECOMTIMEOUT_LEN 7 +#define X2_NUMMSGSINQ_POS 0 +#define X2_NUMMSGSINQ_LEN 17 + +extern struct xgene_ring_ops xgene_ring2_ops; + +#endif /* __XGENE_ENET_RING2_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c index f27fb6f2a93b..ff240b3cb2b8 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c @@ -21,6 +21,7 @@ #include "xgene_enet_main.h" #include "xgene_enet_hw.h" #include "xgene_enet_sgmac.h" +#include "xgene_enet_xgmac.h" static void xgene_enet_wr_csr(struct xgene_enet_pdata *p, u32 offset, u32 val) { @@ -39,6 +40,14 @@ static void xgene_enet_wr_diag_csr(struct xgene_enet_pdata *p, iowrite32(val, p->eth_diag_csr_addr + offset); } +static void xgene_enet_wr_mcx_csr(struct xgene_enet_pdata *pdata, + u32 offset, u32 val) +{ + void __iomem *addr = pdata->mcx_mac_csr_addr + offset; + + iowrite32(val, addr); +} + static bool xgene_enet_wr_indirect(struct xgene_indirect_ctl *ctl, u32 wr_addr, u32 wr_data) { @@ -140,8 +149,9 @@ static int xgene_enet_ecc_init(struct xgene_enet_pdata *p) static void xgene_enet_config_ring_if_assoc(struct xgene_enet_pdata *p) { - u32 val = 0xffffffff; + u32 val; + val = (p->enet_id == XGENE_ENET1) ? 0xffffffff : 0; xgene_enet_wr_ring_if(p, ENET_CFGSSQMIWQASSOC_ADDR, val); xgene_enet_wr_ring_if(p, ENET_CFGSSQMIFPQASSOC_ADDR, val); } @@ -227,6 +237,8 @@ static void xgene_sgmac_init(struct xgene_enet_pdata *p) { u32 data, loop = 10; u32 offset = p->port_id * 4; + u32 enet_spare_cfg_reg, rsif_config_reg; + u32 cfg_bypass_reg, rx_dv_gate_reg; xgene_sgmac_reset(p); @@ -239,7 +251,7 @@ static void xgene_sgmac_init(struct xgene_enet_pdata *p) SGMII_STATUS_ADDR >> 2); if ((data & AUTO_NEG_COMPLETE) && (data & LINK_STATUS)) break; - usleep_range(10, 20); + usleep_range(1000, 2000); } if (!(data & AUTO_NEG_COMPLETE) || !(data & LINK_STATUS)) netdev_err(p->ndev, "Auto-negotiation failed\n"); @@ -249,33 +261,38 @@ static void xgene_sgmac_init(struct xgene_enet_pdata *p) xgene_enet_wr_mac(p, MAC_CONFIG_2_ADDR, data | FULL_DUPLEX2); xgene_enet_wr_mac(p, INTERFACE_CONTROL_ADDR, ENET_GHD_MODE); - data = xgene_enet_rd_csr(p, ENET_SPARE_CFG_REG_ADDR); + if (p->enet_id == XGENE_ENET1) { + enet_spare_cfg_reg = ENET_SPARE_CFG_REG_ADDR; + rsif_config_reg = RSIF_CONFIG_REG_ADDR; + cfg_bypass_reg = CFG_BYPASS_ADDR; + rx_dv_gate_reg = SG_RX_DV_GATE_REG_0_ADDR; + } else { + enet_spare_cfg_reg = XG_ENET_SPARE_CFG_REG_ADDR; + rsif_config_reg = XG_RSIF_CONFIG_REG_ADDR; + cfg_bypass_reg = XG_CFG_BYPASS_ADDR; + rx_dv_gate_reg = XG_MCX_RX_DV_GATE_REG_0_ADDR; + } + + data = xgene_enet_rd_csr(p, enet_spare_cfg_reg); data |= MPA_IDLE_WITH_QMI_EMPTY; - xgene_enet_wr_csr(p, ENET_SPARE_CFG_REG_ADDR, data); + xgene_enet_wr_csr(p, enet_spare_cfg_reg, data); xgene_sgmac_set_mac_addr(p); - data = xgene_enet_rd_csr(p, DEBUG_REG_ADDR); - data |= CFG_BYPASS_UNISEC_TX | CFG_BYPASS_UNISEC_RX; - xgene_enet_wr_csr(p, DEBUG_REG_ADDR, data); - /* Adjust MDC clock frequency */ data = xgene_enet_rd_mac(p, MII_MGMT_CONFIG_ADDR); MGMT_CLOCK_SEL_SET(&data, 7); xgene_enet_wr_mac(p, MII_MGMT_CONFIG_ADDR, data); /* Enable drop if bufpool not available */ - data = xgene_enet_rd_csr(p, RSIF_CONFIG_REG_ADDR); + data = xgene_enet_rd_csr(p, rsif_config_reg); data |= CFG_RSIF_FPBUFF_TIMEOUT_EN; - xgene_enet_wr_csr(p, RSIF_CONFIG_REG_ADDR, data); - - /* Rtype should be copied from FP */ - xgene_enet_wr_csr(p, RSIF_RAM_DBG_REG0_ADDR, 0); + xgene_enet_wr_csr(p, rsif_config_reg, data); /* Bypass traffic gating */ - xgene_enet_wr_csr(p, CFG_LINK_AGGR_RESUME_0_ADDR + offset, TX_PORT0); - xgene_enet_wr_csr(p, CFG_BYPASS_ADDR, RESUME_TX); - xgene_enet_wr_csr(p, SG_RX_DV_GATE_REG_0_ADDR + offset, RESUME_RX0); + xgene_enet_wr_csr(p, XG_ENET_SPARE_CFG_REG_1_ADDR, 0x84); + xgene_enet_wr_csr(p, cfg_bypass_reg, RESUME_TX); + xgene_enet_wr_mcx_csr(p, rx_dv_gate_reg + offset, RESUME_RX0); } static void xgene_sgmac_rxtx(struct xgene_enet_pdata *p, u32 bits, bool set) @@ -331,14 +348,23 @@ static void xgene_enet_cle_bypass(struct xgene_enet_pdata *p, u32 dst_ring_num, u16 bufpool_id) { u32 data, fpsel; + u32 cle_bypass_reg0, cle_bypass_reg1; u32 offset = p->port_id * MAC_OFFSET; + if (p->enet_id == XGENE_ENET1) { + cle_bypass_reg0 = CLE_BYPASS_REG0_0_ADDR; + cle_bypass_reg1 = CLE_BYPASS_REG1_0_ADDR; + } else { + cle_bypass_reg0 = XCLE_BYPASS_REG0_ADDR; + cle_bypass_reg1 = XCLE_BYPASS_REG1_ADDR; + } + data = CFG_CLE_BYPASS_EN0; - xgene_enet_wr_csr(p, CLE_BYPASS_REG0_0_ADDR + offset, data); + xgene_enet_wr_csr(p, cle_bypass_reg0 + offset, data); fpsel = xgene_enet_ring_bufnum(bufpool_id) - 0x20; data = CFG_CLE_DSTQID0(dst_ring_num) | CFG_CLE_FPSEL0(fpsel); - xgene_enet_wr_csr(p, CLE_BYPASS_REG1_0_ADDR + offset, data); + xgene_enet_wr_csr(p, cle_bypass_reg1 + offset, data); } static void xgene_enet_shutdown(struct xgene_enet_pdata *p) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index a18a9d1f1143..27ba2fe3fca6 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -122,7 +122,6 @@ static bool xgene_enet_rd_indirect(void __iomem *addr, void __iomem *rd, return true; } - static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata, u32 rd_addr, u32 *rd_data) { diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h index 5a5296a6d1df..bf0a99435737 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h @@ -21,9 +21,28 @@ #ifndef __XGENE_ENET_XGMAC_H__ #define __XGENE_ENET_XGMAC_H__ +#define X2_BLOCK_ETH_MAC_CSR_OFFSET 0x3000 #define BLOCK_AXG_MAC_OFFSET 0x0800 #define BLOCK_AXG_MAC_CSR_OFFSET 0x2000 +#define XGENET_CONFIG_REG_ADDR 0x20 +#define XGENET_SRST_ADDR 0x00 +#define XGENET_CLKEN_ADDR 0x08 + +#define CSR_CLK BIT(0) +#define XGENET_CLK BIT(1) +#define PCS_CLK BIT(3) +#define AN_REF_CLK BIT(4) +#define AN_CLK BIT(5) +#define AD_CLK BIT(6) + +#define CSR_RST BIT(0) +#define XGENET_RST BIT(1) +#define PCS_RST BIT(3) +#define AN_REF_RST BIT(4) +#define AN_RST BIT(5) +#define AD_RST BIT(6) + #define AXGMAC_CONFIG_0 0x0000 #define AXGMAC_CONFIG_1 0x0004 #define HSTMACRST BIT(31) @@ -38,6 +57,7 @@ #define HSTMACADR_MSW_ADDR 0x0014 #define HSTMAXFRAME_LENGTH_ADDR 0x0020 +#define XG_MCX_RX_DV_GATE_REG_0_ADDR 0x0004 #define XG_RSIF_CONFIG_REG_ADDR 0x00a0 #define XCLE_BYPASS_REG0_ADDR 0x0160 #define XCLE_BYPASS_REG1_ADDR 0x0164 diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 783543ad1fcf..084a50a555de 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -456,6 +456,67 @@ static int bcm_sysport_set_wol(struct net_device *dev, return 0; } +static int bcm_sysport_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + u32 reg; + + reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(0)); + + ec->tx_coalesce_usecs = (reg >> RING_TIMEOUT_SHIFT) * 8192 / 1000; + ec->tx_max_coalesced_frames = reg & RING_INTR_THRESH_MASK; + + reg = rdma_readl(priv, RDMA_MBDONE_INTR); + + ec->rx_coalesce_usecs = (reg >> RDMA_TIMEOUT_SHIFT) * 8192 / 1000; + ec->rx_max_coalesced_frames = reg & RDMA_INTR_THRESH_MASK; + + return 0; +} + +static int bcm_sysport_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + unsigned int i; + u32 reg; + + /* Base system clock is 125Mhz, DMA timeout is this reference clock + * divided by 1024, which yield roughly 8.192 us, our maximum value has + * to fit in the RING_TIMEOUT_MASK (16 bits). + */ + if (ec->tx_max_coalesced_frames > RING_INTR_THRESH_MASK || + ec->tx_coalesce_usecs > (RING_TIMEOUT_MASK * 8) + 1 || + ec->rx_max_coalesced_frames > RDMA_INTR_THRESH_MASK || + ec->rx_coalesce_usecs > (RDMA_TIMEOUT_MASK * 8) + 1) + return -EINVAL; + + if ((ec->tx_coalesce_usecs == 0 && ec->tx_max_coalesced_frames == 0) || + (ec->rx_coalesce_usecs == 0 && ec->rx_max_coalesced_frames == 0)) + return -EINVAL; + + for (i = 0; i < dev->num_tx_queues; i++) { + reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(i)); + reg &= ~(RING_INTR_THRESH_MASK | + RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT); + reg |= ec->tx_max_coalesced_frames; + reg |= DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000, 8192) << + RING_TIMEOUT_SHIFT; + tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(i)); + } + + reg = rdma_readl(priv, RDMA_MBDONE_INTR); + reg &= ~(RDMA_INTR_THRESH_MASK | + RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT); + reg |= ec->rx_max_coalesced_frames; + reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192) << + RDMA_TIMEOUT_SHIFT; + rdma_writel(priv, reg, RDMA_MBDONE_INTR); + + return 0; +} + static void bcm_sysport_free_cb(struct bcm_sysport_cb *cb) { dev_kfree_skb_any(cb->skb); @@ -1641,6 +1702,8 @@ static struct ethtool_ops bcm_sysport_ethtool_ops = { .get_sset_count = bcm_sysport_get_sset_count, .get_wol = bcm_sysport_get_wol, .set_wol = bcm_sysport_set_wol, + .get_coalesce = bcm_sysport_get_coalesce, + .set_coalesce = bcm_sysport_set_coalesce, }; static const struct net_device_ops bcm_sysport_netdev_ops = { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index e2c043eabbf3..42a4b4a0bc14 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -292,7 +292,7 @@ struct bcm_rsb { #define RDMA_END_ADDR_LO 0x102c #define RDMA_MBDONE_INTR 0x1030 -#define RDMA_INTR_THRESH_MASK 0xff +#define RDMA_INTR_THRESH_MASK 0x1ff #define RDMA_TIMEOUT_SHIFT 16 #define RDMA_TIMEOUT_MASK 0xffff diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ec56a9b65dc3..2ef202d10948 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -662,7 +662,7 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, static void bnx2x_frag_free(const struct bnx2x_fastpath *fp, void *data) { if (fp->rx_frag_size) - put_page(virt_to_head_page(data)); + skb_free_frag(data); else kfree(data); } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 069952fa5d64..73c934cf6c61 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6618,7 +6618,7 @@ static void tg3_tx(struct tg3_napi *tnapi) static void tg3_frag_free(bool is_frag, void *data) { if (is_frag) - put_page(virt_to_head_page(data)); + skb_free_frag(data); else kfree(data); } diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 61aa570aad9a..e7c10b0addb5 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -54,6 +54,8 @@ #define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1)) #define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1)) +#define GEM_MTU_MIN_SIZE 68 + /* * Graceful stop timeouts in us. We should allow up to * 1 frame time (10 Mbits/s, full-duplex, ignoring collisions) @@ -782,7 +784,7 @@ static int gem_rx(struct macb *bp, int budget) } /* now everything is ready for receiving packet */ bp->rx_skbuff[entry] = NULL; - len = MACB_BFEXT(RX_FRMLEN, ctrl); + len = ctrl & bp->rx_frm_len_mask; netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len); @@ -828,7 +830,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, struct macb_dma_desc *desc; desc = macb_rx_desc(bp, last_frag); - len = MACB_BFEXT(RX_FRMLEN, desc->ctrl); + len = desc->ctrl & bp->rx_frm_len_mask; netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n", macb_rx_ring_wrap(first_frag), @@ -1642,7 +1644,10 @@ static void macb_init_hw(struct macb *bp) config |= MACB_BF(RBOF, NET_IP_ALIGN); /* Make eth data aligned */ config |= MACB_BIT(PAE); /* PAuse Enable */ config |= MACB_BIT(DRFCS); /* Discard Rx FCS */ - config |= MACB_BIT(BIG); /* Receive oversized frames */ + if (bp->caps | MACB_CAPS_JUMBO) + config |= MACB_BIT(JFRAME); /* Enable jumbo frames */ + else + config |= MACB_BIT(BIG); /* Receive oversized frames */ if (bp->dev->flags & IFF_PROMISC) config |= MACB_BIT(CAF); /* Copy All Frames */ else if (macb_is_gem(bp) && bp->dev->features & NETIF_F_RXCSUM) @@ -1651,8 +1656,13 @@ static void macb_init_hw(struct macb *bp) config |= MACB_BIT(NBC); /* No BroadCast */ config |= macb_dbw(bp); macb_writel(bp, NCFGR, config); + if ((bp->caps | MACB_CAPS_JUMBO) && bp->jumbo_max_len) + gem_writel(bp, JML, bp->jumbo_max_len); bp->speed = SPEED_10; bp->duplex = DUPLEX_HALF; + bp->rx_frm_len_mask = MACB_RX_FRMLEN_MASK; + if (bp->caps | MACB_CAPS_JUMBO) + bp->rx_frm_len_mask = MACB_RX_JFRMLEN_MASK; macb_configure_dma(bp); @@ -1856,6 +1866,26 @@ static int macb_close(struct net_device *dev) return 0; } +static int macb_change_mtu(struct net_device *dev, int new_mtu) +{ + struct macb *bp = netdev_priv(dev); + u32 max_mtu; + + if (netif_running(dev)) + return -EBUSY; + + max_mtu = ETH_DATA_LEN; + if (bp->caps | MACB_CAPS_JUMBO) + max_mtu = gem_readl(bp, JML) - ETH_HLEN - ETH_FCS_LEN; + + if ((new_mtu > max_mtu) || (new_mtu < GEM_MTU_MIN_SIZE)) + return -EINVAL; + + dev->mtu = new_mtu; + + return 0; +} + static void gem_update_stats(struct macb *bp) { int i; @@ -2132,7 +2162,7 @@ static const struct net_device_ops macb_netdev_ops = { .ndo_get_stats = macb_get_stats, .ndo_do_ioctl = macb_ioctl, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, + .ndo_change_mtu = macb_change_mtu, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = macb_poll_controller, @@ -2693,6 +2723,15 @@ static const struct macb_config emac_config = { .init = at91ether_init, }; +static const struct macb_config zynqmp_config = { + .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE | + MACB_CAPS_JUMBO, + .dma_burst_length = 16, + .clk_init = macb_clk_init, + .init = macb_init, + .jumbo_max_len = 10240, +}; + static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,at32ap7000-macb" }, { .compatible = "cdns,at91sam9260-macb", .data = &at91sam9260_config }, @@ -2703,6 +2742,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config }, { .compatible = "cdns,at91rm9200-emac", .data = &emac_config }, { .compatible = "cdns,emac", .data = &emac_config }, + { .compatible = "cdns,zynqmp-gem", .data = &zynqmp_config}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, macb_dt_ids); @@ -2771,6 +2811,10 @@ static int macb_probe(struct platform_device *pdev) bp->pclk = pclk; bp->hclk = hclk; bp->tx_clk = tx_clk; + if (macb_config->jumbo_max_len) { + bp->jumbo_max_len = macb_config->jumbo_max_len; + } + spin_lock_init(&bp->lock); /* setup capabilities */ diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index eb7d76f7bf6a..7d4ef513df75 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -71,6 +71,7 @@ #define GEM_NCFGR 0x0004 /* Network Config */ #define GEM_USRIO 0x000c /* User IO */ #define GEM_DMACFG 0x0010 /* DMA Configuration */ +#define GEM_JML 0x0048 /* Jumbo Max Length */ #define GEM_HRB 0x0080 /* Hash Bottom */ #define GEM_HRT 0x0084 /* Hash Top */ #define GEM_SA1B 0x0088 /* Specific1 Bottom */ @@ -397,6 +398,7 @@ #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 #define MACB_CAPS_MACB_IS_GEM 0x80000000 +#define MACB_CAPS_JUMBO 0x00000008 /* Bit manipulation macros */ #define MACB_BIT(name) \ @@ -514,6 +516,9 @@ struct macb_dma_desc { #define MACB_RX_BROADCAST_OFFSET 31 #define MACB_RX_BROADCAST_SIZE 1 +#define MACB_RX_FRMLEN_MASK 0xFFF +#define MACB_RX_JFRMLEN_MASK 0x3FFF + /* RX checksum offload disabled: bit 24 clear in NCFGR */ #define GEM_RX_TYPEID_MATCH_OFFSET 22 #define GEM_RX_TYPEID_MATCH_SIZE 2 @@ -757,6 +762,7 @@ struct macb_config { int (*clk_init)(struct platform_device *pdev, struct clk **pclk, struct clk **hclk, struct clk **tx_clk); int (*init)(struct platform_device *pdev); + int jumbo_max_len; }; struct macb_queue { @@ -826,6 +832,9 @@ struct macb { unsigned int max_tx_length; u64 ethtool_stats[GEM_STATS_LEN]; + + unsigned int rx_frm_len_mask; + unsigned int jumbo_max_len; }; static inline bool macb_is_gem(struct macb *bp) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 524d11098c56..932ab3b72a4d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -328,6 +328,17 @@ struct adapter_params { unsigned int max_ird_adapter; /* Max read depth per adapter */ }; +/* State needed to monitor the forward progress of SGE Ingress DMA activities + * and possible hangs. + */ +struct sge_idma_monitor_state { + unsigned int idma_1s_thresh; /* 1s threshold in Core Clock ticks */ + unsigned int idma_stalled[2]; /* synthesized stalled timers in HZ */ + unsigned int idma_state[2]; /* IDMA Hang detect state */ + unsigned int idma_qid[2]; /* IDMA Hung Ingress Queue ID */ + unsigned int idma_warn[2]; /* time to warning in HZ */ +}; + #include "t4fw_api.h" #define FW_VERSION(chip) ( \ @@ -630,12 +641,7 @@ struct sge { u32 fl_align; /* response queue message alignment */ u32 fl_starve_thres; /* Free List starvation threshold */ - /* State variables for detecting an SGE Ingress DMA hang */ - unsigned int idma_1s_thresh;/* SGE same State Counter 1s threshold */ - unsigned int idma_stalled[2];/* SGE synthesized stalled timers in HZ */ - unsigned int idma_state[2]; /* SGE IDMA Hang detect state */ - unsigned int idma_qid[2]; /* SGE IDMA Hung Ingress Queue ID */ - + struct sge_idma_monitor_state idma_monitor; unsigned int egr_start; unsigned int egr_sz; unsigned int ingr_start; @@ -1055,7 +1061,7 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb); int t4_ofld_send(struct adapter *adap, struct sk_buff *skb); int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, - struct sge_fl *fl, rspq_handler_t hnd); + struct sge_fl *fl, rspq_handler_t hnd, int cong); int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, struct net_device *dev, struct netdev_queue *netdevq, unsigned int iqid); @@ -1195,12 +1201,15 @@ int t4_init_devlog_params(struct adapter *adapter); int t4_init_sge_params(struct adapter *adapter); int t4_init_tp_params(struct adapter *adap); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); +int t4_init_rss_mode(struct adapter *adap, int mbox); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf); void t4_fatal_err(struct adapter *adapter); int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, int start, int n, const u16 *rspq, unsigned int nrspq); int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, unsigned int flags); +int t4_config_vi_rss(struct adapter *adapter, int mbox, unsigned int viid, + unsigned int flags, unsigned int defq); int t4_read_rss(struct adapter *adapter, u16 *entries); void t4_read_rss_key(struct adapter *adapter, u32 *key); void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx); @@ -1215,6 +1224,7 @@ int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); +unsigned int t4_get_mps_bg_map(struct adapter *adapter, int idx); void t4_pmtx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); int t4_read_cim_ibq(struct adapter *adap, unsigned int qid, u32 *data, @@ -1310,4 +1320,9 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); void t4_sge_decode_idma_state(struct adapter *adapter, int state); void t4_free_mem(void *addr); +void t4_idma_monitor_init(struct adapter *adapter, + struct sge_idma_monitor_state *idma); +void t4_idma_monitor(struct adapter *adapter, + struct sge_idma_monitor_state *idma, + int hz, int ticks); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 10d82b51d7ef..401272a2691e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -578,7 +578,7 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) const struct sge_rspq *rq = &adap->sge.ethrxq[pi->first_qset].rspq; c->rx_coalesce_usecs = qtimer_val(adap, rq); - c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ? + c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN_F) ? adap->sge.counter_val[rq->pktcnt_idx] : 0; c->use_adaptive_rx_coalesce = get_adaptive_rx_setting(dev); return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 803d91beec6f..5aecf69efe56 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -856,23 +856,39 @@ static void free_msix_queue_irqs(struct adapter *adap) * * Sets up the portion of the HW RSS table for the port's VI to distribute * packets to the Rx queues in @queues. + * Should never be called before setting up sge eth rx queues */ int cxgb4_write_rss(const struct port_info *pi, const u16 *queues) { u16 *rss; int i, err; - const struct sge_eth_rxq *q = &pi->adapter->sge.ethrxq[pi->first_qset]; + struct adapter *adapter = pi->adapter; + const struct sge_eth_rxq *rxq; + rxq = &adapter->sge.ethrxq[pi->first_qset]; rss = kmalloc(pi->rss_size * sizeof(u16), GFP_KERNEL); if (!rss) return -ENOMEM; /* map the queue indices to queue ids */ for (i = 0; i < pi->rss_size; i++, queues++) - rss[i] = q[*queues].rspq.abs_id; + rss[i] = rxq[*queues].rspq.abs_id; - err = t4_config_rss_range(pi->adapter, pi->adapter->fn, pi->viid, 0, + err = t4_config_rss_range(adapter, adapter->fn, pi->viid, 0, pi->rss_size, rss, pi->rss_size); + /* If Tunnel All Lookup isn't specified in the global RSS + * Configuration, then we need to specify a default Ingress + * Queue for any ingress packets which aren't hashed. We'll + * use our first ingress queue ... + */ + if (!err) + err = t4_config_vi_rss(adapter, adapter->mbox, pi->viid, + FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN_F | + FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F | + FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN_F | + FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F | + FW_RSS_VI_CONFIG_CMD_UDPEN_F, + rss[0]); kfree(rss); return err; } @@ -885,11 +901,15 @@ int cxgb4_write_rss(const struct port_info *pi, const u16 *queues) */ static int setup_rss(struct adapter *adap) { - int i, err; + int i, j, err; for_each_port(adap, i) { const struct port_info *pi = adap2pinfo(adap, i); + /* Fill default values with equal distribution */ + for (j = 0; j < pi->rss_size; j++) + pi->rss[j] = j % pi->nqsets; + err = cxgb4_write_rss(pi, pi->rss); if (err) return err; @@ -977,7 +997,7 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i / per_chan], msi_idx, q->fl.size ? &q->fl : NULL, - uldrx_handler); + uldrx_handler, 0); if (err) return err; memset(&q->stats, 0, sizeof(q->stats)); @@ -1007,7 +1027,7 @@ static int setup_sge_queues(struct adapter *adap) msi_idx = 1; /* vector 0 is for non-queue interrupts */ else { err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0, - NULL, NULL); + NULL, NULL, -1); if (err) return err; msi_idx = -((int)s->intrq.abs_id + 1); @@ -1027,7 +1047,7 @@ static int setup_sge_queues(struct adapter *adap) * new/deleted queues. */ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], - msi_idx, NULL, fwevtq_handler); + msi_idx, NULL, fwevtq_handler, -1); if (err) { freeout: t4_free_sge_resources(adap); return err; @@ -1044,7 +1064,9 @@ freeout: t4_free_sge_resources(adap); msi_idx++; err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx, &q->fl, - t4_ethrx_handler); + t4_ethrx_handler, + t4_get_mps_bg_map(adap, + pi->tx_chan)); if (err) goto freeout; q->rspq.idx = j; @@ -1398,7 +1420,7 @@ int cxgb4_set_rspq_intr_params(struct sge_rspq *q, } us = us == 0 ? 6 : closest_timer(&adap->sge, us); - q->intr_params = QINTR_TIMER_IDX(us) | (cnt > 0 ? QINTR_CNT_EN : 0); + q->intr_params = QINTR_TIMER_IDX_V(us) | QINTR_CNT_EN_V(cnt > 0); return 0; } @@ -2432,6 +2454,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) lli.max_ordird_qp = adap->params.max_ordird_qp; lli.max_ird_adapter = adap->params.max_ird_adapter; lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; + lli.nodeid = dev_to_node(adap->pdev_dev); handle = ulds[uld].add(&lli); if (IS_ERR(handle)) { @@ -4340,7 +4363,12 @@ static int enable_msix(struct adapter *adap) static int init_rss(struct adapter *adap) { - unsigned int i, j; + unsigned int i; + int err; + + err = t4_init_rss_mode(adap, adap->mbox); + if (err) + return err; for_each_port(adap, i) { struct port_info *pi = adap2pinfo(adap, i); @@ -4348,8 +4376,6 @@ static int init_rss(struct adapter *adap) pi->rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL); if (!pi->rss) return -ENOMEM; - for (j = 0; j < pi->rss_size; j++) - pi->rss[j] = ethtool_rxfh_indir_default(j, pi->nqsets); } return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 78ab4d406ce2..df34293f35e8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -264,6 +264,7 @@ struct cxgb4_lld_info { unsigned int max_ordird_qp; /* Max ORD/IRD depth per RDMA QP */ unsigned int max_ird_adapter; /* Max IRD memory per adapter */ bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ + int nodeid; /* device numa node id */ }; struct cxgb4_uld_info { diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 0d2eddab04ef..dd18fcb644f9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -100,16 +100,6 @@ */ #define TX_QCHECK_PERIOD (HZ / 2) -/* SGE Hung Ingress DMA Threshold Warning time (in Hz) and Warning Repeat Rate - * (in RX_QCHECK_PERIOD multiples). If we find one of the SGE Ingress DMA - * State Machines in the same state for this amount of time (in HZ) then we'll - * issue a warning about a potential hang. We'll repeat the warning as the - * SGE Ingress DMA Channel appears to be hung every N RX_QCHECK_PERIODs till - * the situation clears. If the situation clears, we'll note that as well. - */ -#define SGE_IDMA_WARN_THRESH (1 * HZ) -#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD) - /* * Max number of Tx descriptors to be reclaimed by the Tx timer. */ @@ -540,6 +530,10 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) val = PIDX_T5_V(q->pend_cred / 8) | DBTYPE_F; val |= DBPRIO_F; + + /* Make sure all memory writes to the Free List queue are + * committed before we tell the hardware about them. + */ wmb(); /* If we don't have access to the new User Doorbell (T5+), use @@ -930,7 +924,10 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) */ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { - wmb(); /* write descriptors before telling HW */ + /* Make sure that all writes to the TX Descriptors are committed + * before we tell the hardware about them. + */ + wmb(); /* If we don't have access to the new User Doorbell (T5+), use the old * doorbell mechanism; otherwise use the new BAR2 mechanism. @@ -1047,7 +1044,7 @@ nocsum: /* * unknown protocol, disable HW csum * and hope a bad packet is detected */ - return TXPKT_L4CSUM_DIS; + return TXPKT_L4CSUM_DIS_F; } } else { /* @@ -1064,14 +1061,15 @@ nocsum: /* } if (likely(csum_type >= TX_CSUM_TCPIP)) - return TXPKT_CSUM_TYPE(csum_type) | - TXPKT_IPHDR_LEN(skb_network_header_len(skb)) | - TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN); + return TXPKT_CSUM_TYPE_V(csum_type) | + TXPKT_IPHDR_LEN_V(skb_network_header_len(skb)) | + TXPKT_ETHHDR_LEN_V(skb_network_offset(skb) - ETH_HLEN); else { int start = skb_transport_offset(skb); - return TXPKT_CSUM_TYPE(csum_type) | TXPKT_CSUM_START(start) | - TXPKT_CSUM_LOC(start + skb->csum_offset); + return TXPKT_CSUM_TYPE_V(csum_type) | + TXPKT_CSUM_START_V(start) | + TXPKT_CSUM_LOC_V(start + skb->csum_offset); } } @@ -1112,11 +1110,11 @@ cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap, return -ENOTSUPP; /* FC CRC offload */ - *cntrl = TXPKT_CSUM_TYPE(TX_CSUM_FCOE) | - TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS | - TXPKT_CSUM_START(CXGB_FCOE_TXPKT_CSUM_START) | - TXPKT_CSUM_END(CXGB_FCOE_TXPKT_CSUM_END) | - TXPKT_CSUM_LOC(CXGB_FCOE_TXPKT_CSUM_END); + *cntrl = TXPKT_CSUM_TYPE_V(TX_CSUM_FCOE) | + TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F | + TXPKT_CSUM_START_V(CXGB_FCOE_TXPKT_CSUM_START) | + TXPKT_CSUM_END_V(CXGB_FCOE_TXPKT_CSUM_END) | + TXPKT_CSUM_LOC_V(CXGB_FCOE_TXPKT_CSUM_END); return 0; } #endif /* CONFIG_CHELSIO_T4_FCOE */ @@ -1130,7 +1128,6 @@ cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap, */ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) { - int len; u32 wr_mid; u64 cntrl, *end; int qidx, credits; @@ -1143,6 +1140,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) const struct skb_shared_info *ssi; dma_addr_t addr[MAX_SKB_FRAGS + 1]; bool immediate = false; + int len, max_pkt_len; #ifdef CONFIG_CHELSIO_T4_FCOE int err; #endif /* CONFIG_CHELSIO_T4_FCOE */ @@ -1156,13 +1154,20 @@ out_free: dev_kfree_skb_any(skb); return NETDEV_TX_OK; } + /* Discard the packet if the length is greater than mtu */ + max_pkt_len = ETH_HLEN + dev->mtu; + if (skb_vlan_tag_present(skb)) + max_pkt_len += VLAN_HLEN; + if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) + goto out_free; + pi = netdev_priv(dev); adap = pi->adapter; qidx = skb_get_queue_mapping(skb); q = &adap->sge.ethtxq[qidx + pi->first_qset]; reclaim_completed_tx(adap, &q->q, true); - cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS; + cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; #ifdef CONFIG_CHELSIO_T4_FCOE err = cxgb_fcoe_offload(skb, adap, pi, &cntrl); @@ -1213,23 +1218,23 @@ out_free: dev_kfree_skb_any(skb); len += sizeof(*lso); wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) | FW_WR_IMMDLEN_V(len)); - lso->c.lso_ctrl = htonl(LSO_OPCODE(CPL_TX_PKT_LSO) | - LSO_FIRST_SLICE | LSO_LAST_SLICE | - LSO_IPV6(v6) | - LSO_ETHHDR_LEN(eth_xtra_len / 4) | - LSO_IPHDR_LEN(l3hdr_len / 4) | - LSO_TCPHDR_LEN(tcp_hdr(skb)->doff)); + lso->c.lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) | + LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F | + LSO_IPV6_V(v6) | + LSO_ETHHDR_LEN_V(eth_xtra_len / 4) | + LSO_IPHDR_LEN_V(l3hdr_len / 4) | + LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff)); lso->c.ipid_ofst = htons(0); lso->c.mss = htons(ssi->gso_size); lso->c.seqno_offset = htonl(0); if (is_t4(adap->params.chip)) lso->c.len = htonl(skb->len); else - lso->c.len = htonl(LSO_T5_XFER_SIZE(skb->len)); + lso->c.len = htonl(LSO_T5_XFER_SIZE_V(skb->len)); cpl = (void *)(lso + 1); - cntrl = TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | - TXPKT_IPHDR_LEN(l3hdr_len) | - TXPKT_ETHHDR_LEN(eth_xtra_len); + cntrl = TXPKT_CSUM_TYPE_V(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | + TXPKT_IPHDR_LEN_V(l3hdr_len) | + TXPKT_ETHHDR_LEN_V(eth_xtra_len); q->tso++; q->tx_cso += ssi->gso_segs; } else { @@ -1238,23 +1243,24 @@ out_free: dev_kfree_skb_any(skb); FW_WR_IMMDLEN_V(len)); cpl = (void *)(wr + 1); if (skb->ip_summed == CHECKSUM_PARTIAL) { - cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS; + cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS_F; q->tx_cso++; } } if (skb_vlan_tag_present(skb)) { q->vlan_ins++; - cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(skb_vlan_tag_get(skb)); + cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb)); #ifdef CONFIG_CHELSIO_T4_FCOE if (skb->protocol == htons(ETH_P_FCOE)) - cntrl |= TXPKT_VLAN( + cntrl |= TXPKT_VLAN_V( ((skb->priority & 0x7) << VLAN_PRIO_SHIFT)); #endif /* CONFIG_CHELSIO_T4_FCOE */ } - cpl->ctrl0 = htonl(TXPKT_OPCODE(CPL_TX_PKT_XT) | - TXPKT_INTF(pi->tx_chan) | TXPKT_PF(adap->fn)); + cpl->ctrl0 = htonl(TXPKT_OPCODE_V(CPL_TX_PKT_XT) | + TXPKT_INTF_V(pi->tx_chan) | + TXPKT_PF_V(adap->fn)); cpl->pack = htons(0); cpl->len = htons(skb->len); cpl->ctrl1 = cpu_to_be64(cntrl); @@ -1964,7 +1970,7 @@ static void restore_rx_bufs(const struct pkt_gl *si, struct sge_fl *q, static inline bool is_new_response(const struct rsp_ctrl *r, const struct sge_rspq *q) { - return RSPD_GEN(r->type_gen) == q->gen; + return (r->type_gen >> RSPD_GEN_S) == q->gen; } /** @@ -2011,19 +2017,19 @@ static int process_responses(struct sge_rspq *q, int budget) break; dma_rmb(); - rsp_type = RSPD_TYPE(rc->type_gen); - if (likely(rsp_type == RSP_TYPE_FLBUF)) { + rsp_type = RSPD_TYPE_G(rc->type_gen); + if (likely(rsp_type == RSPD_TYPE_FLBUF_X)) { struct page_frag *fp; struct pkt_gl si; const struct rx_sw_desc *rsd; u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags; - if (len & RSPD_NEWBUF) { + if (len & RSPD_NEWBUF_F) { if (likely(q->offset > 0)) { free_rx_bufs(q->adap, &rxq->fl, 1); q->offset = 0; } - len = RSPD_LEN(len); + len = RSPD_LEN_G(len); } si.tot_len = len; @@ -2058,7 +2064,7 @@ static int process_responses(struct sge_rspq *q, int budget) q->offset += ALIGN(fp->size, s->fl_align); else restore_rx_bufs(&si, &rxq->fl, frags); - } else if (likely(rsp_type == RSP_TYPE_CPL)) { + } else if (likely(rsp_type == RSPD_TYPE_CPL_X)) { ret = q->handler(q, q->cur_desc, NULL); } else { ret = q->handler(q, (const __be64 *)rc, CXGB4_MSG_AN); @@ -2066,7 +2072,7 @@ static int process_responses(struct sge_rspq *q, int budget) if (unlikely(ret)) { /* couldn't process descriptor, back off for recovery */ - q->next_intr_params = QINTR_TIMER_IDX(NOMEM_TMR_IDX); + q->next_intr_params = QINTR_TIMER_IDX_V(NOMEM_TMR_IDX); break; } @@ -2090,7 +2096,7 @@ int cxgb_busy_poll(struct napi_struct *napi) return LL_FLUSH_BUSY; work_done = process_responses(q, 4); - params = QINTR_TIMER_IDX(TIMERREG_COUNTER0_X) | QINTR_CNT_EN; + params = QINTR_TIMER_IDX_V(TIMERREG_COUNTER0_X) | QINTR_CNT_EN_V(1); q->next_intr_params = params; val = CIDXINC_V(work_done) | SEINTARM_V(params); @@ -2137,7 +2143,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) int timer_index; napi_complete(napi); - timer_index = QINTR_TIMER_IDX_GET(q->next_intr_params); + timer_index = QINTR_TIMER_IDX_G(q->next_intr_params); if (q->adaptive_rx) { if (work_done > max(timer_pkt_quota[timer_index], @@ -2147,15 +2153,16 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) timer_index = timer_index - 1; timer_index = clamp(timer_index, 0, SGE_TIMERREGS - 1); - q->next_intr_params = QINTR_TIMER_IDX(timer_index) | - V_QINTR_CNT_EN; + q->next_intr_params = + QINTR_TIMER_IDX_V(timer_index) | + QINTR_CNT_EN_V(0); params = q->next_intr_params; } else { params = q->next_intr_params; q->next_intr_params = q->intr_params; } } else - params = QINTR_TIMER_IDX(7); + params = QINTR_TIMER_IDX_V(7); val = CIDXINC_V(work_done) | SEINTARM_V(params); @@ -2203,7 +2210,7 @@ static unsigned int process_intrq(struct adapter *adap) break; dma_rmb(); - if (RSPD_TYPE(rc->type_gen) == RSP_TYPE_INTR) { + if (RSPD_TYPE_G(rc->type_gen) == RSPD_TYPE_INTR_X) { unsigned int qid = ntohl(rc->pldbuflen_qid); qid -= adap->sge.ingr_start; @@ -2279,7 +2286,7 @@ irq_handler_t t4_intr_handler(struct adapter *adap) static void sge_rx_timer_cb(unsigned long data) { unsigned long m; - unsigned int i, idma_same_state_cnt[2]; + unsigned int i; struct adapter *adap = (struct adapter *)data; struct sge *s = &adap->sge; @@ -2300,67 +2307,16 @@ static void sge_rx_timer_cb(unsigned long data) set_bit(id, s->starving_fl); } } + /* The remainder of the SGE RX Timer Callback routine is dedicated to + * global Master PF activities like checking for chip ingress stalls, + * etc. + */ + if (!(adap->flags & MASTER_PF)) + goto done; - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 13); - idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH_A); - idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - - for (i = 0; i < 2; i++) { - u32 debug0, debug11; - - /* If the Ingress DMA Same State Counter ("timer") is less - * than 1s, then we can reset our synthesized Stall Timer and - * continue. If we have previously emitted warnings about a - * potential stalled Ingress Queue, issue a note indicating - * that the Ingress Queue has resumed forward progress. - */ - if (idma_same_state_cnt[i] < s->idma_1s_thresh) { - if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH) - CH_WARN(adap, "SGE idma%d, queue%u,resumed after %d sec\n", - i, s->idma_qid[i], - s->idma_stalled[i]/HZ); - s->idma_stalled[i] = 0; - continue; - } - - /* Synthesize an SGE Ingress DMA Same State Timer in the Hz - * domain. The first time we get here it'll be because we - * passed the 1s Threshold; each additional time it'll be - * because the RX Timer Callback is being fired on its regular - * schedule. - * - * If the stall is below our Potential Hung Ingress Queue - * Warning Threshold, continue. - */ - if (s->idma_stalled[i] == 0) - s->idma_stalled[i] = HZ; - else - s->idma_stalled[i] += RX_QCHECK_PERIOD; - - if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH) - continue; - - /* We'll issue a warning every SGE_IDMA_WARN_REPEAT Hz */ - if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0) - continue; - - /* Read and save the SGE IDMA State and Queue ID information. - * We do this every time in case it changes across time ... - */ - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 0); - debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f; - - t4_write_reg(adap, SGE_DEBUG_INDEX_A, 11); - debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A); - s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff; - - CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n", - i, s->idma_qid[i], s->idma_state[i], - s->idma_stalled[i]/HZ, debug0, debug11); - t4_sge_decode_idma_state(adap, s->idma_state[i]); - } + t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD); +done: mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD); } @@ -2437,9 +2393,12 @@ static void __iomem *bar2_address(struct adapter *adapter, return adapter->bar2 + bar2_qoffset; } +/* @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0 + * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map + */ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct net_device *dev, int intr_idx, - struct sge_fl *fl, rspq_handler_t hnd) + struct sge_fl *fl, rspq_handler_t hnd, int cong) { int ret, flsz = 0; struct fw_iq_cmd c; @@ -2462,7 +2421,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, FW_LEN16(c)); c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE_V(FW_IQ_TYPE_FL_INT_CAP) | FW_IQ_CMD_IQASYNCH_V(fwevtq) | FW_IQ_CMD_VIID_V(pi->viid) | - FW_IQ_CMD_IQANDST_V(intr_idx < 0) | FW_IQ_CMD_IQANUD_V(1) | + FW_IQ_CMD_IQANDST_V(intr_idx < 0) | + FW_IQ_CMD_IQANUD_V(UPDATEDELIVERY_INTERRUPT_X) | FW_IQ_CMD_IQANDSTINDEX_V(intr_idx >= 0 ? intr_idx : -intr_idx - 1)); c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH_V(pi->tx_chan) | @@ -2471,8 +2431,19 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4)); c.iqsize = htons(iq->size); c.iqaddr = cpu_to_be64(iq->phys_addr); + if (cong >= 0) + c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F); if (fl) { + /* Allocate the ring for the hardware free list (with space + * for its status page) along with the associated software + * descriptor ring. The free list size needs to be a multiple + * of the Egress Queue Unit and at least 2 Egress Units larger + * than the SGE's Egress Congrestion Threshold + * (fl_starve_thres - 1). + */ + if (fl->size < s->fl_starve_thres - 1 + 2 * 8) + fl->size = s->fl_starve_thres - 1 + 2 * 8; fl->size = roundup(fl->size, 8); fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc), &fl->addr, @@ -2481,12 +2452,18 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, goto fl_nomem; flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc); - c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN_F | - FW_IQ_CMD_FL0FETCHRO_F | - FW_IQ_CMD_FL0DATARO_F | - FW_IQ_CMD_FL0PADEN_F); - c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN_V(2) | - FW_IQ_CMD_FL0FBMAX_V(3)); + c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F | + FW_IQ_CMD_FL0FETCHRO_F | + FW_IQ_CMD_FL0DATARO_F | + FW_IQ_CMD_FL0PADEN_F); + if (cong >= 0) + c.iqns_to_fl0congen |= + htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) | + FW_IQ_CMD_FL0CONGCIF_F | + FW_IQ_CMD_FL0CONGEN_F); + c.fl0dcaen_to_fl0cidxfthresh = + htons(FW_IQ_CMD_FL0FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_IQ_CMD_FL0FBMAX_V(FETCHBURSTMAX_512B_X)); c.fl0size = htons(flsz); c.fl0addr = cpu_to_be64(fl->addr); } @@ -2532,6 +2509,41 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, &fl->bar2_qid); refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL); } + + /* For T5 and later we attempt to set up the Congestion Manager values + * of the new RX Ethernet Queue. This should really be handled by + * firmware because it's more complex than any host driver wants to + * get involved with and it's different per chip and this is almost + * certainly wrong. Firmware would be wrong as well, but it would be + * a lot easier to fix in one place ... For now we do something very + * simple (and hopefully less wrong). + */ + if (!is_t4(adap->params.chip) && cong >= 0) { + u32 param, val; + int i; + + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | + FW_PARAMS_PARAM_YZ_V(iq->cntxt_id)); + if (cong == 0) { + val = CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_QUEUE_X); + } else { + val = + CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X); + for (i = 0; i < 4; i++) { + if (cong & (1 << i)) + val |= + CONMCTXT_CNGCHMAP_V(1 << (i << 2)); + } + } + ret = t4_set_params(adap, adap->mbox, adap->fn, 0, 1, + ¶m, &val); + if (ret) + dev_warn(adap->pdev_dev, "Failed to set Congestion" + " Manager Context for Ingress Queue %d: %d\n", + iq->cntxt_id, -ret); + } + return 0; fl_nomem: @@ -2595,14 +2607,15 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c)); c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F | FW_EQ_ETH_CMD_VIID_V(pi->viid)); - c.fetchszm_to_iqid = htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(2) | - FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | - FW_EQ_ETH_CMD_FETCHRO_V(1) | - FW_EQ_ETH_CMD_IQID_V(iqid)); - c.dcaen_to_eqsize = htonl(FW_EQ_ETH_CMD_FBMIN_V(2) | - FW_EQ_ETH_CMD_FBMAX_V(3) | - FW_EQ_ETH_CMD_CIDXFTHRESH_V(5) | - FW_EQ_ETH_CMD_EQSIZE_V(nentries)); + c.fetchszm_to_iqid = + htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = + htonl(FW_EQ_ETH_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | + FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_ETH_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); @@ -2637,7 +2650,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, txq->q.desc = alloc_ring(adap->pdev_dev, nentries, sizeof(struct tx_desc), 0, &txq->q.phys_addr, - NULL, 0, NUMA_NO_NODE); + NULL, 0, dev_to_node(adap->pdev_dev)); if (!txq->q.desc) return -ENOMEM; @@ -2649,14 +2662,15 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, FW_EQ_CTRL_CMD_EQSTART_F | FW_LEN16(c)); c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID_V(cmplqid)); c.physeqid_pkd = htonl(0); - c.fetchszm_to_iqid = htonl(FW_EQ_CTRL_CMD_HOSTFCMODE_V(2) | - FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) | - FW_EQ_CTRL_CMD_FETCHRO_F | - FW_EQ_CTRL_CMD_IQID_V(iqid)); - c.dcaen_to_eqsize = htonl(FW_EQ_CTRL_CMD_FBMIN_V(2) | - FW_EQ_CTRL_CMD_FBMAX_V(3) | - FW_EQ_CTRL_CMD_CIDXFTHRESH_V(5) | - FW_EQ_CTRL_CMD_EQSIZE_V(nentries)); + c.fetchszm_to_iqid = + htonl(FW_EQ_CTRL_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = + htonl(FW_EQ_CTRL_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | + FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_CTRL_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); @@ -2701,14 +2715,15 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, FW_EQ_OFLD_CMD_VFN_V(0)); c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC_F | FW_EQ_OFLD_CMD_EQSTART_F | FW_LEN16(c)); - c.fetchszm_to_iqid = htonl(FW_EQ_OFLD_CMD_HOSTFCMODE_V(2) | - FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) | - FW_EQ_OFLD_CMD_FETCHRO_F | - FW_EQ_OFLD_CMD_IQID_V(iqid)); - c.dcaen_to_eqsize = htonl(FW_EQ_OFLD_CMD_FBMIN_V(2) | - FW_EQ_OFLD_CMD_FBMAX_V(3) | - FW_EQ_OFLD_CMD_CIDXFTHRESH_V(5) | - FW_EQ_OFLD_CMD_EQSIZE_V(nentries)); + c.fetchszm_to_iqid = + htonl(FW_EQ_OFLD_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) | + FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid)); + c.dcaen_to_eqsize = + htonl(FW_EQ_OFLD_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) | + FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | + FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_OFLD_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); ret = t4_wr_mbox(adap, adap->fn, &c, sizeof(c), &c); @@ -3023,7 +3038,11 @@ int t4_sge_init(struct adapter *adap) * Packing Boundary. T5 introduced the ability to specify these * separately. The actual Ingress Packet Data alignment boundary * within Packed Buffer Mode is the maximum of these two - * specifications. + * specifications. (Note that it makes no real practical sense to + * have the Pading Boudary be larger than the Packing Boundary but you + * could set the chip up that way and, in fact, legacy T4 code would + * end doing this because it would initialize the Padding Boundary and + * leave the Packing Boundary initialized to 0 (16 bytes).) */ ingpadboundary = 1 << (INGPADBOUNDARY_G(sge_control) + INGPADBOUNDARY_SHIFT_X); @@ -3067,11 +3086,14 @@ int t4_sge_init(struct adapter *adap) egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl); s->fl_starve_thres = 2*egress_threshold + 1; + t4_idma_monitor_init(adap, &s->idma_monitor); + + /* Set up timers used for recuring callbacks to process RX and TX + * administrative tasks. + */ setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap); setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap); - s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000; /* 1 s */ - s->idma_stalled[0] = 0; - s->idma_stalled[1] = 0; + spin_lock_init(&s->intrq_lock); return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index e8578a742f2a..c626252e51d4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3014,6 +3014,31 @@ int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, return t4_wr_mbox(adapter, mbox, &c, sizeof(c), NULL); } +/** + * t4_config_vi_rss - configure per VI RSS settings + * @adapter: the adapter + * @mbox: mbox to use for the FW command + * @viid: the VI id + * @flags: RSS flags + * @defq: id of the default RSS queue for the VI. + * + * Configures VI-specific RSS properties. + */ +int t4_config_vi_rss(struct adapter *adapter, int mbox, unsigned int viid, + unsigned int flags, unsigned int defq) +{ + struct fw_rss_vi_config_cmd c; + + memset(&c, 0, sizeof(c)); + c.op_to_viid = cpu_to_be32(FW_CMD_OP_V(FW_RSS_VI_CONFIG_CMD) | + FW_CMD_REQUEST_F | FW_CMD_WRITE_F | + FW_RSS_VI_CONFIG_CMD_VIID_V(viid)); + c.retval_len16 = cpu_to_be32(FW_LEN16(c)); + c.u.basicvirtual.defaultq_to_udpen = cpu_to_be32(flags | + FW_RSS_VI_CONFIG_CMD_DEFAULTQ_V(defq)); + return t4_wr_mbox(adapter, mbox, &c, sizeof(c), NULL); +} + /* Read an RSS table row */ static int rd_rss_row(struct adapter *adap, int row, u32 *val) { @@ -3401,7 +3426,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]) } /** - * get_mps_bg_map - return the buffer groups associated with a port + * t4_get_mps_bg_map - return the buffer groups associated with a port * @adap: the adapter * @idx: the port index * @@ -3409,7 +3434,7 @@ void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]) * with the given port. Bit i is set if buffer group i is used by the * port. */ -static unsigned int get_mps_bg_map(struct adapter *adap, int idx) +unsigned int t4_get_mps_bg_map(struct adapter *adap, int idx) { u32 n = NUMPORTS_G(t4_read_reg(adap, MPS_CMN_CTL_A)); @@ -3460,7 +3485,7 @@ const char *t4_get_port_type_description(enum fw_port_type port_type) */ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p) { - u32 bgmap = get_mps_bg_map(adap, idx); + u32 bgmap = t4_get_mps_bg_map(adap, idx); #define GET_STAT(name) \ t4_read_reg64(adap, \ @@ -5373,6 +5398,28 @@ int t4_filter_field_shift(const struct adapter *adap, int filter_sel) return field_shift; } +int t4_init_rss_mode(struct adapter *adap, int mbox) +{ + int i, ret; + struct fw_rss_vi_config_cmd rvc; + + memset(&rvc, 0, sizeof(rvc)); + + for_each_port(adap, i) { + struct port_info *p = adap2pinfo(adap, i); + + rvc.op_to_viid = htonl(FW_CMD_OP_V(FW_RSS_VI_CONFIG_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + FW_RSS_VI_CONFIG_CMD_VIID_V(p->viid)); + rvc.retval_len16 = htonl(FW_LEN16(rvc)); + ret = t4_wr_mbox(adap, mbox, &rvc, sizeof(rvc), &rvc); + if (ret) + return ret; + p->rss_mode = ntohl(rvc.u.basicvirtual.defaultq_to_udpen); + } + return 0; +} + int t4_port_init(struct adapter *adap, int mbox, int pf, int vf) { u8 addr[6]; @@ -5717,3 +5764,130 @@ void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr) t4_write_reg(adap, TP_DBG_LA_CONFIG_A, cfg | adap->params.tp.la_mask); } + +/* SGE Hung Ingress DMA Warning Threshold time and Warning Repeat Rate (in + * seconds). If we find one of the SGE Ingress DMA State Machines in the same + * state for more than the Warning Threshold then we'll issue a warning about + * a potential hang. We'll repeat the warning as the SGE Ingress DMA Channel + * appears to be hung every Warning Repeat second till the situation clears. + * If the situation clears, we'll note that as well. + */ +#define SGE_IDMA_WARN_THRESH 1 +#define SGE_IDMA_WARN_REPEAT 300 + +/** + * t4_idma_monitor_init - initialize SGE Ingress DMA Monitor + * @adapter: the adapter + * @idma: the adapter IDMA Monitor state + * + * Initialize the state of an SGE Ingress DMA Monitor. + */ +void t4_idma_monitor_init(struct adapter *adapter, + struct sge_idma_monitor_state *idma) +{ + /* Initialize the state variables for detecting an SGE Ingress DMA + * hang. The SGE has internal counters which count up on each clock + * tick whenever the SGE finds its Ingress DMA State Engines in the + * same state they were on the previous clock tick. The clock used is + * the Core Clock so we have a limit on the maximum "time" they can + * record; typically a very small number of seconds. For instance, + * with a 600MHz Core Clock, we can only count up to a bit more than + * 7s. So we'll synthesize a larger counter in order to not run the + * risk of having the "timers" overflow and give us the flexibility to + * maintain a Hung SGE State Machine of our own which operates across + * a longer time frame. + */ + idma->idma_1s_thresh = core_ticks_per_usec(adapter) * 1000000; /* 1s */ + idma->idma_stalled[0] = 0; + idma->idma_stalled[1] = 0; +} + +/** + * t4_idma_monitor - monitor SGE Ingress DMA state + * @adapter: the adapter + * @idma: the adapter IDMA Monitor state + * @hz: number of ticks/second + * @ticks: number of ticks since the last IDMA Monitor call + */ +void t4_idma_monitor(struct adapter *adapter, + struct sge_idma_monitor_state *idma, + int hz, int ticks) +{ + int i, idma_same_state_cnt[2]; + + /* Read the SGE Debug Ingress DMA Same State Count registers. These + * are counters inside the SGE which count up on each clock when the + * SGE finds its Ingress DMA State Engines in the same states they + * were in the previous clock. The counters will peg out at + * 0xffffffff without wrapping around so once they pass the 1s + * threshold they'll stay above that till the IDMA state changes. + */ + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 13); + idma_same_state_cnt[0] = t4_read_reg(adapter, SGE_DEBUG_DATA_HIGH_A); + idma_same_state_cnt[1] = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + + for (i = 0; i < 2; i++) { + u32 debug0, debug11; + + /* If the Ingress DMA Same State Counter ("timer") is less + * than 1s, then we can reset our synthesized Stall Timer and + * continue. If we have previously emitted warnings about a + * potential stalled Ingress Queue, issue a note indicating + * that the Ingress Queue has resumed forward progress. + */ + if (idma_same_state_cnt[i] < idma->idma_1s_thresh) { + if (idma->idma_stalled[i] >= SGE_IDMA_WARN_THRESH * hz) + dev_warn(adapter->pdev_dev, "SGE idma%d, queue %u, " + "resumed after %d seconds\n", + i, idma->idma_qid[i], + idma->idma_stalled[i] / hz); + idma->idma_stalled[i] = 0; + continue; + } + + /* Synthesize an SGE Ingress DMA Same State Timer in the Hz + * domain. The first time we get here it'll be because we + * passed the 1s Threshold; each additional time it'll be + * because the RX Timer Callback is being fired on its regular + * schedule. + * + * If the stall is below our Potential Hung Ingress Queue + * Warning Threshold, continue. + */ + if (idma->idma_stalled[i] == 0) { + idma->idma_stalled[i] = hz; + idma->idma_warn[i] = 0; + } else { + idma->idma_stalled[i] += ticks; + idma->idma_warn[i] -= ticks; + } + + if (idma->idma_stalled[i] < SGE_IDMA_WARN_THRESH * hz) + continue; + + /* We'll issue a warning every SGE_IDMA_WARN_REPEAT seconds. + */ + if (idma->idma_warn[i] > 0) + continue; + idma->idma_warn[i] = SGE_IDMA_WARN_REPEAT * hz; + + /* Read and save the SGE IDMA State and Queue ID information. + * We do this every time in case it changes across time ... + * can't be too careful ... + */ + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 0); + debug0 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + idma->idma_state[i] = (debug0 >> (i * 9)) & 0x3f; + + t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 11); + debug11 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A); + idma->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff; + + dev_warn(adapter->pdev_dev, "SGE idma%u, queue %u, potentially stuck in " + "state %u for %d seconds (debug0=%#x, debug11=%#x)\n", + i, idma->idma_qid[i], idma->idma_state[i], + idma->idma_stalled[i] / hz, + debug0, debug11); + t4_sge_decode_idma_state(adapter, idma->idma_state[i]); + } +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 380b15c0417a..88067d90121c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -152,17 +152,33 @@ struct rsp_ctrl { }; }; -#define RSPD_NEWBUF 0x80000000U -#define RSPD_LEN(x) (((x) >> 0) & 0x7fffffffU) -#define RSPD_QID(x) RSPD_LEN(x) +#define RSPD_NEWBUF_S 31 +#define RSPD_NEWBUF_V(x) ((x) << RSPD_NEWBUF_S) +#define RSPD_NEWBUF_F RSPD_NEWBUF_V(1U) -#define RSPD_GEN(x) ((x) >> 7) -#define RSPD_TYPE(x) (((x) >> 4) & 3) +#define RSPD_LEN_S 0 +#define RSPD_LEN_M 0x7fffffff +#define RSPD_LEN_G(x) (((x) >> RSPD_LEN_S) & RSPD_LEN_M) -#define V_QINTR_CNT_EN 0x0 -#define QINTR_CNT_EN 0x1 -#define QINTR_TIMER_IDX(x) ((x) << 1) -#define QINTR_TIMER_IDX_GET(x) (((x) >> 1) & 0x7) +#define RSPD_QID_S RSPD_LEN_S +#define RSPD_QID_M RSPD_LEN_M +#define RSPD_QID_G(x) RSPD_LEN_G(x) + +#define RSPD_GEN_S 7 + +#define RSPD_TYPE_S 4 +#define RSPD_TYPE_M 0x3 +#define RSPD_TYPE_G(x) (((x) >> RSPD_TYPE_S) & RSPD_TYPE_M) + +/* Rx queue interrupt deferral fields: counter enable and timer index */ +#define QINTR_CNT_EN_S 0 +#define QINTR_CNT_EN_V(x) ((x) << QINTR_CNT_EN_S) +#define QINTR_CNT_EN_F QINTR_CNT_EN_V(1U) + +#define QINTR_TIMER_IDX_S 1 +#define QINTR_TIMER_IDX_M 0x7 +#define QINTR_TIMER_IDX_V(x) ((x) << QINTR_TIMER_IDX_S) +#define QINTR_TIMER_IDX_G(x) (((x) >> QINTR_TIMER_IDX_S) & QINTR_TIMER_IDX_M) /* * Flash layout. diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index 30a2f56e99c2..d90f8a03e378 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -634,26 +634,9 @@ struct cpl_tid_release { struct cpl_tx_pkt_core { __be32 ctrl0; -#define TXPKT_VF(x) ((x) << 0) -#define TXPKT_PF(x) ((x) << 8) -#define TXPKT_VF_VLD (1 << 11) -#define TXPKT_OVLAN_IDX(x) ((x) << 12) -#define TXPKT_INTF(x) ((x) << 16) -#define TXPKT_INS_OVLAN (1 << 21) -#define TXPKT_OPCODE(x) ((x) << 24) __be16 pack; __be16 len; __be64 ctrl1; -#define TXPKT_CSUM_END(x) ((x) << 12) -#define TXPKT_CSUM_START(x) ((x) << 20) -#define TXPKT_IPHDR_LEN(x) ((u64)(x) << 20) -#define TXPKT_CSUM_LOC(x) ((u64)(x) << 30) -#define TXPKT_ETHHDR_LEN(x) ((u64)(x) << 34) -#define TXPKT_CSUM_TYPE(x) ((u64)(x) << 40) -#define TXPKT_VLAN(x) ((u64)(x) << 44) -#define TXPKT_VLAN_VLD (1ULL << 60) -#define TXPKT_IPCSUM_DIS (1ULL << 62) -#define TXPKT_L4CSUM_DIS (1ULL << 63) }; struct cpl_tx_pkt { @@ -663,16 +646,66 @@ struct cpl_tx_pkt { #define cpl_tx_pkt_xt cpl_tx_pkt +/* cpl_tx_pkt_core.ctrl0 fields */ +#define TXPKT_VF_S 0 +#define TXPKT_VF_V(x) ((x) << TXPKT_VF_S) + +#define TXPKT_PF_S 8 +#define TXPKT_PF_V(x) ((x) << TXPKT_PF_S) + +#define TXPKT_VF_VLD_S 11 +#define TXPKT_VF_VLD_V(x) ((x) << TXPKT_VF_VLD_S) +#define TXPKT_VF_VLD_F TXPKT_VF_VLD_V(1U) + +#define TXPKT_OVLAN_IDX_S 12 +#define TXPKT_OVLAN_IDX_V(x) ((x) << TXPKT_OVLAN_IDX_S) + +#define TXPKT_INTF_S 16 +#define TXPKT_INTF_V(x) ((x) << TXPKT_INTF_S) + +#define TXPKT_INS_OVLAN_S 21 +#define TXPKT_INS_OVLAN_V(x) ((x) << TXPKT_INS_OVLAN_S) +#define TXPKT_INS_OVLAN_F TXPKT_INS_OVLAN_V(1U) + +#define TXPKT_OPCODE_S 24 +#define TXPKT_OPCODE_V(x) ((x) << TXPKT_OPCODE_S) + +/* cpl_tx_pkt_core.ctrl1 fields */ +#define TXPKT_CSUM_END_S 12 +#define TXPKT_CSUM_END_V(x) ((x) << TXPKT_CSUM_END_S) + +#define TXPKT_CSUM_START_S 20 +#define TXPKT_CSUM_START_V(x) ((x) << TXPKT_CSUM_START_S) + +#define TXPKT_IPHDR_LEN_S 20 +#define TXPKT_IPHDR_LEN_V(x) ((__u64)(x) << TXPKT_IPHDR_LEN_S) + +#define TXPKT_CSUM_LOC_S 30 +#define TXPKT_CSUM_LOC_V(x) ((__u64)(x) << TXPKT_CSUM_LOC_S) + +#define TXPKT_ETHHDR_LEN_S 34 +#define TXPKT_ETHHDR_LEN_V(x) ((__u64)(x) << TXPKT_ETHHDR_LEN_S) + +#define TXPKT_CSUM_TYPE_S 40 +#define TXPKT_CSUM_TYPE_V(x) ((__u64)(x) << TXPKT_CSUM_TYPE_S) + +#define TXPKT_VLAN_S 44 +#define TXPKT_VLAN_V(x) ((__u64)(x) << TXPKT_VLAN_S) + +#define TXPKT_VLAN_VLD_S 60 +#define TXPKT_VLAN_VLD_V(x) ((__u64)(x) << TXPKT_VLAN_VLD_S) +#define TXPKT_VLAN_VLD_F TXPKT_VLAN_VLD_V(1ULL) + +#define TXPKT_IPCSUM_DIS_S 62 +#define TXPKT_IPCSUM_DIS_V(x) ((__u64)(x) << TXPKT_IPCSUM_DIS_S) +#define TXPKT_IPCSUM_DIS_F TXPKT_IPCSUM_DIS_V(1ULL) + +#define TXPKT_L4CSUM_DIS_S 63 +#define TXPKT_L4CSUM_DIS_V(x) ((__u64)(x) << TXPKT_L4CSUM_DIS_S) +#define TXPKT_L4CSUM_DIS_F TXPKT_L4CSUM_DIS_V(1ULL) + struct cpl_tx_pkt_lso_core { __be32 lso_ctrl; -#define LSO_TCPHDR_LEN(x) ((x) << 0) -#define LSO_IPHDR_LEN(x) ((x) << 4) -#define LSO_ETHHDR_LEN(x) ((x) << 16) -#define LSO_IPV6(x) ((x) << 20) -#define LSO_LAST_SLICE (1 << 22) -#define LSO_FIRST_SLICE (1 << 23) -#define LSO_OPCODE(x) ((x) << 24) -#define LSO_T5_XFER_SIZE(x) ((x) << 0) __be16 ipid_ofst; __be16 mss; __be32 seqno_offset; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h index 19b2dcf6acde..72ec1f91d29f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_values.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_values.h @@ -61,6 +61,29 @@ #define SGE_TIMERREGS 6 #define TIMERREG_COUNTER0_X 0 +#define FETCHBURSTMIN_64B_X 2 + +#define FETCHBURSTMAX_512B_X 3 + +#define HOSTFCMODE_STATUS_PAGE_X 2 + +#define CIDXFLUSHTHRESH_32_X 5 + +#define UPDATEDELIVERY_INTERRUPT_X 1 + +#define RSPD_TYPE_FLBUF_X 0 +#define RSPD_TYPE_CPL_X 1 +#define RSPD_TYPE_INTR_X 2 + +/* Congestion Manager Definitions. + */ +#define CONMCTXT_CNGTPMODE_S 19 +#define CONMCTXT_CNGTPMODE_V(x) ((x) << CONMCTXT_CNGTPMODE_S) +#define CONMCTXT_CNGCHMAP_S 0 +#define CONMCTXT_CNGCHMAP_V(x) ((x) << CONMCTXT_CNGCHMAP_S) +#define CONMCTXT_CNGTPMODE_CHANNEL_X 2 +#define CONMCTXT_CNGTPMODE_QUEUE_X 1 + /* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues. * The User Doorbells are each 128 bytes in length with a Simple Doorbell at * offsets 8x and a Write Combining single 64-byte Egress Queue Unit diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 03fbfd1fb3df..16c6d67370ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1123,6 +1123,7 @@ enum fw_params_param_dmaq { FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL = 0x11, FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12, FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH = 0x13, + FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20, }; enum fw_params_param_dev_diag { @@ -1377,6 +1378,7 @@ struct fw_iq_cmd { #define FW_IQ_CMD_IQFLINTCONGEN_S 27 #define FW_IQ_CMD_IQFLINTCONGEN_V(x) ((x) << FW_IQ_CMD_IQFLINTCONGEN_S) +#define FW_IQ_CMD_IQFLINTCONGEN_F FW_IQ_CMD_IQFLINTCONGEN_V(1U) #define FW_IQ_CMD_IQFLINTISCSIC_S 26 #define FW_IQ_CMD_IQFLINTISCSIC_V(x) ((x) << FW_IQ_CMD_IQFLINTISCSIC_S) @@ -1399,6 +1401,7 @@ struct fw_iq_cmd { #define FW_IQ_CMD_FL0CONGCIF_S 11 #define FW_IQ_CMD_FL0CONGCIF_V(x) ((x) << FW_IQ_CMD_FL0CONGCIF_S) +#define FW_IQ_CMD_FL0CONGCIF_F FW_IQ_CMD_FL0CONGCIF_V(1U) #define FW_IQ_CMD_FL0ONCHIP_S 10 #define FW_IQ_CMD_FL0ONCHIP_V(x) ((x) << FW_IQ_CMD_FL0ONCHIP_S) @@ -1589,6 +1592,7 @@ struct fw_eq_eth_cmd { #define FW_EQ_ETH_CMD_FETCHRO_S 22 #define FW_EQ_ETH_CMD_FETCHRO_V(x) ((x) << FW_EQ_ETH_CMD_FETCHRO_S) +#define FW_EQ_ETH_CMD_FETCHRO_F FW_EQ_ETH_CMD_FETCHRO_V(1U) #define FW_EQ_ETH_CMD_HOSTFCMODE_S 20 #define FW_EQ_ETH_CMD_HOSTFCMODE_V(x) ((x) << FW_EQ_ETH_CMD_HOSTFCMODE_S) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 1d893b0b7ddf..b2b5e5bbe04c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1021,7 +1021,7 @@ static int closest_thres(const struct sge *s, int thres) static unsigned int qtimer_val(const struct adapter *adapter, const struct sge_rspq *rspq) { - unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params); + unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params); return timer_idx < SGE_NTIMERS ? adapter->sge.timer_val[timer_idx] @@ -1086,8 +1086,8 @@ static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq, * Update the response queue's interrupt coalescing parameters and * return success. */ - rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) | - (cnt > 0 ? QINTR_CNT_EN : 0)); + rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) | + QINTR_CNT_EN_V(cnt > 0)); return 0; } @@ -1439,7 +1439,7 @@ static int cxgb4vf_get_coalesce(struct net_device *dev, coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq); coalesce->rx_max_coalesced_frames = - ((rspq->intr_params & QINTR_CNT_EN) + ((rspq->intr_params & QINTR_CNT_EN_F) ? adapter->sge.counter_val[rspq->pktcnt_idx] : 0); return 0; @@ -2393,8 +2393,9 @@ static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx, u8 pkt_cnt_idx, unsigned int size, unsigned int iqe_size) { - rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) | - (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0)); + rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) | + (pkt_cnt_idx < SGE_NCOUNTERS ? + QINTR_CNT_EN_F : 0)); rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS ? pkt_cnt_idx : 0); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 482f6de6817d..2e41d1541d73 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1100,7 +1100,7 @@ nocsum: * unknown protocol, disable HW csum * and hope a bad packet is detected */ - return TXPKT_L4CSUM_DIS; + return TXPKT_L4CSUM_DIS_F; } } else { /* @@ -1117,15 +1117,15 @@ nocsum: } if (likely(csum_type >= TX_CSUM_TCPIP)) - return TXPKT_CSUM_TYPE(csum_type) | - TXPKT_IPHDR_LEN(skb_network_header_len(skb)) | - TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN); + return TXPKT_CSUM_TYPE_V(csum_type) | + TXPKT_IPHDR_LEN_V(skb_network_header_len(skb)) | + TXPKT_ETHHDR_LEN_V(skb_network_offset(skb) - ETH_HLEN); else { int start = skb_transport_offset(skb); - return TXPKT_CSUM_TYPE(csum_type) | - TXPKT_CSUM_START(start) | - TXPKT_CSUM_LOC(start + skb->csum_offset); + return TXPKT_CSUM_TYPE_V(csum_type) | + TXPKT_CSUM_START_V(start) | + TXPKT_CSUM_LOC_V(start + skb->csum_offset); } } @@ -1160,7 +1160,7 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) { u32 wr_mid; u64 cntrl, *end; - int qidx, credits; + int qidx, credits, max_pkt_len; unsigned int flits, ndesc; struct adapter *adapter; struct sge_eth_txq *txq; @@ -1183,6 +1183,13 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->len < fw_hdr_copy_len)) goto out_free; + /* Discard the packet if the length is greater than mtu */ + max_pkt_len = ETH_HLEN + dev->mtu; + if (skb_vlan_tag_present(skb)) + max_pkt_len += VLAN_HLEN; + if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len))) + goto out_free; + /* * Figure out which TX Queue we're going to use. */ @@ -1281,29 +1288,30 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) * Fill in the LSO CPL message. */ lso->lso_ctrl = - cpu_to_be32(LSO_OPCODE(CPL_TX_PKT_LSO) | - LSO_FIRST_SLICE | - LSO_LAST_SLICE | - LSO_IPV6(v6) | - LSO_ETHHDR_LEN(eth_xtra_len/4) | - LSO_IPHDR_LEN(l3hdr_len/4) | - LSO_TCPHDR_LEN(tcp_hdr(skb)->doff)); + cpu_to_be32(LSO_OPCODE_V(CPL_TX_PKT_LSO) | + LSO_FIRST_SLICE_F | + LSO_LAST_SLICE_F | + LSO_IPV6_V(v6) | + LSO_ETHHDR_LEN_V(eth_xtra_len / 4) | + LSO_IPHDR_LEN_V(l3hdr_len / 4) | + LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff)); lso->ipid_ofst = cpu_to_be16(0); lso->mss = cpu_to_be16(ssi->gso_size); lso->seqno_offset = cpu_to_be32(0); if (is_t4(adapter->params.chip)) lso->len = cpu_to_be32(skb->len); else - lso->len = cpu_to_be32(LSO_T5_XFER_SIZE(skb->len)); + lso->len = cpu_to_be32(LSO_T5_XFER_SIZE_V(skb->len)); /* * Set up TX Packet CPL pointer, control word and perform * accounting. */ cpl = (void *)(lso + 1); - cntrl = (TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | - TXPKT_IPHDR_LEN(l3hdr_len) | - TXPKT_ETHHDR_LEN(eth_xtra_len)); + cntrl = (TXPKT_CSUM_TYPE_V(v6 ? + TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) | + TXPKT_IPHDR_LEN_V(l3hdr_len) | + TXPKT_ETHHDR_LEN_V(eth_xtra_len)); txq->tso++; txq->tx_cso += ssi->gso_segs; } else { @@ -1320,10 +1328,10 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) */ cpl = (void *)(wr + 1); if (skb->ip_summed == CHECKSUM_PARTIAL) { - cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS; + cntrl = hwcsum(skb) | TXPKT_IPCSUM_DIS_F; txq->tx_cso++; } else - cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS; + cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F; } /* @@ -1332,15 +1340,15 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) */ if (skb_vlan_tag_present(skb)) { txq->vlan_ins++; - cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(skb_vlan_tag_get(skb)); + cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb)); } /* * Fill in the TX Packet CPL message header. */ - cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE(CPL_TX_PKT_XT) | - TXPKT_INTF(pi->port_id) | - TXPKT_PF(0)); + cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE_V(CPL_TX_PKT_XT) | + TXPKT_INTF_V(pi->port_id) | + TXPKT_PF_V(0)); cpl->pack = cpu_to_be16(0); cpl->len = cpu_to_be16(skb->len); cpl->ctrl1 = cpu_to_be64(cntrl); @@ -1663,7 +1671,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, static inline bool is_new_response(const struct rsp_ctrl *rc, const struct sge_rspq *rspq) { - return RSPD_GEN(rc->type_gen) == rspq->gen; + return ((rc->type_gen >> RSPD_GEN_S) & 0x1) == rspq->gen; } /** @@ -1752,8 +1760,8 @@ static int process_responses(struct sge_rspq *rspq, int budget) * SGE. */ dma_rmb(); - rsp_type = RSPD_TYPE(rc->type_gen); - if (likely(rsp_type == RSP_TYPE_FLBUF)) { + rsp_type = RSPD_TYPE_G(rc->type_gen); + if (likely(rsp_type == RSPD_TYPE_FLBUF_X)) { struct page_frag *fp; struct pkt_gl gl; const struct rx_sw_desc *sdesc; @@ -1764,7 +1772,7 @@ static int process_responses(struct sge_rspq *rspq, int budget) * If we get a "new buffer" message from the SGE we * need to move on to the next Free List buffer. */ - if (len & RSPD_NEWBUF) { + if (len & RSPD_NEWBUF_F) { /* * We get one "new buffer" message when we * first start up a queue so we need to ignore @@ -1775,7 +1783,7 @@ static int process_responses(struct sge_rspq *rspq, int budget) 1); rspq->offset = 0; } - len = RSPD_LEN(len); + len = RSPD_LEN_G(len); } gl.tot_len = len; @@ -1818,10 +1826,10 @@ static int process_responses(struct sge_rspq *rspq, int budget) rspq->offset += ALIGN(fp->size, s->fl_align); else restore_rx_bufs(&gl, &rxq->fl, frag); - } else if (likely(rsp_type == RSP_TYPE_CPL)) { + } else if (likely(rsp_type == RSPD_TYPE_CPL_X)) { ret = rspq->handler(rspq, rspq->cur_desc, NULL); } else { - WARN_ON(rsp_type > RSP_TYPE_CPL); + WARN_ON(rsp_type > RSPD_TYPE_CPL_X); ret = 0; } @@ -1833,7 +1841,7 @@ static int process_responses(struct sge_rspq *rspq, int budget) */ const int NOMEM_TIMER_IDX = SGE_NTIMERS-1; rspq->next_intr_params = - QINTR_TIMER_IDX(NOMEM_TIMER_IDX); + QINTR_TIMER_IDX_V(NOMEM_TIMER_IDX); break; } @@ -1875,7 +1883,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget) intr_params = rspq->next_intr_params; rspq->next_intr_params = rspq->intr_params; } else - intr_params = QINTR_TIMER_IDX(SGE_TIMER_UPD_CIDX); + intr_params = QINTR_TIMER_IDX_V(SGE_TIMER_UPD_CIDX); if (unlikely(work_done == 0)) rspq->unhandled_irqs++; @@ -1936,10 +1944,10 @@ static unsigned int process_intrq(struct adapter *adapter) * never happen ... */ dma_rmb(); - if (unlikely(RSPD_TYPE(rc->type_gen) != RSP_TYPE_INTR)) { + if (unlikely(RSPD_TYPE_G(rc->type_gen) != RSPD_TYPE_INTR_X)) { dev_err(adapter->pdev_dev, "Unexpected INTRQ response type %d\n", - RSPD_TYPE(rc->type_gen)); + RSPD_TYPE_G(rc->type_gen)); continue; } @@ -1951,7 +1959,7 @@ static unsigned int process_intrq(struct adapter *adapter) * want to either make them fatal and/or conditionalized under * DEBUG. */ - qid = RSPD_QID(be32_to_cpu(rc->pldbuflen_qid)); + qid = RSPD_QID_G(be32_to_cpu(rc->pldbuflen_qid)); iq_idx = IQ_IDX(s, qid); if (unlikely(iq_idx >= MAX_INGQ)) { dev_err(adapter->pdev_dev, @@ -2243,8 +2251,12 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, * Allocate the ring for the hardware free list (with space * for its status page) along with the associated software * descriptor ring. The free list size needs to be a multiple - * of the Egress Queue Unit. + * of the Egress Queue Unit and at least 2 Egress Units larger + * than the SGE's Egress Congrestion Threshold + * (fl_starve_thres - 1). */ + if (fl->size < s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT) + fl->size = s->fl_starve_thres - 1 + 2 * FL_PER_EQ_UNIT; fl->size = roundup(fl->size, FL_PER_EQ_UNIT); fl->desc = alloc_ring(adapter->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc), diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 1bf1cdce74ac..1eafc075edae 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -35,7 +35,7 @@ #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "10.6.0.1" +#define DRV_VER "10.6.0.2" #define DRV_NAME "be2net" #define BE_NAME "Emulex BladeEngine2" #define BE3_NAME "Emulex BladeEngine3" @@ -366,6 +366,7 @@ struct be_vf_cfg { u32 tx_rate; u32 plink_tracking; u32 privileges; + bool spoofchk; }; enum vf_state { @@ -804,6 +805,7 @@ bool be_pause_supported(struct be_adapter *adapter); u32 be_get_fw_log_level(struct be_adapter *adapter); int be_update_queues(struct be_adapter *adapter); int be_poll(struct napi_struct *napi, int budget); +void be_eqd_update(struct be_adapter *adapter, bool force_update); /* * internal function to initialize-cleanup roce device. diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index fb140faeafb1..b419bde29a6f 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -140,6 +140,7 @@ static bool be_skip_err_log(u8 opcode, u16 base_status, u16 addl_status) if (base_status == MCC_STATUS_NOT_SUPPORTED || base_status == MCC_STATUS_ILLEGAL_REQUEST || addl_status == MCC_ADDL_STATUS_TOO_MANY_INTERFACES || + addl_status == MCC_ADDL_STATUS_INSUFFICIENT_VLANS || (opcode == OPCODE_COMMON_WRITE_FLASHROM && (base_status == MCC_STATUS_ILLEGAL_FIELD || addl_status == MCC_ADDL_STATUS_FLASH_IMAGE_CRC_MISMATCH))) @@ -1457,7 +1458,7 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags, *if_handle = le32_to_cpu(resp->interface_id); /* Hack to retrieve VF's pmac-id on BE3 */ - if (BE3_chip(adapter) && !be_physfn(adapter)) + if (BE3_chip(adapter) && be_virtfn(adapter)) adapter->pmac_id[0] = le32_to_cpu(resp->pmac_id); } return status; @@ -3153,7 +3154,7 @@ int be_cmd_set_mac(struct be_adapter *adapter, u8 *mac, int if_id, u32 dom) } int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, - u32 domain, u16 intf_id, u16 hsw_mode) + u32 domain, u16 intf_id, u16 hsw_mode, u8 spoofchk) { struct be_mcc_wrb *wrb; struct be_cmd_req_set_hsw_config *req; @@ -3189,6 +3190,14 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, ctxt, hsw_mode); } + /* Enable/disable both mac and vlan spoof checking */ + if (!BEx_chip(adapter) && spoofchk) { + AMAP_SET_BITS(struct amap_set_hsw_context, mac_spoofchk, + ctxt, spoofchk); + AMAP_SET_BITS(struct amap_set_hsw_context, vlan_spoofchk, + ctxt, spoofchk); + } + be_dws_cpu_to_le(req->context, sizeof(req->context)); status = be_mcc_notify_wait(adapter); @@ -3199,7 +3208,7 @@ err: /* Get Hyper switch config */ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, - u32 domain, u16 intf_id, u8 *mode) + u32 domain, u16 intf_id, u8 *mode, bool *spoofchk) { struct be_mcc_wrb *wrb; struct be_cmd_req_get_hsw_config *req; @@ -3247,6 +3256,10 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, if (mode) *mode = AMAP_GET_BITS(struct amap_get_hsw_resp_context, port_fwd_type, &resp->context); + if (spoofchk) + *spoofchk = + AMAP_GET_BITS(struct amap_get_hsw_resp_context, + spoofchk, &resp->context); } err: @@ -3258,7 +3271,7 @@ static bool be_is_wol_excluded(struct be_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; - if (!be_physfn(adapter)) + if (be_virtfn(adapter)) return true; switch (pdev->subsystem_device) { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 1ec22300e254..c713d514fcd1 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -65,7 +65,8 @@ enum mcc_base_status { enum mcc_addl_status { MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES = 0x16, MCC_ADDL_STATUS_FLASH_IMAGE_CRC_MISMATCH = 0x4d, - MCC_ADDL_STATUS_TOO_MANY_INTERFACES = 0x4a + MCC_ADDL_STATUS_TOO_MANY_INTERFACES = 0x4a, + MCC_ADDL_STATUS_INSUFFICIENT_VLANS = 0xab }; #define CQE_BASE_STATUS_MASK 0xFFFF @@ -1109,10 +1110,6 @@ struct be_cmd_req_query_fw_cfg { u32 rsvd[31]; }; -/* ASIC revisions */ -#define ASIC_REV_B0 0x10 -#define ASIC_REV_P2 0x11 - struct be_cmd_resp_query_fw_cfg { struct be_cmd_resp_hdr hdr; u32 be_config_number; @@ -1745,18 +1742,24 @@ struct be_cmd_req_set_mac_list { #define PORT_FWD_TYPE_VEPA 0x3 #define PORT_FWD_TYPE_VEB 0x2 +#define ENABLE_MAC_SPOOFCHK 0x2 +#define DISABLE_MAC_SPOOFCHK 0x3 + struct amap_set_hsw_context { u8 interface_id[16]; - u8 rsvd0[14]; + u8 rsvd0[8]; + u8 mac_spoofchk[2]; + u8 rsvd1[4]; u8 pvid_valid; u8 pport; - u8 rsvd1[6]; + u8 rsvd2[6]; u8 port_fwd_type[3]; - u8 rsvd2[7]; + u8 rsvd3[5]; + u8 vlan_spoofchk[2]; u8 pvid[16]; - u8 rsvd3[32]; u8 rsvd4[32]; u8 rsvd5[32]; + u8 rsvd6[32]; } __packed; struct be_cmd_req_set_hsw_config { @@ -1774,11 +1777,13 @@ struct amap_get_hsw_req_context { struct amap_get_hsw_resp_context { u8 rsvd0[6]; u8 port_fwd_type[3]; - u8 rsvd1[7]; + u8 rsvd1[5]; + u8 spoofchk; + u8 rsvd2; u8 pvid[16]; - u8 rsvd2[32]; u8 rsvd3[32]; u8 rsvd4[32]; + u8 rsvd5[32]; } __packed; struct be_cmd_req_get_hsw_config { @@ -2334,9 +2339,9 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, u8 mac_count, u32 domain); int be_cmd_set_mac(struct be_adapter *adapter, u8 *mac, int if_id, u32 dom); int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, u32 domain, - u16 intf_id, u16 hsw_mode); + u16 intf_id, u16 hsw_mode, u8 spoofchk); int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, u32 domain, - u16 intf_id, u8 *mode); + u16 intf_id, u8 *mode, bool *spoofchk); int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter); int be_cmd_set_fw_log_level(struct be_adapter *adapter, u32 level); int be_cmd_get_fw_log_level(struct be_adapter *adapter); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index b765c24625bf..9124a93eb474 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -368,6 +368,14 @@ static int be_set_coalesce(struct net_device *netdev, aic++; } + /* For Skyhawk, the EQD setting happens via EQ_DB when AIC is enabled. + * When AIC is disabled, persistently force set EQD value via the + * FW cmd, so that we don't have to calculate the delay multiplier + * encode value each time EQ_DB is rung + */ + if (!et->use_adaptive_rx_coalesce && skyhawk_chip(adapter)) + be_eqd_update(adapter, true); + return 0; } diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index 48840889db62..c684bb32b487 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -132,6 +132,18 @@ #define DB_EQ_NUM_POPPED_SHIFT (16) /* bits 16 - 28 */ /* Rearm bit */ #define DB_EQ_REARM_SHIFT (29) /* bit 29 */ +/* Rearm to interrupt delay encoding */ +#define DB_EQ_R2I_DLY_SHIFT (30) /* bits 30 - 31 */ + +/* Rearm to interrupt (R2I) delay multiplier encoding represents 3 different + * values configured in CEV_REARM2IRPT_DLY_MULT_CSR register. This value is + * programmed by host driver while ringing an EQ doorbell(EQ_DB) if a delay + * between rearming the EQ and next interrupt on this EQ is desired. + */ +#define R2I_DLY_ENC_0 0 /* No delay */ +#define R2I_DLY_ENC_1 1 /* maps to 160us EQ delay */ +#define R2I_DLY_ENC_2 2 /* maps to 96us EQ delay */ +#define R2I_DLY_ENC_3 3 /* maps to 48us EQ delay */ /********* Compl Q door bell *************/ #define DB_CQ_OFFSET 0x120 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a6dcbf850c1f..f15a3cfeb217 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or @@ -211,7 +211,8 @@ static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo, } static void be_eq_notify(struct be_adapter *adapter, u16 qid, - bool arm, bool clear_int, u16 num_popped) + bool arm, bool clear_int, u16 num_popped, + u32 eq_delay_mult_enc) { u32 val = 0; @@ -227,6 +228,7 @@ static void be_eq_notify(struct be_adapter *adapter, u16 qid, val |= 1 << DB_EQ_CLR_SHIFT; val |= 1 << DB_EQ_EVNT_SHIFT; val |= num_popped << DB_EQ_NUM_POPPED_SHIFT; + val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT; iowrite32(val, adapter->db + DB_EQ_OFFSET); } @@ -662,6 +664,8 @@ void be_link_status_update(struct be_adapter *adapter, u8 link_status) netif_carrier_on(netdev); else netif_carrier_off(netdev); + + netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down"); } static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb) @@ -1265,7 +1269,8 @@ static int be_vid_config(struct be_adapter *adapter) if (status) { dev_err(dev, "Setting HW VLAN filtering failed\n"); /* Set to VLAN promisc mode as setting VLAN filter failed */ - if (addl_status(status) == + if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS || + addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES) return be_set_vlan_promisc(adapter); } else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) { @@ -1466,6 +1471,7 @@ static int be_get_vf_config(struct net_device *netdev, int vf, vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT; memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN); vi->linkstate = adapter->vf_cfg[vf].plink_tracking; + vi->spoofchk = adapter->vf_cfg[vf].spoofchk; return 0; } @@ -1478,7 +1484,7 @@ static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan) int status; /* Enable Transparent VLAN Tagging */ - status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0); + status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0); if (status) return status; @@ -1507,7 +1513,7 @@ static int be_clear_vf_tvt(struct be_adapter *adapter, int vf) /* Reset Transparent VLAN Tagging. */ status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1, - vf_cfg->if_handle, 0); + vf_cfg->if_handle, 0, 0); if (status) return status; @@ -1642,6 +1648,39 @@ static int be_set_vf_link_state(struct net_device *netdev, int vf, return 0; } +static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable) +{ + struct be_adapter *adapter = netdev_priv(netdev); + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; + u8 spoofchk; + int status; + + if (!sriov_enabled(adapter)) + return -EPERM; + + if (vf >= adapter->num_vfs) + return -EINVAL; + + if (BEx_chip(adapter)) + return -EOPNOTSUPP; + + if (enable == vf_cfg->spoofchk) + return 0; + + spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK; + + status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle, + 0, spoofchk); + if (status) { + dev_err(&adapter->pdev->dev, + "Spoofchk change on VF %d failed: %#x\n", vf, status); + return be_cmd_status(status); + } + + vf_cfg->spoofchk = enable; + return 0; +} + static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts, ulong now) { @@ -1650,61 +1689,110 @@ static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts, aic->jiffies = now; } -static void be_eqd_update(struct be_adapter *adapter) +static int be_get_new_eqd(struct be_eq_obj *eqo) { - struct be_set_eqd set_eqd[MAX_EVT_QS]; - int eqd, i, num = 0, start; + struct be_adapter *adapter = eqo->adapter; + int eqd, start; struct be_aic_obj *aic; - struct be_eq_obj *eqo; struct be_rx_obj *rxo; struct be_tx_obj *txo; - u64 rx_pkts, tx_pkts; + u64 rx_pkts = 0, tx_pkts = 0; ulong now; u32 pps, delta; + int i; - for_all_evt_queues(adapter, eqo, i) { - aic = &adapter->aic_obj[eqo->idx]; - if (!aic->enable) { - if (aic->jiffies) - aic->jiffies = 0; - eqd = aic->et_eqd; - goto modify_eqd; - } + aic = &adapter->aic_obj[eqo->idx]; + if (!aic->enable) { + if (aic->jiffies) + aic->jiffies = 0; + eqd = aic->et_eqd; + return eqd; + } - rxo = &adapter->rx_obj[eqo->idx]; + for_all_rx_queues_on_eq(adapter, eqo, rxo, i) { do { start = u64_stats_fetch_begin_irq(&rxo->stats.sync); - rx_pkts = rxo->stats.rx_pkts; + rx_pkts += rxo->stats.rx_pkts; } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start)); + } - txo = &adapter->tx_obj[eqo->idx]; + for_all_tx_queues_on_eq(adapter, eqo, txo, i) { do { start = u64_stats_fetch_begin_irq(&txo->stats.sync); - tx_pkts = txo->stats.tx_reqs; + tx_pkts += txo->stats.tx_reqs; } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start)); + } - /* Skip, if wrapped around or first calculation */ - now = jiffies; - if (!aic->jiffies || time_before(now, aic->jiffies) || - rx_pkts < aic->rx_pkts_prev || - tx_pkts < aic->tx_reqs_prev) { - be_aic_update(aic, rx_pkts, tx_pkts, now); - continue; - } + /* Skip, if wrapped around or first calculation */ + now = jiffies; + if (!aic->jiffies || time_before(now, aic->jiffies) || + rx_pkts < aic->rx_pkts_prev || + tx_pkts < aic->tx_reqs_prev) { + be_aic_update(aic, rx_pkts, tx_pkts, now); + return aic->prev_eqd; + } - delta = jiffies_to_msecs(now - aic->jiffies); - pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) + - (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta); - eqd = (pps / 15000) << 2; + delta = jiffies_to_msecs(now - aic->jiffies); + if (delta == 0) + return aic->prev_eqd; - if (eqd < 8) - eqd = 0; - eqd = min_t(u32, eqd, aic->max_eqd); - eqd = max_t(u32, eqd, aic->min_eqd); + pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) + + (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta); + eqd = (pps / 15000) << 2; - be_aic_update(aic, rx_pkts, tx_pkts, now); -modify_eqd: - if (eqd != aic->prev_eqd) { + if (eqd < 8) + eqd = 0; + eqd = min_t(u32, eqd, aic->max_eqd); + eqd = max_t(u32, eqd, aic->min_eqd); + + be_aic_update(aic, rx_pkts, tx_pkts, now); + + return eqd; +} + +/* For Skyhawk-R only */ +static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo) +{ + struct be_adapter *adapter = eqo->adapter; + struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx]; + ulong now = jiffies; + int eqd; + u32 mult_enc; + + if (!aic->enable) + return 0; + + if (time_before_eq(now, aic->jiffies) || + jiffies_to_msecs(now - aic->jiffies) < 1) + eqd = aic->prev_eqd; + else + eqd = be_get_new_eqd(eqo); + + if (eqd > 100) + mult_enc = R2I_DLY_ENC_1; + else if (eqd > 60) + mult_enc = R2I_DLY_ENC_2; + else if (eqd > 20) + mult_enc = R2I_DLY_ENC_3; + else + mult_enc = R2I_DLY_ENC_0; + + aic->prev_eqd = eqd; + + return mult_enc; +} + +void be_eqd_update(struct be_adapter *adapter, bool force_update) +{ + struct be_set_eqd set_eqd[MAX_EVT_QS]; + struct be_aic_obj *aic; + struct be_eq_obj *eqo; + int i, num = 0, eqd; + + for_all_evt_queues(adapter, eqo, i) { + aic = &adapter->aic_obj[eqo->idx]; + eqd = be_get_new_eqd(eqo); + if (force_update || eqd != aic->prev_eqd) { set_eqd[num].delay_multiplier = (eqd * 65)/100; set_eqd[num].eq_id = eqo->q.id; aic->prev_eqd = eqd; @@ -2212,7 +2300,7 @@ static void be_eq_clean(struct be_eq_obj *eqo) { int num = events_get(eqo); - be_eq_notify(eqo->adapter, eqo->q.id, false, true, num); + be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0); } static void be_rx_cq_clean(struct be_rx_obj *rxo) @@ -2573,7 +2661,7 @@ static irqreturn_t be_intx(int irq, void *dev) if (num_evts) eqo->spurious_intr = 0; } - be_eq_notify(adapter, eqo->q.id, false, true, num_evts); + be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0); /* Return IRQ_HANDLED only for the the first spurious intr * after a valid intr to stop the kernel from branding @@ -2589,7 +2677,7 @@ static irqreturn_t be_msix(int irq, void *dev) { struct be_eq_obj *eqo = dev; - be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0); + be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0); napi_schedule(&eqo->napi); return IRQ_HANDLED; } @@ -2838,6 +2926,7 @@ int be_poll(struct napi_struct *napi, int budget) int max_work = 0, work, i, num_evts; struct be_rx_obj *rxo; struct be_tx_obj *txo; + u32 mult_enc = 0; num_evts = events_get(eqo); @@ -2863,10 +2952,18 @@ int be_poll(struct napi_struct *napi, int budget) if (max_work < budget) { napi_complete(napi); - be_eq_notify(adapter, eqo->q.id, true, false, num_evts); + + /* Skyhawk EQ_DB has a provision to set the rearm to interrupt + * delay via a delay multiplier encoding value + */ + if (skyhawk_chip(adapter)) + mult_enc = be_get_eq_delay_mult_enc(eqo); + + be_eq_notify(adapter, eqo->q.id, true, false, num_evts, + mult_enc); } else { /* As we'll continue in polling mode, count and clear events */ - be_eq_notify(adapter, eqo->q.id, false, false, num_evts); + be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0); } return max_work; } @@ -3015,7 +3112,7 @@ fail: dev_warn(dev, "MSIx enable failed\n"); /* INTx is not supported in VFs, so fail probe if enable_msix fails */ - if (!be_physfn(adapter)) + if (be_virtfn(adapter)) return num_vec; return 0; } @@ -3062,7 +3159,7 @@ static int be_irq_register(struct be_adapter *adapter) if (status == 0) goto done; /* INTx is not supported for VF */ - if (!be_physfn(adapter)) + if (be_virtfn(adapter)) return status; } @@ -3229,9 +3326,12 @@ static int be_rx_qs_create(struct be_adapter *adapter) memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN); - /* First time posting */ + /* Post 1 less than RXQ-len to avoid head being equal to tail, + * which is a queue empty condition + */ for_all_rx_queues(adapter, rxo, i) - be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST); + be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1); + return 0; } @@ -3263,7 +3363,7 @@ static int be_open(struct net_device *netdev) for_all_evt_queues(adapter, eqo, i) { napi_enable(&eqo->napi); be_enable_busy_poll(eqo); - be_eq_notify(adapter, eqo->q.id, true, true, 0); + be_eq_notify(adapter, eqo->q.id, true, true, 0, 0); } adapter->flags |= BE_FLAGS_NAPI_ENABLED; @@ -3563,7 +3663,7 @@ static int be_vfs_if_create(struct be_adapter *adapter) /* If a FW profile exists, then cap_flags are updated */ cap_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | - BE_IF_FLAGS_MULTICAST; + BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS; for_all_vfs(adapter, vf_cfg, vf) { if (!BE3_chip(adapter)) { @@ -3610,6 +3710,7 @@ static int be_vf_setup(struct be_adapter *adapter) struct device *dev = &adapter->pdev->dev; struct be_vf_cfg *vf_cfg; int status, old_vfs, vf; + bool spoofchk; old_vfs = pci_num_vf(adapter->pdev); @@ -3657,6 +3758,12 @@ static int be_vf_setup(struct be_adapter *adapter) if (!old_vfs) be_cmd_config_qos(adapter, 0, 0, vf + 1); + status = be_cmd_get_hsw_config(adapter, NULL, vf + 1, + vf_cfg->if_handle, NULL, + &spoofchk); + if (!status) + vf_cfg->spoofchk = spoofchk; + if (!old_vfs) { be_cmd_enable_vf(adapter, vf + 1); be_cmd_set_logical_link_config(adapter, @@ -3733,8 +3840,9 @@ static void BEx_get_resources(struct be_adapter *adapter, * *only* if it is RSS-capable. */ if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) || - !be_physfn(adapter) || (be_is_mc(adapter) && - !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) { + be_virtfn(adapter) || + (be_is_mc(adapter) && + !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) { res->max_tx_qs = 1; } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) { struct be_resources super_nic_res = {0}; @@ -4182,7 +4290,7 @@ static void be_netpoll(struct net_device *netdev) int i; for_all_evt_queues(adapter, eqo, i) { - be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0); + be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0); napi_schedule(&eqo->napi); } } @@ -4666,14 +4774,11 @@ static int lancer_fw_download(struct be_adapter *adapter, return 0; } -#define BE2_UFI 2 -#define BE3_UFI 3 -#define BE3R_UFI 10 -#define SH_UFI 4 -#define SH_P2_UFI 11 - -static int be_get_ufi_type(struct be_adapter *adapter, - struct flash_file_hdr_g3 *fhdr) +/* Check if the flash image file is compatible with the adapter that + * is being flashed. + */ +static bool be_check_ufi_compatibility(struct be_adapter *adapter, + struct flash_file_hdr_g3 *fhdr) { if (!fhdr) { dev_err(&adapter->pdev->dev, "Invalid FW UFI file"); @@ -4685,43 +4790,22 @@ static int be_get_ufi_type(struct be_adapter *adapter, */ switch (fhdr->build[0]) { case BLD_STR_UFI_TYPE_SH: - return (fhdr->asic_type_rev == ASIC_REV_P2) ? SH_P2_UFI : - SH_UFI; + if (!skyhawk_chip(adapter)) + return false; + break; case BLD_STR_UFI_TYPE_BE3: - return (fhdr->asic_type_rev == ASIC_REV_B0) ? BE3R_UFI : - BE3_UFI; + if (!BE3_chip(adapter)) + return false; + break; case BLD_STR_UFI_TYPE_BE2: - return BE2_UFI; - default: - return -1; - } -} - -/* Check if the flash image file is compatible with the adapter that - * is being flashed. - * BE3 chips with asic-rev B0 must be flashed only with BE3R_UFI type. - * Skyhawk chips with asic-rev P2 must be flashed only with SH_P2_UFI type. - */ -static bool be_check_ufi_compatibility(struct be_adapter *adapter, - struct flash_file_hdr_g3 *fhdr) -{ - int ufi_type = be_get_ufi_type(adapter, fhdr); - - switch (ufi_type) { - case SH_P2_UFI: - return skyhawk_chip(adapter); - case SH_UFI: - return (skyhawk_chip(adapter) && - adapter->asic_rev < ASIC_REV_P2); - case BE3R_UFI: - return BE3_chip(adapter); - case BE3_UFI: - return (BE3_chip(adapter) && adapter->asic_rev < ASIC_REV_B0); - case BE2_UFI: - return BE2_chip(adapter); + if (!BE2_chip(adapter)) + return false; + break; default: return false; } + + return (fhdr->asic_type_rev >= adapter->asic_rev); } static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw) @@ -4829,7 +4913,7 @@ static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, adapter->if_handle, mode == BRIDGE_MODE_VEPA ? PORT_FWD_TYPE_VEPA : - PORT_FWD_TYPE_VEB); + PORT_FWD_TYPE_VEB, 0); if (status) goto err; @@ -4861,7 +4945,8 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, hsw_mode = PORT_FWD_TYPE_VEB; } else { status = be_cmd_get_hsw_config(adapter, NULL, 0, - adapter->if_handle, &hsw_mode); + adapter->if_handle, &hsw_mode, + NULL); if (status) return 0; } @@ -5014,6 +5099,7 @@ static const struct net_device_ops be_netdev_ops = { .ndo_set_vf_rate = be_set_vf_tx_rate, .ndo_get_vf_config = be_get_vf_config, .ndo_set_vf_link_state = be_set_vf_link_state, + .ndo_set_vf_spoofchk = be_set_vf_spoofchk, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = be_netpoll, #endif @@ -5182,7 +5268,9 @@ static void be_worker(struct work_struct *work) be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST); } - be_eqd_update(adapter); + /* EQ-delay update for Skyhawk is done while notifying EQ */ + if (!skyhawk_chip(adapter)) + be_eqd_update(adapter, false); if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP) be_log_sfp_info(adapter); @@ -5202,7 +5290,7 @@ static void be_unmap_pci_bars(struct be_adapter *adapter) static int db_bar(struct be_adapter *adapter) { - if (lancer_chip(adapter) || !be_physfn(adapter)) + if (lancer_chip(adapter) || be_virtfn(adapter)) return 0; else return 4; diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c index 132866433a25..60368207bf58 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.c +++ b/drivers/net/ethernet/emulex/benet/be_roce.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h index e6f7eb1a7d87..cde6ef905ec4 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.h +++ b/drivers/net/ethernet/emulex/benet/be_roce.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005 - 2014 Emulex + * Copyright (C) 2005 - 2015 Emulex * All rights reserved. * * This program is free software; you can redistribute it and/or diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 66d47e448e4d..bf4cf3fbb5f2 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2118,6 +2118,82 @@ static void fec_enet_get_drvinfo(struct net_device *ndev, strlcpy(info->bus_info, dev_name(&ndev->dev), sizeof(info->bus_info)); } +static int fec_enet_get_regs_len(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct resource *r; + int s = 0; + + r = platform_get_resource(fep->pdev, IORESOURCE_MEM, 0); + if (r) + s = resource_size(r); + + return s; +} + +/* List of registers that can be safety be read to dump them with ethtool */ +#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ + defined(CONFIG_M520x) || defined(CONFIG_M532x) || \ + defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28) +static u32 fec_enet_register_offset[] = { + FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0, + FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL, + FEC_X_CNTRL, FEC_ADDR_LOW, FEC_ADDR_HIGH, FEC_OPD, FEC_TXIC0, FEC_TXIC1, + FEC_TXIC2, FEC_RXIC0, FEC_RXIC1, FEC_RXIC2, FEC_HASH_TABLE_HIGH, + FEC_HASH_TABLE_LOW, FEC_GRP_HASH_TABLE_HIGH, FEC_GRP_HASH_TABLE_LOW, + FEC_X_WMRK, FEC_R_BOUND, FEC_R_FSTART, FEC_R_DES_START_1, + FEC_X_DES_START_1, FEC_R_BUFF_SIZE_1, FEC_R_DES_START_2, + FEC_X_DES_START_2, FEC_R_BUFF_SIZE_2, FEC_R_DES_START_0, + FEC_X_DES_START_0, FEC_R_BUFF_SIZE_0, FEC_R_FIFO_RSFL, FEC_R_FIFO_RSEM, + FEC_R_FIFO_RAEM, FEC_R_FIFO_RAFL, FEC_RACC, FEC_RCMR_1, FEC_RCMR_2, + FEC_DMA_CFG_1, FEC_DMA_CFG_2, FEC_R_DES_ACTIVE_1, FEC_X_DES_ACTIVE_1, + FEC_R_DES_ACTIVE_2, FEC_X_DES_ACTIVE_2, FEC_QOS_SCHEME, + RMON_T_DROP, RMON_T_PACKETS, RMON_T_BC_PKT, RMON_T_MC_PKT, + RMON_T_CRC_ALIGN, RMON_T_UNDERSIZE, RMON_T_OVERSIZE, RMON_T_FRAG, + RMON_T_JAB, RMON_T_COL, RMON_T_P64, RMON_T_P65TO127, RMON_T_P128TO255, + RMON_T_P256TO511, RMON_T_P512TO1023, RMON_T_P1024TO2047, + RMON_T_P_GTE2048, RMON_T_OCTETS, + IEEE_T_DROP, IEEE_T_FRAME_OK, IEEE_T_1COL, IEEE_T_MCOL, IEEE_T_DEF, + IEEE_T_LCOL, IEEE_T_EXCOL, IEEE_T_MACERR, IEEE_T_CSERR, IEEE_T_SQE, + IEEE_T_FDXFC, IEEE_T_OCTETS_OK, + RMON_R_PACKETS, RMON_R_BC_PKT, RMON_R_MC_PKT, RMON_R_CRC_ALIGN, + RMON_R_UNDERSIZE, RMON_R_OVERSIZE, RMON_R_FRAG, RMON_R_JAB, + RMON_R_RESVD_O, RMON_R_P64, RMON_R_P65TO127, RMON_R_P128TO255, + RMON_R_P256TO511, RMON_R_P512TO1023, RMON_R_P1024TO2047, + RMON_R_P_GTE2048, RMON_R_OCTETS, + IEEE_R_DROP, IEEE_R_FRAME_OK, IEEE_R_CRC, IEEE_R_ALIGN, IEEE_R_MACERR, + IEEE_R_FDXFC, IEEE_R_OCTETS_OK +}; +#else +static u32 fec_enet_register_offset[] = { + FEC_ECNTRL, FEC_IEVENT, FEC_IMASK, FEC_IVEC, FEC_R_DES_ACTIVE_0, + FEC_R_DES_ACTIVE_1, FEC_R_DES_ACTIVE_2, FEC_X_DES_ACTIVE_0, + FEC_X_DES_ACTIVE_1, FEC_X_DES_ACTIVE_2, FEC_MII_DATA, FEC_MII_SPEED, + FEC_R_BOUND, FEC_R_FSTART, FEC_X_WMRK, FEC_X_FSTART, FEC_R_CNTRL, + FEC_MAX_FRM_LEN, FEC_X_CNTRL, FEC_ADDR_LOW, FEC_ADDR_HIGH, + FEC_GRP_HASH_TABLE_HIGH, FEC_GRP_HASH_TABLE_LOW, FEC_R_DES_START_0, + FEC_R_DES_START_1, FEC_R_DES_START_2, FEC_X_DES_START_0, + FEC_X_DES_START_1, FEC_X_DES_START_2, FEC_R_BUFF_SIZE_0, + FEC_R_BUFF_SIZE_1, FEC_R_BUFF_SIZE_2 +}; +#endif + +static void fec_enet_get_regs(struct net_device *ndev, + struct ethtool_regs *regs, void *regbuf) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + u32 __iomem *theregs = (u32 __iomem *)fep->hwp; + u32 *buf = (u32 *)regbuf; + u32 i, off; + + memset(buf, 0, regs->len); + + for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { + off = fec_enet_register_offset[i] / 4; + buf[off] = readl(&theregs[off]); + } +} + static int fec_enet_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) { @@ -2515,6 +2591,8 @@ static const struct ethtool_ops fec_enet_ethtool_ops = { .get_settings = fec_enet_get_settings, .set_settings = fec_enet_set_settings, .get_drvinfo = fec_enet_get_drvinfo, + .get_regs_len = fec_enet_get_regs_len, + .get_regs = fec_enet_get_regs, .nway_reset = fec_enet_nway_reset, .get_link = ethtool_op_get_link, .get_coalesce = fec_enet_get_coalesce, diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 4ee080d49bc0..ff875028fdff 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -516,6 +516,15 @@ static struct net_device_stats *gfar_get_stats(struct net_device *dev) return &dev->stats; } +static int gfar_set_mac_addr(struct net_device *dev, void *p) +{ + eth_mac_addr(dev, p); + + gfar_set_mac_for_addr(dev, 0, dev->dev_addr); + + return 0; +} + static const struct net_device_ops gfar_netdev_ops = { .ndo_open = gfar_enet_open, .ndo_start_xmit = gfar_start_xmit, @@ -526,7 +535,7 @@ static const struct net_device_ops gfar_netdev_ops = { .ndo_tx_timeout = gfar_timeout, .ndo_do_ioctl = gfar_ioctl, .ndo_get_stats = gfar_get_stats, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = gfar_set_mac_addr, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = gfar_netpoll, @@ -1411,6 +1420,8 @@ static int gfar_probe(struct platform_device *ofdev) dev->features |= NETIF_F_HW_VLAN_CTAG_RX; } + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + gfar_init_addr_hash_table(priv); /* Insert receive time stamps into padding alignment bytes */ @@ -2254,7 +2265,6 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) int i, rq = 0; int do_tstamp, do_csum, do_vlan; u32 bufaddr; - unsigned long flags; unsigned int nr_frags, nr_txbds, bytes_sent, fcb_len = 0; rq = skb->queue_mapping; @@ -2434,19 +2444,6 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(txq, bytes_sent); - /* We can work in parallel with gfar_clean_tx_ring(), except - * when modifying num_txbdfree. Note that we didn't grab the lock - * when we were reading the num_txbdfree and checking for available - * space, that's because outside of this function it can only grow, - * and once we've got needed space, it cannot suddenly disappear. - * - * The lock also protects us from gfar_error(), which can modify - * regs->tstat and thus retrigger the transfers, which is why we - * also must grab the lock before setting ready bit for the first - * to be transmitted BD. - */ - spin_lock_irqsave(&tx_queue->txlock, flags); - gfar_wmb(); txbdp_start->lstatus = cpu_to_be32(lstatus); @@ -2463,8 +2460,15 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_queue->cur_tx = next_txbd(txbdp, base, tx_queue->tx_ring_size); + /* We can work in parallel with gfar_clean_tx_ring(), except + * when modifying num_txbdfree. Note that we didn't grab the lock + * when we were reading the num_txbdfree and checking for available + * space, that's because outside of this function it can only grow. + */ + spin_lock_bh(&tx_queue->txlock); /* reduce TxBD free count */ tx_queue->num_txbdfree -= (nr_txbds); + spin_unlock_bh(&tx_queue->txlock); /* If the next BD still needs to be cleaned up, then the bds * are full. We need to tell the kernel to stop sending us stuff. @@ -2478,9 +2482,6 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Tell the DMA to go go go */ gfar_write(®s->tstat, TSTAT_CLEAR_THALT >> tx_queue->qindex); - /* Unlock priv */ - spin_unlock_irqrestore(&tx_queue->txlock, flags); - return NETDEV_TX_OK; dma_map_err: @@ -2622,7 +2623,6 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) skb_dirtytx = tx_queue->skb_dirtytx; while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) { - unsigned long flags; frags = skb_shinfo(skb)->nr_frags; @@ -2686,9 +2686,9 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) TX_RING_MOD_MASK(tx_ring_size); howmany++; - spin_lock_irqsave(&tx_queue->txlock, flags); + spin_lock(&tx_queue->txlock); tx_queue->num_txbdfree += nr_txbds; - spin_unlock_irqrestore(&tx_queue->txlock, flags); + spin_unlock(&tx_queue->txlock); } /* If we freed a buffer, we can restart transmission, if necessary */ @@ -3411,21 +3411,12 @@ static irqreturn_t gfar_error(int irq, void *grp_id) if (events & IEVENT_CRL) dev->stats.tx_aborted_errors++; if (events & IEVENT_XFUN) { - unsigned long flags; - netif_dbg(priv, tx_err, dev, "TX FIFO underrun, packet dropped\n"); dev->stats.tx_dropped++; atomic64_inc(&priv->extra_stats.tx_underrun); - local_irq_save(flags); - lock_tx_qs(priv); - - /* Reactivate the Tx Queues */ - gfar_write(®s->tstat, gfargrp->tstat); - - unlock_tx_qs(priv); - local_irq_restore(flags); + schedule_work(&priv->reset_task); } netif_dbg(priv, tx_err, dev, "Transmit Error\n"); } diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 3b39fdddeb57..d49bee38cd31 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -798,7 +798,7 @@ static void hip04_free_ring(struct net_device *ndev, struct device *d) for (i = 0; i < RX_DESC_NUM; i++) if (priv->rx_buf[i]) - put_page(virt_to_head_page(priv->rx_buf[i])); + skb_free_frag(priv->rx_buf[i]); for (i = 0; i < TX_DESC_NUM; i++) if (priv->tx_skb[i]) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 18134766a114..29bbb628d712 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -58,7 +58,7 @@ static struct kobj_type ktype_veth_pool; static const char ibmveth_driver_name[] = "ibmveth"; static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver"; -#define ibmveth_driver_version "1.04" +#define ibmveth_driver_version "1.05" MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>"); MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver"); @@ -100,6 +100,8 @@ struct ibmveth_stat ibmveth_stats[] = { { "tx_send_failed", IBMVETH_STAT_OFF(tx_send_failed) }, { "fw_enabled_ipv4_csum", IBMVETH_STAT_OFF(fw_ipv4_csum_support) }, { "fw_enabled_ipv6_csum", IBMVETH_STAT_OFF(fw_ipv6_csum_support) }, + { "tx_large_packets", IBMVETH_STAT_OFF(tx_large_packets) }, + { "rx_large_packets", IBMVETH_STAT_OFF(rx_large_packets) } }; /* simple methods of getting data from the current rxq entry */ @@ -852,6 +854,10 @@ static int ibmveth_set_features(struct net_device *dev, struct ibmveth_adapter *adapter = netdev_priv(dev); int rx_csum = !!(features & NETIF_F_RXCSUM); int rc; + netdev_features_t changed = features ^ dev->features; + + if (features & NETIF_F_TSO & changed) + netdev_info(dev, "TSO feature requires all partitions to have updated driver"); if (rx_csum == adapter->rx_csum) return 0; @@ -1035,6 +1041,15 @@ retry_bounce: descs[i+1].fields.address = dma_addr; } + if (skb_is_gso(skb) && !skb_is_gso_v6(skb)) { + /* Put -1 in the IP checksum to tell phyp it + * is a largesend packet and put the mss in the TCP checksum. + */ + ip_hdr(skb)->check = 0xffff; + tcp_hdr(skb)->check = cpu_to_be16(skb_shinfo(skb)->gso_size); + adapter->tx_large_packets++; + } + if (ibmveth_send(adapter, descs)) { adapter->tx_send_failed++; netdev->stats.tx_dropped++; @@ -1080,6 +1095,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) struct net_device *netdev = adapter->netdev; int frames_processed = 0; unsigned long lpar_rc; + struct iphdr *iph; restart_poll: while (frames_processed < budget) { @@ -1122,10 +1138,23 @@ restart_poll: skb_put(skb, length); skb->protocol = eth_type_trans(skb, netdev); - if (csum_good) + if (csum_good) { skb->ip_summed = CHECKSUM_UNNECESSARY; + if (be16_to_cpu(skb->protocol) == ETH_P_IP) { + iph = (struct iphdr *)skb->data; + + /* If the IP checksum is not offloaded and if the packet + * is large send, the checksum must be rebuilt. + */ + if (iph->check == 0xffff) { + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + adapter->rx_large_packets++; + } + } + } - netif_receive_skb(skb); /* send it up */ + napi_gro_receive(napi, skb); /* send it up */ netdev->stats.rx_packets++; netdev->stats.rx_bytes += length; @@ -1422,8 +1451,14 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; netdev->features |= netdev->hw_features; + /* TSO is disabled by default */ + netdev->hw_features |= NETIF_F_TSO; + memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); + if (firmware_has_feature(FW_FEATURE_CMO)) + memcpy(pool_count, pool_count_cmo, sizeof(pool_count)); + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { struct kobject *kobj = &adapter->rx_buff_pool[i].kobj; int error; diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 1f37499d4398..41dedb1fb2ae 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -104,7 +104,8 @@ static inline long h_illan_attributes(unsigned long unit_address, static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; static int pool_count[] = { 256, 512, 256, 256, 256 }; -static int pool_active[] = { 1, 1, 0, 0, 0}; +static int pool_count_cmo[] = { 256, 512, 256, 256, 64 }; +static int pool_active[] = { 1, 1, 0, 0, 1}; #define IBM_VETH_INVALID_MAP ((u16)0xffff) @@ -160,6 +161,8 @@ struct ibmveth_adapter { u64 rx_no_buffer; u64 tx_map_failed; u64 tx_send_failed; + u64 tx_large_packets; + u64 rx_large_packets; }; /* diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 1a450f4b6b12..35357ae2fe75 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -874,7 +874,7 @@ static int e100_exec_cb(struct nic *nic, struct sk_buff *skb, { struct cb *cb; unsigned long flags; - int err = 0; + int err; spin_lock_irqsave(&nic->cb_lock, flags); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 983eb4e6f7aa..74dc15055971 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -2079,11 +2079,6 @@ static void *e1000_alloc_frag(const struct e1000_adapter *a) return data; } -static void e1000_free_frag(const void *data) -{ - put_page(virt_to_head_page(data)); -} - /** * e1000_clean_rx_ring - Free Rx Buffers per Queue * @adapter: board private structure @@ -2107,7 +2102,7 @@ static void e1000_clean_rx_ring(struct e1000_adapter *adapter, adapter->rx_buffer_len, DMA_FROM_DEVICE); if (buffer_info->rxbuf.data) { - e1000_free_frag(buffer_info->rxbuf.data); + skb_free_frag(buffer_info->rxbuf.data); buffer_info->rxbuf.data = NULL; } } else if (adapter->clean_rx == e1000_clean_jumbo_rx_irq) { @@ -4594,28 +4589,28 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, data = e1000_alloc_frag(adapter); /* Failed allocation, critical failure */ if (!data) { - e1000_free_frag(olddata); + skb_free_frag(olddata); adapter->alloc_rx_buff_failed++; break; } if (!e1000_check_64k_bound(adapter, data, bufsz)) { /* give up */ - e1000_free_frag(data); - e1000_free_frag(olddata); + skb_free_frag(data); + skb_free_frag(olddata); adapter->alloc_rx_buff_failed++; break; } /* Use new allocation */ - e1000_free_frag(olddata); + skb_free_frag(olddata); } buffer_info->dma = dma_map_single(&pdev->dev, data, adapter->rx_buffer_len, DMA_FROM_DEVICE); if (dma_mapping_error(&pdev->dev, buffer_info->dma)) { - e1000_free_frag(data); + skb_free_frag(data); buffer_info->dma = 0; adapter->alloc_rx_buff_failed++; break; @@ -4637,7 +4632,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, adapter->rx_buffer_len, DMA_FROM_DEVICE); - e1000_free_frag(data); + skb_free_frag(data); buffer_info->rxbuf.data = NULL; buffer_info->dma = 0; diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index dc79ed85030b..32e77755a9c6 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -2010,7 +2010,7 @@ const struct e1000_info e1000_82573_info = { .flags2 = FLAG2_DISABLE_ASPM_L1 | FLAG2_DISABLE_ASPM_L0S, .pba = 20, - .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, + .max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN, .get_variants = e1000_get_variants_82571, .mac_ops = &e82571_mac_ops, .phy_ops = &e82_phy_ops_m88, diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 9d81c0317433..e18443a00bdb 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1015,7 +1015,7 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) u16 max_snoop, max_nosnoop; u16 max_ltr_enc; /* max LTR latency encoded */ s64 lat_ns; /* latency (ns) */ - s64 value; + u64 value; u32 rxa; if (!hw->adapter->max_frame_size) { @@ -1042,12 +1042,13 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) */ lat_ns = ((s64)rxa * 1024 - (2 * (s64)hw->adapter->max_frame_size)) * 8 * 1000; - if (lat_ns < 0) - lat_ns = 0; - else - do_div(lat_ns, speed); + if (lat_ns < 0) { + value = 0; + } else { + value = lat_ns; + do_div(value, speed); + } - value = lat_ns; while (value > PCI_LTR_VALUE_MASK) { scale++; value = DIV_ROUND_UP(value, (1 << 5)); @@ -1563,7 +1564,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) ((adapter->hw.mac.type >= e1000_pch2lan) && (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LSECCK)))) { adapter->flags &= ~FLAG_HAS_JUMBO_FRAMES; - adapter->max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN; + adapter->max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; hw->mac.ops.blink_led = NULL; } @@ -5681,7 +5682,7 @@ const struct e1000_info e1000_ich8_info = { | FLAG_HAS_FLASH | FLAG_APME_IN_WUC, .pba = 8, - .max_hw_frame_size = ETH_FRAME_LEN + ETH_FCS_LEN, + .max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5754,7 +5755,7 @@ const struct e1000_info e1000_pch2_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5774,7 +5775,7 @@ const struct e1000_info e1000_pch_lpt_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, @@ -5794,7 +5795,7 @@ const struct e1000_info e1000_pch_spt_info = { .flags2 = FLAG2_HAS_PHY_STATS | FLAG2_HAS_EEE, .pba = 26, - .max_hw_frame_size = 9018, + .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, .mac_ops = &ich8_mac_ops, .phy_ops = &ich8_phy_ops, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index c509a5c900f5..7dd2c11c3f61 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3807,7 +3807,7 @@ void e1000e_reset(struct e1000_adapter *adapter) /* reset Packet Buffer Allocation to default */ ew32(PBA, pba); - if (adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) { + if (adapter->max_frame_size > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) { /* To maintain wire speed transmits, the Tx FIFO should be * large enough to accommodate two full transmit packets, * rounded up to the next 1KB and expressed in KB. Likewise, @@ -4196,9 +4196,9 @@ static int e1000_sw_init(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; - adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN; + adapter->rx_buffer_len = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; adapter->rx_ps_bsize0 = 128; - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + adapter->max_frame_size = netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; adapter->tx_ring_count = E1000_DEFAULT_TXD; adapter->rx_ring_count = E1000_DEFAULT_RXD; @@ -5781,17 +5781,17 @@ struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev, static int e1000_change_mtu(struct net_device *netdev, int new_mtu) { struct e1000_adapter *adapter = netdev_priv(netdev); - int max_frame = new_mtu + VLAN_HLEN + ETH_HLEN + ETH_FCS_LEN; + int max_frame = new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; /* Jumbo frame support */ - if ((max_frame > ETH_FRAME_LEN + ETH_FCS_LEN) && + if ((max_frame > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) && !(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) { e_err("Jumbo Frames not supported.\n"); return -EINVAL; } /* Supported frame sizes */ - if ((new_mtu < ETH_ZLEN + ETH_FCS_LEN + VLAN_HLEN) || + if ((new_mtu < (VLAN_ETH_ZLEN + ETH_FCS_LEN)) || (max_frame > adapter->max_hw_frame_size)) { e_err("Unsupported MTU setting\n"); return -EINVAL; @@ -5831,10 +5831,8 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) adapter->rx_buffer_len = 4096; /* adjust allocation if LPE protects us, and we aren't using SBP */ - if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) || - (max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN)) - adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN - + ETH_FCS_LEN; + if (max_frame <= (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) + adapter->rx_buffer_len = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; if (netif_running(netdev)) e1000e_up(adapter); @@ -6678,6 +6676,19 @@ static void e1000_eeprom_checks(struct e1000_adapter *adapter) } } +static netdev_features_t e1000_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct e1000_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + /* Jumbo frame workaround on 82579 and newer requires CRC be stripped */ + if ((hw->mac.type >= e1000_pch2lan) && (netdev->mtu > ETH_DATA_LEN)) + features &= ~NETIF_F_RXFCS; + + return features; +} + static int e1000_set_features(struct net_device *netdev, netdev_features_t features) { @@ -6734,6 +6745,7 @@ static const struct net_device_ops e1000e_netdev_ops = { .ndo_poll_controller = e1000_netpoll, #endif .ndo_set_features = e1000_set_features, + .ndo_fix_features = e1000_fix_features, }; /** diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a0a9b1fcb5e8..f287186192bb 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1836,31 +1836,19 @@ void igb_reinit_locked(struct igb_adapter *adapter) * * @adapter: adapter struct **/ -static s32 igb_enable_mas(struct igb_adapter *adapter) +static void igb_enable_mas(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u32 connsw; - s32 ret_val = 0; - - connsw = rd32(E1000_CONNSW); - if (!(hw->phy.media_type == e1000_media_type_copper)) - return ret_val; + u32 connsw = rd32(E1000_CONNSW); /* configure for SerDes media detect */ - if (!(connsw & E1000_CONNSW_SERDESD)) { + if ((hw->phy.media_type == e1000_media_type_copper) && + (!(connsw & E1000_CONNSW_SERDESD))) { connsw |= E1000_CONNSW_ENRGSRC; connsw |= E1000_CONNSW_AUTOSENSE_EN; wr32(E1000_CONNSW, connsw); wrfl(); - } else if (connsw & E1000_CONNSW_SERDESD) { - /* already SerDes, no need to enable anything */ - return ret_val; - } else { - netdev_info(adapter->netdev, - "MAS: Unable to configure feature, disabling..\n"); - adapter->flags &= ~IGB_FLAG_MAS_ENABLE; } - return ret_val; } void igb_reset(struct igb_adapter *adapter) @@ -1980,10 +1968,9 @@ void igb_reset(struct igb_adapter *adapter) adapter->ei.get_invariants(hw); adapter->flags &= ~IGB_FLAG_MEDIA_RESET; } - if (adapter->flags & IGB_FLAG_MAS_ENABLE) { - if (igb_enable_mas(adapter)) - dev_err(&pdev->dev, - "Error enabling Media Auto Sense\n"); + if ((mac->type == e1000_82575) && + (adapter->flags & IGB_FLAG_MAS_ENABLE)) { + igb_enable_mas(adapter); } if (hw->mac.ops.init_hw(hw)) dev_err(&pdev->dev, "Hardware Error\n"); @@ -4989,6 +4976,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_tx_buffer *first; int tso; u32 tx_flags = 0; + unsigned short f; u16 count = TXD_USE_COUNT(skb_headlen(skb)); __be16 protocol = vlan_get_protocol(skb); u8 hdr_len = 0; @@ -4999,14 +4987,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, * + 1 desc for context descriptor, * otherwise try next time */ - if (NETDEV_FRAG_PAGE_MAX_SIZE > IGB_MAX_DATA_PER_TXD) { - unsigned short f; - - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); - } else { - count += skb_shinfo(skb)->nr_frags; - } + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) + count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); if (igb_maybe_stop_tx(tx_ring, count + 3)) { /* this is a hard error */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index eafa9ec802ba..9f6fb19062a0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3053,7 +3053,7 @@ static int ixgbe_get_module_info(struct net_device *dev, { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - u32 status; + s32 status; u8 sff8472_rev, addr_mode; bool page_swap = false; @@ -3061,14 +3061,14 @@ static int ixgbe_get_module_info(struct net_device *dev, status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_SFF_8472_COMP, &sff8472_rev); - if (status != 0) + if (status) return -EIO; /* addressing mode is not supported */ status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_SFF_8472_SWAP, &addr_mode); - if (status != 0) + if (status) return -EIO; if (addr_mode & IXGBE_SFF_ADDRESSING_MODE) { @@ -3095,7 +3095,7 @@ static int ixgbe_get_module_eeprom(struct net_device *dev, { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - u32 status = IXGBE_ERR_PHY_ADDR_INVALID; + s32 status = IXGBE_ERR_PHY_ADDR_INVALID; u8 databyte = 0xFF; int i = 0; @@ -3112,7 +3112,7 @@ static int ixgbe_get_module_eeprom(struct net_device *dev, else status = hw->phy.ops.read_i2c_sff8472(hw, i, &databyte); - if (status != 0) + if (status) return -EIO; data[i - ee->offset] = databyte; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5be12a00e1f4..23d82b34314e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4757,7 +4757,7 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) { u32 speed; bool autoneg, link_up = false; - u32 ret = IXGBE_ERR_LINK_SETUP; + int ret = IXGBE_ERR_LINK_SETUP; if (hw->mac.ops.check_link) ret = hw->mac.ops.check_link(hw, &speed, &link_up, false); @@ -8022,7 +8022,7 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { - u32 status; + int status; __u16 mode; if (nla_type(attr) != IFLA_BRIDGE_MODE) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 8a2be444113d..af828f89419f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -317,14 +317,14 @@ bool ixgbe_check_reset_blocked(struct ixgbe_hw *hw) **/ static s32 ixgbe_get_phy_id(struct ixgbe_hw *hw) { - u32 status; + s32 status; u16 phy_id_high = 0; u16 phy_id_low = 0; status = hw->phy.ops.read_reg(hw, MDIO_DEVID1, MDIO_MMD_PMAPMD, &phy_id_high); - if (status == 0) { + if (!status) { hw->phy.id = (u32)(phy_id_high << 16); status = hw->phy.ops.read_reg(hw, MDIO_DEVID2, MDIO_MMD_PMAPMD, &phy_id_low); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index f5f948d08b43..0a8b5e42e1a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -696,14 +696,14 @@ static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw) /* Release both semaphores by writing 0 to the bits REGSMP and SMBI */ - swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); - swsm &= ~IXGBE_SWSM_SMBI; - IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); - swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); swsm &= ~IXGBE_SWFW_REGSMP; IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swsm); + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + swsm &= ~IXGBE_SWSM_SMBI; + IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); + IXGBE_WRITE_FLUSH(hw); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index cf5cf819a6b8..b0236985e915 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -103,6 +103,39 @@ static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw) return 0; } +/** + * ixgbe_iosf_wait - Wait for IOSF command completion + * @hw: pointer to hardware structure + * @ctrl: pointer to location to receive final IOSF control value + * + * Return: failing status on timeout + * + * Note: ctrl can be NULL if the IOSF control register value is not needed + */ +static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl) +{ + u32 i, command; + + /* Check every 10 usec to see if the address cycle completed. + * The SB IOSF BUSY bit will clear when the operation is + * complete. + */ + for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { + command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL); + if (!(command & IXGBE_SB_IOSF_CTRL_BUSY)) + break; + usleep_range(10, 20); + } + if (ctrl) + *ctrl = command; + if (i == IXGBE_MDIO_COMMAND_TIMEOUT) { + hw_dbg(hw, "IOSF wait timed out\n"); + return IXGBE_ERR_PHY; + } + + return 0; +} + /** ixgbe_read_iosf_sb_reg_x550 - Writes a value to specified register of the * IOSF device * @hw: pointer to hardware structure @@ -113,7 +146,17 @@ static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw) static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u32 *data) { - u32 i, command, error; + u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM; + u32 command, error; + s32 ret; + + ret = hw->mac.ops.acquire_swfw_sync(hw, gssr); + if (ret) + return ret; + + ret = ixgbe_iosf_wait(hw, NULL); + if (ret) + goto out; command = ((reg_addr << IXGBE_SB_IOSF_CTRL_ADDR_SHIFT) | (device_type << IXGBE_SB_IOSF_CTRL_TARGET_SELECT_SHIFT)); @@ -121,17 +164,7 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, /* Write IOSF control register */ IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL, command); - /* Check every 10 usec to see if the address cycle completed. - * The SB IOSF BUSY bit will clear when the operation is - * complete - */ - for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { - usleep_range(10, 20); - - command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL); - if ((command & IXGBE_SB_IOSF_CTRL_BUSY) == 0) - break; - } + ret = ixgbe_iosf_wait(hw, &command); if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> @@ -140,14 +173,12 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, return IXGBE_ERR_PHY; } - if (i == IXGBE_MDIO_COMMAND_TIMEOUT) { - hw_dbg(hw, "Read timed out\n"); - return IXGBE_ERR_PHY; - } - - *data = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_DATA); + if (!ret) + *data = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_DATA); - return 0; +out: + hw->mac.ops.release_swfw_sync(hw, gssr); + return ret; } /** ixgbe_read_ee_hostif_data_X550 - Read EEPROM word using a host interface @@ -789,7 +820,17 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u32 data) { - u32 i, command, error; + u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM; + u32 command, error; + s32 ret; + + ret = hw->mac.ops.acquire_swfw_sync(hw, gssr); + if (ret) + return ret; + + ret = ixgbe_iosf_wait(hw, NULL); + if (ret) + goto out; command = ((reg_addr << IXGBE_SB_IOSF_CTRL_ADDR_SHIFT) | (device_type << IXGBE_SB_IOSF_CTRL_TARGET_SELECT_SHIFT)); @@ -800,17 +841,7 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, /* Write IOSF data register */ IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_DATA, data); - /* Check every 10 usec to see if the address cycle completed. - * The SB IOSF BUSY bit will clear when the operation is - * complete - */ - for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { - usleep_range(10, 20); - - command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL); - if ((command & IXGBE_SB_IOSF_CTRL_BUSY) == 0) - break; - } + ret = ixgbe_iosf_wait(hw, &command); if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >> @@ -819,12 +850,9 @@ static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, return IXGBE_ERR_PHY; } - if (i == IXGBE_MDIO_COMMAND_TIMEOUT) { - hw_dbg(hw, "Write timed out\n"); - return IXGBE_ERR_PHY; - } - - return 0; +out: + hw->mac.ops.release_swfw_sync(hw, gssr); + return ret; } /** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode. @@ -1035,7 +1063,7 @@ static s32 ixgbe_setup_kr_x550em(struct ixgbe_hw *hw) **/ static s32 ixgbe_setup_internal_phy_x550em(struct ixgbe_hw *hw) { - u32 status; + s32 status; u16 lasi, autoneg_status, speed; ixgbe_link_speed force_speed; @@ -1177,7 +1205,7 @@ static enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw) **/ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) { - u32 status; + s32 status; u16 reg; u32 retries = 2; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ce5f7f9cff06..ecce8261ce3b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1359,7 +1359,7 @@ static void *mvneta_frag_alloc(const struct mvneta_port *pp) static void mvneta_frag_free(const struct mvneta_port *pp, void *data) { if (likely(pp->frag_size <= PAGE_SIZE)) - put_page(virt_to_head_page(data)); + skb_free_frag(data); else kfree(data); } diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 6af028d5f9bc..2f87909f5186 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -839,7 +839,7 @@ static const struct of_device_id qca_spi_of_match[] = { MODULE_DEVICE_TABLE(of, qca_spi_of_match); static int -qca_spi_probe(struct spi_device *spi_device) +qca_spi_probe(struct spi_device *spi) { struct qcaspi *qca = NULL; struct net_device *qcaspi_devs = NULL; @@ -847,52 +847,52 @@ qca_spi_probe(struct spi_device *spi_device) u16 signature; const char *mac; - if (!spi_device->dev.of_node) { - dev_err(&spi_device->dev, "Missing device tree\n"); + if (!spi->dev.of_node) { + dev_err(&spi->dev, "Missing device tree\n"); return -EINVAL; } - legacy_mode = of_property_read_bool(spi_device->dev.of_node, + legacy_mode = of_property_read_bool(spi->dev.of_node, "qca,legacy-mode"); if (qcaspi_clkspeed == 0) { - if (spi_device->max_speed_hz) - qcaspi_clkspeed = spi_device->max_speed_hz; + if (spi->max_speed_hz) + qcaspi_clkspeed = spi->max_speed_hz; else qcaspi_clkspeed = QCASPI_CLK_SPEED; } if ((qcaspi_clkspeed < QCASPI_CLK_SPEED_MIN) || (qcaspi_clkspeed > QCASPI_CLK_SPEED_MAX)) { - dev_info(&spi_device->dev, "Invalid clkspeed: %d\n", + dev_info(&spi->dev, "Invalid clkspeed: %d\n", qcaspi_clkspeed); return -EINVAL; } if ((qcaspi_burst_len < QCASPI_BURST_LEN_MIN) || (qcaspi_burst_len > QCASPI_BURST_LEN_MAX)) { - dev_info(&spi_device->dev, "Invalid burst len: %d\n", + dev_info(&spi->dev, "Invalid burst len: %d\n", qcaspi_burst_len); return -EINVAL; } if ((qcaspi_pluggable < QCASPI_PLUGGABLE_MIN) || (qcaspi_pluggable > QCASPI_PLUGGABLE_MAX)) { - dev_info(&spi_device->dev, "Invalid pluggable: %d\n", + dev_info(&spi->dev, "Invalid pluggable: %d\n", qcaspi_pluggable); return -EINVAL; } - dev_info(&spi_device->dev, "ver=%s, clkspeed=%d, burst_len=%d, pluggable=%d\n", + dev_info(&spi->dev, "ver=%s, clkspeed=%d, burst_len=%d, pluggable=%d\n", QCASPI_DRV_VERSION, qcaspi_clkspeed, qcaspi_burst_len, qcaspi_pluggable); - spi_device->mode = SPI_MODE_3; - spi_device->max_speed_hz = qcaspi_clkspeed; - if (spi_setup(spi_device) < 0) { - dev_err(&spi_device->dev, "Unable to setup SPI device\n"); + spi->mode = SPI_MODE_3; + spi->max_speed_hz = qcaspi_clkspeed; + if (spi_setup(spi) < 0) { + dev_err(&spi->dev, "Unable to setup SPI device\n"); return -EFAULT; } @@ -905,23 +905,23 @@ qca_spi_probe(struct spi_device *spi_device) qca = netdev_priv(qcaspi_devs); if (!qca) { free_netdev(qcaspi_devs); - dev_err(&spi_device->dev, "Fail to retrieve private structure\n"); + dev_err(&spi->dev, "Fail to retrieve private structure\n"); return -ENOMEM; } qca->net_dev = qcaspi_devs; - qca->spi_dev = spi_device; + qca->spi_dev = spi; qca->legacy_mode = legacy_mode; - spi_set_drvdata(spi_device, qcaspi_devs); + spi_set_drvdata(spi, qcaspi_devs); - mac = of_get_mac_address(spi_device->dev.of_node); + mac = of_get_mac_address(spi->dev.of_node); if (mac) ether_addr_copy(qca->net_dev->dev_addr, mac); if (!is_valid_ether_addr(qca->net_dev->dev_addr)) { eth_hw_addr_random(qca->net_dev); - dev_info(&spi_device->dev, "Using random MAC address: %pM\n", + dev_info(&spi->dev, "Using random MAC address: %pM\n", qca->net_dev->dev_addr); } @@ -932,7 +932,7 @@ qca_spi_probe(struct spi_device *spi_device) qcaspi_read_register(qca, SPI_REG_SIGNATURE, &signature); if (signature != QCASPI_GOOD_SIGNATURE) { - dev_err(&spi_device->dev, "Invalid signature (0x%04X)\n", + dev_err(&spi->dev, "Invalid signature (0x%04X)\n", signature); free_netdev(qcaspi_devs); return -EFAULT; @@ -940,7 +940,7 @@ qca_spi_probe(struct spi_device *spi_device) } if (register_netdev(qcaspi_devs)) { - dev_info(&spi_device->dev, "Unable to register net device %s\n", + dev_info(&spi->dev, "Unable to register net device %s\n", qcaspi_devs->name); free_netdev(qcaspi_devs); return -EFAULT; @@ -952,9 +952,9 @@ qca_spi_probe(struct spi_device *spi_device) } static int -qca_spi_remove(struct spi_device *spi_device) +qca_spi_remove(struct spi_device *spi) { - struct net_device *qcaspi_devs = spi_get_drvdata(spi_device); + struct net_device *qcaspi_devs = spi_get_drvdata(spi); struct qcaspi *qca = netdev_priv(qcaspi_devs); qcaspi_remove_device_debugfs(qca); diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index ec251531bd9f..f0a9cb44be6b 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -181,7 +181,7 @@ struct rocker_desc_info { size_t data_size; size_t tlv_size; struct rocker_desc *desc; - DEFINE_DMA_UNMAP_ADDR(mapaddr); + dma_addr_t mapaddr; }; struct rocker_dma_ring_info { @@ -225,6 +225,7 @@ struct rocker_port { struct napi_struct napi_rx; struct rocker_dma_ring_info tx_ring; struct rocker_dma_ring_info rx_ring; + struct list_head trans_mem; }; struct rocker { @@ -236,21 +237,21 @@ struct rocker { struct { u64 id; } hw; - spinlock_t cmd_ring_lock; + spinlock_t cmd_ring_lock; /* for cmd ring accesses */ struct rocker_dma_ring_info cmd_ring; struct rocker_dma_ring_info event_ring; DECLARE_HASHTABLE(flow_tbl, 16); - spinlock_t flow_tbl_lock; + spinlock_t flow_tbl_lock; /* for flow tbl accesses */ u64 flow_tbl_next_cookie; DECLARE_HASHTABLE(group_tbl, 16); - spinlock_t group_tbl_lock; + spinlock_t group_tbl_lock; /* for group tbl accesses */ DECLARE_HASHTABLE(fdb_tbl, 16); - spinlock_t fdb_tbl_lock; + spinlock_t fdb_tbl_lock; /* for fdb tbl accesses */ unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN]; DECLARE_HASHTABLE(internal_vlan_tbl, 8); - spinlock_t internal_vlan_tbl_lock; + spinlock_t internal_vlan_tbl_lock; /* for vlan tbl accesses */ DECLARE_HASHTABLE(neigh_tbl, 16); - spinlock_t neigh_tbl_lock; + spinlock_t neigh_tbl_lock; /* for neigh tbl accesses */ u32 neigh_tbl_next_index; }; @@ -325,16 +326,83 @@ static bool rocker_port_is_bridged(struct rocker_port *rocker_port) return !!rocker_port->bridge_dev; } +static void *__rocker_port_mem_alloc(struct rocker_port *rocker_port, + enum switchdev_trans trans, size_t size) +{ + struct list_head *elem = NULL; + + /* If in transaction prepare phase, allocate the memory + * and enqueue it on a per-port list. If in transaction + * commit phase, dequeue the memory from the per-port list + * rather than re-allocating the memory. The idea is the + * driver code paths for prepare and commit are identical + * so the memory allocated in the prepare phase is the + * memory used in the commit phase. + */ + + switch (trans) { + case SWITCHDEV_TRANS_PREPARE: + elem = kzalloc(size + sizeof(*elem), GFP_KERNEL); + if (!elem) + return NULL; + list_add_tail(elem, &rocker_port->trans_mem); + break; + case SWITCHDEV_TRANS_COMMIT: + BUG_ON(list_empty(&rocker_port->trans_mem)); + elem = rocker_port->trans_mem.next; + list_del_init(elem); + break; + case SWITCHDEV_TRANS_NONE: + elem = kzalloc(size + sizeof(*elem), GFP_KERNEL); + if (elem) + INIT_LIST_HEAD(elem); + break; + default: + break; + } + + return elem ? elem + 1 : NULL; +} + +static void *rocker_port_kzalloc(struct rocker_port *rocker_port, + enum switchdev_trans trans, size_t size) +{ + return __rocker_port_mem_alloc(rocker_port, trans, size); +} + +static void *rocker_port_kcalloc(struct rocker_port *rocker_port, + enum switchdev_trans trans, size_t n, + size_t size) +{ + return __rocker_port_mem_alloc(rocker_port, trans, n * size); +} + +static void rocker_port_kfree(struct rocker_port *rocker_port, + enum switchdev_trans trans, const void *mem) +{ + struct list_head *elem; + + /* Frees are ignored if in transaction prepare phase. The + * memory remains on the per-port list until freed in the + * commit phase. + */ + + if (trans == SWITCHDEV_TRANS_PREPARE) + return; + + elem = (struct list_head *)mem - 1; + BUG_ON(!list_empty(elem)); + kfree(elem); +} + struct rocker_wait { wait_queue_head_t wait; bool done; - bool nowait; }; static void rocker_wait_reset(struct rocker_wait *wait) { wait->done = false; - wait->nowait = false; } static void rocker_wait_init(struct rocker_wait *wait) @@ -343,20 +411,23 @@ static void rocker_wait_init(struct rocker_wait *wait) rocker_wait_reset(wait); } -static struct rocker_wait *rocker_wait_create(gfp_t gfp) +static struct rocker_wait *rocker_wait_create(struct rocker_port *rocker_port, + enum switchdev_trans trans) { struct rocker_wait *wait; - wait = kmalloc(sizeof(*wait), gfp); + wait = rocker_port_kzalloc(rocker_port, trans, sizeof(*wait)); if (!wait) return NULL; rocker_wait_init(wait); return wait; } -static void rocker_wait_destroy(struct rocker_wait *work) +static void rocker_wait_destroy(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct rocker_wait *wait) { - kfree(work); + rocker_port_kfree(rocker_port, trans, wait); } static bool rocker_wait_event_timeout(struct rocker_wait *wait, @@ -1317,12 +1388,7 @@ static irqreturn_t rocker_cmd_irq_handler(int irq, void *dev_id) spin_lock(&rocker->cmd_ring_lock); while ((desc_info = rocker_desc_tail_get(&rocker->cmd_ring))) { wait = rocker_desc_cookie_ptr_get(desc_info); - if (wait->nowait) { - rocker_desc_gen_clear(desc_info); - rocker_wait_destroy(wait); - } else { - rocker_wait_wake_up(wait); - } + rocker_wait_wake_up(wait); credits++; } spin_unlock(&rocker->cmd_ring_lock); @@ -1374,22 +1440,44 @@ static int rocker_event_link_change(struct rocker *rocker, } #define ROCKER_OP_FLAG_REMOVE BIT(0) -#define ROCKER_OP_FLAG_NOWAIT BIT(1) -#define ROCKER_OP_FLAG_LEARNED BIT(2) -#define ROCKER_OP_FLAG_REFRESH BIT(3) +#define ROCKER_OP_FLAG_LEARNED BIT(1) +#define ROCKER_OP_FLAG_REFRESH BIT(2) static int rocker_port_fdb(struct rocker_port *rocker_port, + enum switchdev_trans trans, const unsigned char *addr, __be16 vlan_id, int flags); +struct rocker_mac_vlan_seen_work { + struct work_struct work; + struct rocker_port *rocker_port; + int flags; + unsigned char addr[ETH_ALEN]; + __be16 vlan_id; +}; + +static void rocker_event_mac_vlan_seen_work(struct work_struct *work) +{ + struct rocker_mac_vlan_seen_work *sw = + container_of(work, struct rocker_mac_vlan_seen_work, work); + + rtnl_lock(); + rocker_port_fdb(sw->rocker_port, SWITCHDEV_TRANS_NONE, + sw->addr, sw->vlan_id, sw->flags); + rtnl_unlock(); + + kfree(work); +} + static int rocker_event_mac_vlan_seen(struct rocker *rocker, const struct rocker_tlv *info) { + struct rocker_mac_vlan_seen_work *sw; struct rocker_tlv *attrs[ROCKER_TLV_EVENT_MAC_VLAN_MAX + 1]; unsigned int port_number; struct rocker_port *rocker_port; unsigned char *addr; - int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_LEARNED; + int flags = ROCKER_OP_FLAG_LEARNED; __be16 vlan_id; rocker_tlv_parse_nested(attrs, ROCKER_TLV_EVENT_MAC_VLAN_MAX, info); @@ -1411,7 +1499,20 @@ static int rocker_event_mac_vlan_seen(struct rocker *rocker, rocker_port->stp_state != BR_STATE_FORWARDING) return 0; - return rocker_port_fdb(rocker_port, addr, vlan_id, flags); + sw = kmalloc(sizeof(*sw), GFP_ATOMIC); + if (!sw) + return -ENOMEM; + + INIT_WORK(&sw->work, rocker_event_mac_vlan_seen_work); + + sw->rocker_port = rocker_port; + sw->flags = flags; + ether_addr_copy(sw->addr, addr); + sw->vlan_id = vlan_id; + + schedule_work(&sw->work); + + return 0; } static int rocker_event_process(struct rocker *rocker, @@ -1494,41 +1595,44 @@ typedef int (*rocker_cmd_cb_t)(struct rocker *rocker, static int rocker_cmd_exec(struct rocker *rocker, struct rocker_port *rocker_port, + enum switchdev_trans trans, rocker_cmd_cb_t prepare, void *prepare_priv, - rocker_cmd_cb_t process, void *process_priv, - bool nowait) + rocker_cmd_cb_t process, void *process_priv) { struct rocker_desc_info *desc_info; struct rocker_wait *wait; unsigned long flags; int err; - wait = rocker_wait_create(nowait ? GFP_ATOMIC : GFP_KERNEL); + wait = rocker_wait_create(rocker_port, trans); if (!wait) return -ENOMEM; - wait->nowait = nowait; spin_lock_irqsave(&rocker->cmd_ring_lock, flags); + desc_info = rocker_desc_head_get(&rocker->cmd_ring); if (!desc_info) { spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags); err = -EAGAIN; goto out; } + err = prepare(rocker, rocker_port, desc_info, prepare_priv); if (err) { spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags); goto out; } + rocker_desc_cookie_ptr_set(desc_info, wait); - rocker_desc_head_set(rocker, &rocker->cmd_ring, desc_info); - spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags); - if (nowait) - return 0; + if (trans != SWITCHDEV_TRANS_PREPARE) + rocker_desc_head_set(rocker, &rocker->cmd_ring, desc_info); - if (!rocker_wait_event_timeout(wait, HZ / 10)) - return -EIO; + spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags); + + if (trans != SWITCHDEV_TRANS_PREPARE) + if (!rocker_wait_event_timeout(wait, HZ / 10)) + return -EIO; err = rocker_desc_err(desc_info); if (err) @@ -1539,7 +1643,7 @@ static int rocker_cmd_exec(struct rocker *rocker, rocker_desc_gen_clear(desc_info); out: - rocker_wait_destroy(wait); + rocker_wait_destroy(rocker_port, trans, wait); return err; } @@ -1762,41 +1866,46 @@ static int rocker_cmd_get_port_settings_ethtool(struct rocker_port *rocker_port, struct ethtool_cmd *ecmd) { return rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_get_port_settings_prep, NULL, rocker_cmd_get_port_settings_ethtool_proc, - ecmd, false); + ecmd); } static int rocker_cmd_get_port_settings_macaddr(struct rocker_port *rocker_port, unsigned char *macaddr) { return rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_get_port_settings_prep, NULL, rocker_cmd_get_port_settings_macaddr_proc, - macaddr, false); + macaddr); } static int rocker_cmd_set_port_settings_ethtool(struct rocker_port *rocker_port, struct ethtool_cmd *ecmd) { return rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_set_port_settings_ethtool_prep, - ecmd, NULL, NULL, false); + ecmd, NULL, NULL); } static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port, unsigned char *macaddr) { return rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_set_port_settings_macaddr_prep, - macaddr, NULL, NULL, false); + macaddr, NULL, NULL); } -static int rocker_port_set_learning(struct rocker_port *rocker_port) +static int rocker_port_set_learning(struct rocker_port *rocker_port, + enum switchdev_trans trans) { - return rocker_cmd_exec(rocker_port->rocker, rocker_port, + return rocker_cmd_exec(rocker_port->rocker, rocker_port, trans, rocker_cmd_set_port_learning_prep, - NULL, NULL, NULL, false); + NULL, NULL, NULL); } static int rocker_cmd_flow_tbl_add_ig_port(struct rocker_desc_info *desc_info, @@ -2308,8 +2417,8 @@ rocker_flow_tbl_find(struct rocker *rocker, struct rocker_flow_tbl_entry *match) } static int rocker_flow_tbl_add(struct rocker_port *rocker_port, - struct rocker_flow_tbl_entry *match, - bool nowait) + enum switchdev_trans trans, + struct rocker_flow_tbl_entry *match) { struct rocker *rocker = rocker_port->rocker; struct rocker_flow_tbl_entry *found; @@ -2324,8 +2433,9 @@ static int rocker_flow_tbl_add(struct rocker_port *rocker_port, if (found) { match->cookie = found->cookie; - hash_del(&found->entry); - kfree(found); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); + rocker_port_kfree(rocker_port, trans, found); found = match; found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD; } else { @@ -2334,18 +2444,19 @@ static int rocker_flow_tbl_add(struct rocker_port *rocker_port, found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD; } - hash_add(rocker->flow_tbl, &found->entry, found->key_crc32); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_add(rocker->flow_tbl, &found->entry, found->key_crc32); spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags); - return rocker_cmd_exec(rocker, rocker_port, + return rocker_cmd_exec(rocker, rocker_port, trans, rocker_cmd_flow_tbl_add, - found, NULL, NULL, nowait); + found, NULL, NULL); } static int rocker_flow_tbl_del(struct rocker_port *rocker_port, - struct rocker_flow_tbl_entry *match, - bool nowait) + enum switchdev_trans trans, + struct rocker_flow_tbl_entry *match) { struct rocker *rocker = rocker_port->rocker; struct rocker_flow_tbl_entry *found; @@ -2360,47 +2471,43 @@ static int rocker_flow_tbl_del(struct rocker_port *rocker_port, found = rocker_flow_tbl_find(rocker, match); if (found) { - hash_del(&found->entry); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL; } spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags); - kfree(match); + rocker_port_kfree(rocker_port, trans, match); if (found) { - err = rocker_cmd_exec(rocker, rocker_port, + err = rocker_cmd_exec(rocker, rocker_port, trans, rocker_cmd_flow_tbl_del, - found, NULL, NULL, nowait); - kfree(found); + found, NULL, NULL); + rocker_port_kfree(rocker_port, trans, found); } return err; } -static gfp_t rocker_op_flags_gfp(int flags) -{ - return flags & ROCKER_OP_FLAG_NOWAIT ? GFP_ATOMIC : GFP_KERNEL; -} - static int rocker_flow_tbl_do(struct rocker_port *rocker_port, - int flags, struct rocker_flow_tbl_entry *entry) + enum switchdev_trans trans, int flags, + struct rocker_flow_tbl_entry *entry) { - bool nowait = flags & ROCKER_OP_FLAG_NOWAIT; - if (flags & ROCKER_OP_FLAG_REMOVE) - return rocker_flow_tbl_del(rocker_port, entry, nowait); + return rocker_flow_tbl_del(rocker_port, trans, entry); else - return rocker_flow_tbl_add(rocker_port, entry, nowait); + return rocker_flow_tbl_add(rocker_port, trans, entry); } static int rocker_flow_tbl_ig_port(struct rocker_port *rocker_port, - int flags, u32 in_pport, u32 in_pport_mask, + enum switchdev_trans trans, int flags, + u32 in_pport, u32 in_pport_mask, enum rocker_of_dpa_table_id goto_tbl) { struct rocker_flow_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2410,18 +2517,19 @@ static int rocker_flow_tbl_ig_port(struct rocker_port *rocker_port, entry->key.ig_port.in_pport_mask = in_pport_mask; entry->key.ig_port.goto_tbl = goto_tbl; - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static int rocker_flow_tbl_vlan(struct rocker_port *rocker_port, - int flags, u32 in_pport, - __be16 vlan_id, __be16 vlan_id_mask, + enum switchdev_trans trans, int flags, + u32 in_pport, __be16 vlan_id, + __be16 vlan_id_mask, enum rocker_of_dpa_table_id goto_tbl, bool untagged, __be16 new_vlan_id) { struct rocker_flow_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2435,10 +2543,11 @@ static int rocker_flow_tbl_vlan(struct rocker_port *rocker_port, entry->key.vlan.untagged = untagged; entry->key.vlan.new_vlan_id = new_vlan_id; - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static int rocker_flow_tbl_term_mac(struct rocker_port *rocker_port, + enum switchdev_trans trans, u32 in_pport, u32 in_pport_mask, __be16 eth_type, const u8 *eth_dst, const u8 *eth_dst_mask, __be16 vlan_id, @@ -2447,7 +2556,7 @@ static int rocker_flow_tbl_term_mac(struct rocker_port *rocker_port, { struct rocker_flow_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2471,11 +2580,11 @@ static int rocker_flow_tbl_term_mac(struct rocker_port *rocker_port, entry->key.term_mac.vlan_id_mask = vlan_id_mask; entry->key.term_mac.copy_to_cpu = copy_to_cpu; - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port, - int flags, + enum switchdev_trans trans, int flags, const u8 *eth_dst, const u8 *eth_dst_mask, __be16 vlan_id, u32 tunnel_id, enum rocker_of_dpa_table_id goto_tbl, @@ -2487,7 +2596,7 @@ static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port, bool dflt = !eth_dst || (eth_dst && eth_dst_mask); bool wild = false; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2500,7 +2609,7 @@ static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port, if (eth_dst_mask) { entry->key.bridge.has_eth_dst_mask = 1; ether_addr_copy(entry->key.bridge.eth_dst_mask, eth_dst_mask); - if (memcmp(eth_dst_mask, ff_mac, ETH_ALEN)) + if (!ether_addr_equal(eth_dst_mask, ff_mac)) wild = true; } @@ -2525,10 +2634,11 @@ static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port, entry->key.bridge.group_id = group_id; entry->key.bridge.copy_to_cpu = copy_to_cpu; - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static int rocker_flow_tbl_ucast4_routing(struct rocker_port *rocker_port, + enum switchdev_trans trans, __be16 eth_type, __be32 dst, __be32 dst_mask, u32 priority, enum rocker_of_dpa_table_id goto_tbl, @@ -2536,7 +2646,7 @@ static int rocker_flow_tbl_ucast4_routing(struct rocker_port *rocker_port, { struct rocker_flow_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2550,30 +2660,29 @@ static int rocker_flow_tbl_ucast4_routing(struct rocker_port *rocker_port, entry->key_len = offsetof(struct rocker_flow_tbl_key, ucast_routing.group_id); - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static int rocker_flow_tbl_acl(struct rocker_port *rocker_port, - int flags, u32 in_pport, - u32 in_pport_mask, + enum switchdev_trans trans, int flags, + u32 in_pport, u32 in_pport_mask, const u8 *eth_src, const u8 *eth_src_mask, const u8 *eth_dst, const u8 *eth_dst_mask, - __be16 eth_type, - __be16 vlan_id, __be16 vlan_id_mask, - u8 ip_proto, u8 ip_proto_mask, - u8 ip_tos, u8 ip_tos_mask, + __be16 eth_type, __be16 vlan_id, + __be16 vlan_id_mask, u8 ip_proto, + u8 ip_proto_mask, u8 ip_tos, u8 ip_tos_mask, u32 group_id) { u32 priority; struct rocker_flow_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; priority = ROCKER_PRIORITY_ACL_NORMAL; if (eth_dst && eth_dst_mask) { - if (memcmp(eth_dst_mask, mcast_mac, ETH_ALEN) == 0) + if (ether_addr_equal(eth_dst_mask, mcast_mac)) priority = ROCKER_PRIORITY_ACL_DFLT; else if (is_link_local_ether_addr(eth_dst)) priority = ROCKER_PRIORITY_ACL_CTRL; @@ -2602,7 +2711,7 @@ static int rocker_flow_tbl_acl(struct rocker_port *rocker_port, entry->key.acl.ip_tos_mask = ip_tos_mask; entry->key.acl.group_id = group_id; - return rocker_flow_tbl_do(rocker_port, flags, entry); + return rocker_flow_tbl_do(rocker_port, trans, flags, entry); } static struct rocker_group_tbl_entry * @@ -2620,22 +2729,24 @@ rocker_group_tbl_find(struct rocker *rocker, return NULL; } -static void rocker_group_tbl_entry_free(struct rocker_group_tbl_entry *entry) +static void rocker_group_tbl_entry_free(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct rocker_group_tbl_entry *entry) { switch (ROCKER_GROUP_TYPE_GET(entry->group_id)) { case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD: case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST: - kfree(entry->group_ids); + rocker_port_kfree(rocker_port, trans, entry->group_ids); break; default: break; } - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); } static int rocker_group_tbl_add(struct rocker_port *rocker_port, - struct rocker_group_tbl_entry *match, - bool nowait) + enum switchdev_trans trans, + struct rocker_group_tbl_entry *match) { struct rocker *rocker = rocker_port->rocker; struct rocker_group_tbl_entry *found; @@ -2646,8 +2757,9 @@ static int rocker_group_tbl_add(struct rocker_port *rocker_port, found = rocker_group_tbl_find(rocker, match); if (found) { - hash_del(&found->entry); - rocker_group_tbl_entry_free(found); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); + rocker_group_tbl_entry_free(rocker_port, trans, found); found = match; found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD; } else { @@ -2655,18 +2767,19 @@ static int rocker_group_tbl_add(struct rocker_port *rocker_port, found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD; } - hash_add(rocker->group_tbl, &found->entry, found->group_id); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_add(rocker->group_tbl, &found->entry, found->group_id); spin_unlock_irqrestore(&rocker->group_tbl_lock, flags); - return rocker_cmd_exec(rocker, rocker_port, + return rocker_cmd_exec(rocker, rocker_port, trans, rocker_cmd_group_tbl_add, - found, NULL, NULL, nowait); + found, NULL, NULL); } static int rocker_group_tbl_del(struct rocker_port *rocker_port, - struct rocker_group_tbl_entry *match, - bool nowait) + enum switchdev_trans trans, + struct rocker_group_tbl_entry *match) { struct rocker *rocker = rocker_port->rocker; struct rocker_group_tbl_entry *found; @@ -2678,93 +2791,95 @@ static int rocker_group_tbl_del(struct rocker_port *rocker_port, found = rocker_group_tbl_find(rocker, match); if (found) { - hash_del(&found->entry); + if (trans != SWITCHDEV_TRANS_PREPARE) + hash_del(&found->entry); found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL; } spin_unlock_irqrestore(&rocker->group_tbl_lock, flags); - rocker_group_tbl_entry_free(match); + rocker_group_tbl_entry_free(rocker_port, trans, match); if (found) { - err = rocker_cmd_exec(rocker, rocker_port, + err = rocker_cmd_exec(rocker, rocker_port, trans, rocker_cmd_group_tbl_del, - found, NULL, NULL, nowait); - rocker_group_tbl_entry_free(found); + found, NULL, NULL); + rocker_group_tbl_entry_free(rocker_port, trans, found); } return err; } static int rocker_group_tbl_do(struct rocker_port *rocker_port, - int flags, struct rocker_group_tbl_entry *entry) + enum switchdev_trans trans, int flags, + struct rocker_group_tbl_entry *entry) { - bool nowait = flags & ROCKER_OP_FLAG_NOWAIT; - if (flags & ROCKER_OP_FLAG_REMOVE) - return rocker_group_tbl_del(rocker_port, entry, nowait); + return rocker_group_tbl_del(rocker_port, trans, entry); else - return rocker_group_tbl_add(rocker_port, entry, nowait); + return rocker_group_tbl_add(rocker_port, trans, entry); } static int rocker_group_l2_interface(struct rocker_port *rocker_port, - int flags, __be16 vlan_id, - u32 out_pport, int pop_vlan) + enum switchdev_trans trans, int flags, + __be16 vlan_id, u32 out_pport, + int pop_vlan) { struct rocker_group_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; entry->group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); entry->l2_interface.pop_vlan = pop_vlan; - return rocker_group_tbl_do(rocker_port, flags, entry); + return rocker_group_tbl_do(rocker_port, trans, flags, entry); } static int rocker_group_l2_fan_out(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, u8 group_count, u32 *group_ids, u32 group_id) { struct rocker_group_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; entry->group_id = group_id; entry->group_count = group_count; - entry->group_ids = kcalloc(group_count, sizeof(u32), - rocker_op_flags_gfp(flags)); + entry->group_ids = rocker_port_kcalloc(rocker_port, trans, group_count, + sizeof(u32)); if (!entry->group_ids) { - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); return -ENOMEM; } memcpy(entry->group_ids, group_ids, group_count * sizeof(u32)); - return rocker_group_tbl_do(rocker_port, flags, entry); + return rocker_group_tbl_do(rocker_port, trans, flags, entry); } static int rocker_group_l2_flood(struct rocker_port *rocker_port, - int flags, __be16 vlan_id, - u8 group_count, u32 *group_ids, - u32 group_id) + enum switchdev_trans trans, int flags, + __be16 vlan_id, u8 group_count, + u32 *group_ids, u32 group_id) { - return rocker_group_l2_fan_out(rocker_port, flags, + return rocker_group_l2_fan_out(rocker_port, trans, flags, group_count, group_ids, group_id); } static int rocker_group_l3_unicast(struct rocker_port *rocker_port, - int flags, u32 index, u8 *src_mac, - u8 *dst_mac, __be16 vlan_id, - bool ttl_check, u32 pport) + enum switchdev_trans trans, int flags, + u32 index, u8 *src_mac, u8 *dst_mac, + __be16 vlan_id, bool ttl_check, u32 pport) { struct rocker_group_tbl_entry *entry; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2777,7 +2892,7 @@ static int rocker_group_l3_unicast(struct rocker_port *rocker_port, entry->l3_unicast.ttl_check = ttl_check; entry->l3_unicast.group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, pport); - return rocker_group_tbl_do(rocker_port, flags, entry); + return rocker_group_tbl_do(rocker_port, trans, flags, entry); } static struct rocker_neigh_tbl_entry * @@ -2802,17 +2917,17 @@ static void _rocker_neigh_add(struct rocker *rocker, be32_to_cpu(entry->ip_addr)); } -static void _rocker_neigh_del(struct rocker *rocker, +static void _rocker_neigh_del(struct rocker_port *rocker_port, + enum switchdev_trans trans, struct rocker_neigh_tbl_entry *entry) { if (--entry->ref_count == 0) { hash_del(&entry->entry); - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); } } -static void _rocker_neigh_update(struct rocker *rocker, - struct rocker_neigh_tbl_entry *entry, +static void _rocker_neigh_update(struct rocker_neigh_tbl_entry *entry, u8 *eth_dst, bool ttl_check) { if (eth_dst) { @@ -2824,6 +2939,7 @@ static void _rocker_neigh_update(struct rocker *rocker, } static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, __be32 ip_addr, u8 *eth_dst) { struct rocker *rocker = rocker_port->rocker; @@ -2840,7 +2956,7 @@ static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, bool removing; int err = 0; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2860,9 +2976,9 @@ static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, _rocker_neigh_add(rocker, entry); } else if (removing) { memcpy(entry, found, sizeof(*entry)); - _rocker_neigh_del(rocker, found); + _rocker_neigh_del(rocker_port, trans, found); } else if (updating) { - _rocker_neigh_update(rocker, found, eth_dst, true); + _rocker_neigh_update(found, eth_dst, true); memcpy(entry, found, sizeof(*entry)); } else { err = -ENOENT; @@ -2879,7 +2995,7 @@ static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, * other routes' nexthops. */ - err = rocker_group_l3_unicast(rocker_port, flags, + err = rocker_group_l3_unicast(rocker_port, trans, flags, entry->index, rocker_port->dev->dev_addr, entry->eth_dst, @@ -2895,7 +3011,7 @@ static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, if (adding || removing) { group_id = ROCKER_GROUP_L3_UNICAST(entry->index); - err = rocker_flow_tbl_ucast4_routing(rocker_port, + err = rocker_flow_tbl_ucast4_routing(rocker_port, trans, eth_type, ip_addr, inet_make_mask(32), priority, goto_tbl, @@ -2909,13 +3025,13 @@ static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port, err_out: if (!adding) - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); return err; } static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port, - __be32 ip_addr) + enum switchdev_trans trans, __be32 ip_addr) { struct net_device *dev = rocker_port->dev; struct neighbour *n = __ipv4_neigh_lookup(dev, (__force u32)ip_addr); @@ -2932,14 +3048,16 @@ static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port, */ if (n->nud_state & NUD_VALID) - err = rocker_port_ipv4_neigh(rocker_port, 0, ip_addr, n->ha); + err = rocker_port_ipv4_neigh(rocker_port, trans, 0, + ip_addr, n->ha); else neigh_event_send(n, NULL); return err; } -static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, +static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, __be32 ip_addr, u32 *index) { struct rocker *rocker = rocker_port->rocker; @@ -2952,7 +3070,7 @@ static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, bool resolved = true; int err = 0; - entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags)); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return -ENOMEM; @@ -2973,9 +3091,9 @@ static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, *index = entry->index; resolved = false; } else if (removing) { - _rocker_neigh_del(rocker, found); + _rocker_neigh_del(rocker_port, trans, found); } else if (updating) { - _rocker_neigh_update(rocker, found, NULL, false); + _rocker_neigh_update(found, NULL, false); resolved = !is_zero_ether_addr(found->eth_dst); } else { err = -ENOENT; @@ -2984,7 +3102,7 @@ static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, spin_unlock_irqrestore(&rocker->neigh_tbl_lock, lock_flags); if (!adding) - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); if (err) return err; @@ -2992,12 +3110,13 @@ static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags, /* Resolved means neigh ip_addr is resolved to neigh mac. */ if (!resolved) - err = rocker_port_ipv4_resolve(rocker_port, ip_addr); + err = rocker_port_ipv4_resolve(rocker_port, trans, ip_addr); return err; } static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, __be16 vlan_id) { struct rocker_port *p; @@ -3008,8 +3127,8 @@ static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port, int err = 0; int i; - group_ids = kcalloc(rocker->port_count, sizeof(u32), - rocker_op_flags_gfp(flags)); + group_ids = rocker_port_kcalloc(rocker_port, trans, rocker->port_count, + sizeof(u32)); if (!group_ids) return -ENOMEM; @@ -3032,21 +3151,20 @@ static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port, if (group_count == 0) goto no_ports_in_vlan; - err = rocker_group_l2_flood(rocker_port, flags, vlan_id, - group_count, group_ids, - group_id); + err = rocker_group_l2_flood(rocker_port, trans, flags, vlan_id, + group_count, group_ids, group_id); if (err) netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 flood group\n", err); no_ports_in_vlan: - kfree(group_ids); + rocker_port_kfree(rocker_port, trans, group_ids); return err; } static int rocker_port_vlan_l2_groups(struct rocker_port *rocker_port, - int flags, __be16 vlan_id, - bool pop_vlan) + enum switchdev_trans trans, int flags, + __be16 vlan_id, bool pop_vlan) { struct rocker *rocker = rocker_port->rocker; struct rocker_port *p; @@ -3063,9 +3181,8 @@ static int rocker_port_vlan_l2_groups(struct rocker_port *rocker_port, if (rocker_port->stp_state == BR_STATE_LEARNING || rocker_port->stp_state == BR_STATE_FORWARDING) { out_pport = rocker_port->pport; - err = rocker_group_l2_interface(rocker_port, flags, - vlan_id, out_pport, - pop_vlan); + err = rocker_group_l2_interface(rocker_port, trans, flags, + vlan_id, out_pport, pop_vlan); if (err) { netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 group for pport %d\n", @@ -3089,9 +3206,8 @@ static int rocker_port_vlan_l2_groups(struct rocker_port *rocker_port, return 0; out_pport = 0; - err = rocker_group_l2_interface(rocker_port, flags, - vlan_id, out_pport, - pop_vlan); + err = rocker_group_l2_interface(rocker_port, trans, flags, + vlan_id, out_pport, pop_vlan); if (err) { netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 group for CPU port\n", err); @@ -3147,8 +3263,8 @@ static struct rocker_ctrl { }; static int rocker_port_ctrl_vlan_acl(struct rocker_port *rocker_port, - int flags, struct rocker_ctrl *ctrl, - __be16 vlan_id) + enum switchdev_trans trans, int flags, + struct rocker_ctrl *ctrl, __be16 vlan_id) { u32 in_pport = rocker_port->pport; u32 in_pport_mask = 0xffffffff; @@ -3163,7 +3279,7 @@ static int rocker_port_ctrl_vlan_acl(struct rocker_port *rocker_port, u32 group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); int err; - err = rocker_flow_tbl_acl(rocker_port, flags, + err = rocker_flow_tbl_acl(rocker_port, trans, flags, in_pport, in_pport_mask, eth_src, eth_src_mask, ctrl->eth_dst, ctrl->eth_dst_mask, @@ -3180,7 +3296,8 @@ static int rocker_port_ctrl_vlan_acl(struct rocker_port *rocker_port, } static int rocker_port_ctrl_vlan_bridge(struct rocker_port *rocker_port, - int flags, struct rocker_ctrl *ctrl, + enum switchdev_trans trans, int flags, + struct rocker_ctrl *ctrl, __be16 vlan_id) { enum rocker_of_dpa_table_id goto_tbl = @@ -3192,7 +3309,7 @@ static int rocker_port_ctrl_vlan_bridge(struct rocker_port *rocker_port, if (!rocker_port_is_bridged(rocker_port)) return 0; - err = rocker_flow_tbl_bridge(rocker_port, flags, + err = rocker_flow_tbl_bridge(rocker_port, trans, flags, ctrl->eth_dst, ctrl->eth_dst_mask, vlan_id, tunnel_id, goto_tbl, group_id, ctrl->copy_to_cpu); @@ -3204,8 +3321,8 @@ static int rocker_port_ctrl_vlan_bridge(struct rocker_port *rocker_port, } static int rocker_port_ctrl_vlan_term(struct rocker_port *rocker_port, - int flags, struct rocker_ctrl *ctrl, - __be16 vlan_id) + enum switchdev_trans trans, int flags, + struct rocker_ctrl *ctrl, __be16 vlan_id) { u32 in_pport_mask = 0xffffffff; __be16 vlan_id_mask = htons(0xffff); @@ -3214,7 +3331,7 @@ static int rocker_port_ctrl_vlan_term(struct rocker_port *rocker_port, if (ntohs(vlan_id) == 0) vlan_id = rocker_port->internal_vlan_id; - err = rocker_flow_tbl_term_mac(rocker_port, + err = rocker_flow_tbl_term_mac(rocker_port, trans, rocker_port->pport, in_pport_mask, ctrl->eth_type, ctrl->eth_dst, ctrl->eth_dst_mask, vlan_id, @@ -3227,32 +3344,34 @@ static int rocker_port_ctrl_vlan_term(struct rocker_port *rocker_port, return err; } -static int rocker_port_ctrl_vlan(struct rocker_port *rocker_port, int flags, +static int rocker_port_ctrl_vlan(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, struct rocker_ctrl *ctrl, __be16 vlan_id) { if (ctrl->acl) - return rocker_port_ctrl_vlan_acl(rocker_port, flags, + return rocker_port_ctrl_vlan_acl(rocker_port, trans, flags, ctrl, vlan_id); if (ctrl->bridge) - return rocker_port_ctrl_vlan_bridge(rocker_port, flags, + return rocker_port_ctrl_vlan_bridge(rocker_port, trans, flags, ctrl, vlan_id); if (ctrl->term) - return rocker_port_ctrl_vlan_term(rocker_port, flags, + return rocker_port_ctrl_vlan_term(rocker_port, trans, flags, ctrl, vlan_id); return -EOPNOTSUPP; } static int rocker_port_ctrl_vlan_add(struct rocker_port *rocker_port, - int flags, __be16 vlan_id) + enum switchdev_trans trans, int flags, + __be16 vlan_id) { int err = 0; int i; for (i = 0; i < ROCKER_CTRL_MAX; i++) { if (rocker_port->ctrls[i]) { - err = rocker_port_ctrl_vlan(rocker_port, flags, + err = rocker_port_ctrl_vlan(rocker_port, trans, flags, &rocker_ctrls[i], vlan_id); if (err) return err; @@ -3262,7 +3381,8 @@ static int rocker_port_ctrl_vlan_add(struct rocker_port *rocker_port, return err; } -static int rocker_port_ctrl(struct rocker_port *rocker_port, int flags, +static int rocker_port_ctrl(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, struct rocker_ctrl *ctrl) { u16 vid; @@ -3271,7 +3391,7 @@ static int rocker_port_ctrl(struct rocker_port *rocker_port, int flags, for (vid = 1; vid < VLAN_N_VID; vid++) { if (!test_bit(vid, rocker_port->vlan_bitmap)) continue; - err = rocker_port_ctrl_vlan(rocker_port, flags, + err = rocker_port_ctrl_vlan(rocker_port, trans, flags, ctrl, htons(vid)); if (err) break; @@ -3280,8 +3400,8 @@ static int rocker_port_ctrl(struct rocker_port *rocker_port, int flags, return err; } -static int rocker_port_vlan(struct rocker_port *rocker_port, int flags, - u16 vid) +static int rocker_port_vlan(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags, u16 vid) { enum rocker_of_dpa_table_id goto_tbl = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC; @@ -3295,50 +3415,57 @@ static int rocker_port_vlan(struct rocker_port *rocker_port, int flags, internal_vlan_id = rocker_port_vid_to_vlan(rocker_port, vid, &untagged); - if (adding && test_and_set_bit(ntohs(internal_vlan_id), - rocker_port->vlan_bitmap)) + if (adding && test_bit(ntohs(internal_vlan_id), + rocker_port->vlan_bitmap)) return 0; /* already added */ - else if (!adding && !test_and_clear_bit(ntohs(internal_vlan_id), - rocker_port->vlan_bitmap)) + else if (!adding && !test_bit(ntohs(internal_vlan_id), + rocker_port->vlan_bitmap)) return 0; /* already removed */ + change_bit(ntohs(internal_vlan_id), rocker_port->vlan_bitmap); + if (adding) { - err = rocker_port_ctrl_vlan_add(rocker_port, flags, + err = rocker_port_ctrl_vlan_add(rocker_port, trans, flags, internal_vlan_id); if (err) { netdev_err(rocker_port->dev, "Error (%d) port ctrl vlan add\n", err); - return err; + goto err_out; } } - err = rocker_port_vlan_l2_groups(rocker_port, flags, + err = rocker_port_vlan_l2_groups(rocker_port, trans, flags, internal_vlan_id, untagged); if (err) { netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 groups\n", err); - return err; + goto err_out; } - err = rocker_port_vlan_flood_group(rocker_port, flags, + err = rocker_port_vlan_flood_group(rocker_port, trans, flags, internal_vlan_id); if (err) { netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 flood group\n", err); - return err; + goto err_out; } - err = rocker_flow_tbl_vlan(rocker_port, flags, + err = rocker_flow_tbl_vlan(rocker_port, trans, flags, in_pport, vlan_id, vlan_id_mask, goto_tbl, untagged, internal_vlan_id); if (err) netdev_err(rocker_port->dev, "Error (%d) port VLAN table\n", err); +err_out: + if (trans == SWITCHDEV_TRANS_PREPARE) + change_bit(ntohs(internal_vlan_id), rocker_port->vlan_bitmap); + return err; } -static int rocker_port_ig_tbl(struct rocker_port *rocker_port, int flags) +static int rocker_port_ig_tbl(struct rocker_port *rocker_port, + enum switchdev_trans trans, int flags) { enum rocker_of_dpa_table_id goto_tbl; u32 in_pport; @@ -3353,7 +3480,7 @@ static int rocker_port_ig_tbl(struct rocker_port *rocker_port, int flags) in_pport_mask = 0xffff0000; goto_tbl = ROCKER_OF_DPA_TABLE_ID_VLAN; - err = rocker_flow_tbl_ig_port(rocker_port, flags, + err = rocker_flow_tbl_ig_port(rocker_port, trans, flags, in_pport, in_pport_mask, goto_tbl); if (err) @@ -3365,7 +3492,8 @@ static int rocker_port_ig_tbl(struct rocker_port *rocker_port, int flags) struct rocker_fdb_learn_work { struct work_struct work; - struct net_device *dev; + struct rocker_port *rocker_port; + enum switchdev_trans trans; int flags; u8 addr[ETH_ALEN]; u16 vid; @@ -3377,23 +3505,24 @@ static void rocker_port_fdb_learn_work(struct work_struct *work) container_of(work, struct rocker_fdb_learn_work, work); bool removing = (lw->flags & ROCKER_OP_FLAG_REMOVE); bool learned = (lw->flags & ROCKER_OP_FLAG_LEARNED); - struct netdev_switch_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info; info.addr = lw->addr; info.vid = lw->vid; if (learned && removing) - call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_DEL, - lw->dev, &info.info); + call_switchdev_notifiers(SWITCHDEV_FDB_DEL, + lw->rocker_port->dev, &info.info); else if (learned && !removing) - call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_ADD, - lw->dev, &info.info); + call_switchdev_notifiers(SWITCHDEV_FDB_ADD, + lw->rocker_port->dev, &info.info); - kfree(work); + rocker_port_kfree(lw->rocker_port, lw->trans, work); } static int rocker_port_fdb_learn(struct rocker_port *rocker_port, - int flags, const u8 *addr, __be16 vlan_id) + enum switchdev_trans trans, int flags, + const u8 *addr, __be16 vlan_id) { struct rocker_fdb_learn_work *lw; enum rocker_of_dpa_table_id goto_tbl = @@ -3409,8 +3538,8 @@ static int rocker_port_fdb_learn(struct rocker_port *rocker_port, group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_pport); if (!(flags & ROCKER_OP_FLAG_REFRESH)) { - err = rocker_flow_tbl_bridge(rocker_port, flags, addr, NULL, - vlan_id, tunnel_id, goto_tbl, + err = rocker_flow_tbl_bridge(rocker_port, trans, flags, addr, + NULL, vlan_id, tunnel_id, goto_tbl, group_id, copy_to_cpu); if (err) return err; @@ -3422,18 +3551,22 @@ static int rocker_port_fdb_learn(struct rocker_port *rocker_port, if (!rocker_port_is_bridged(rocker_port)) return 0; - lw = kmalloc(sizeof(*lw), rocker_op_flags_gfp(flags)); + lw = rocker_port_kzalloc(rocker_port, trans, sizeof(*lw)); if (!lw) return -ENOMEM; INIT_WORK(&lw->work, rocker_port_fdb_learn_work); - lw->dev = rocker_port->dev; + lw->rocker_port = rocker_port; + lw->trans = trans; lw->flags = flags; ether_addr_copy(lw->addr, addr); lw->vid = rocker_port_vlan_to_vid(rocker_port, vlan_id); - schedule_work(&lw->work); + if (trans == SWITCHDEV_TRANS_PREPARE) + rocker_port_kfree(rocker_port, trans, lw); + else + schedule_work(&lw->work); return 0; } @@ -3451,6 +3584,7 @@ rocker_fdb_tbl_find(struct rocker *rocker, struct rocker_fdb_tbl_entry *match) } static int rocker_port_fdb(struct rocker_port *rocker_port, + enum switchdev_trans trans, const unsigned char *addr, __be16 vlan_id, int flags) { @@ -3460,7 +3594,7 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, bool removing = (flags & ROCKER_OP_FLAG_REMOVE); unsigned long lock_flags; - fdb = kzalloc(sizeof(*fdb), rocker_op_flags_gfp(flags)); + fdb = rocker_port_kzalloc(rocker_port, trans, sizeof(*fdb)); if (!fdb) return -ENOMEM; @@ -3475,7 +3609,7 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, found = rocker_fdb_tbl_find(rocker, fdb); if (removing && found) { - kfree(fdb); + rocker_port_kfree(rocker_port, trans, fdb); hash_del(&found->entry); } else if (!removing && !found) { hash_add(rocker->fdb_tbl, &fdb->entry, fdb->key_crc32); @@ -3485,22 +3619,23 @@ static int rocker_port_fdb(struct rocker_port *rocker_port, /* Check if adding and already exists, or removing and can't find */ if (!found != !removing) { - kfree(fdb); + rocker_port_kfree(rocker_port, trans, fdb); if (!found && removing) return 0; /* Refreshing existing to update aging timers */ flags |= ROCKER_OP_FLAG_REFRESH; } - return rocker_port_fdb_learn(rocker_port, flags, addr, vlan_id); + return rocker_port_fdb_learn(rocker_port, trans, flags, addr, vlan_id); } -static int rocker_port_fdb_flush(struct rocker_port *rocker_port) +static int rocker_port_fdb_flush(struct rocker_port *rocker_port, + enum switchdev_trans trans) { struct rocker *rocker = rocker_port->rocker; struct rocker_fdb_tbl_entry *found; unsigned long lock_flags; - int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE; + int flags = ROCKER_OP_FLAG_REMOVE; struct hlist_node *tmp; int bkt; int err = 0; @@ -3516,7 +3651,7 @@ static int rocker_port_fdb_flush(struct rocker_port *rocker_port) continue; if (!found->learned) continue; - err = rocker_port_fdb_learn(rocker_port, flags, + err = rocker_port_fdb_learn(rocker_port, trans, flags, found->key.addr, found->key.vlan_id); if (err) @@ -3531,7 +3666,8 @@ err_out: } static int rocker_port_router_mac(struct rocker_port *rocker_port, - int flags, __be16 vlan_id) + enum switchdev_trans trans, int flags, + __be16 vlan_id) { u32 in_pport_mask = 0xffffffff; __be16 eth_type; @@ -3544,7 +3680,7 @@ static int rocker_port_router_mac(struct rocker_port *rocker_port, vlan_id = rocker_port->internal_vlan_id; eth_type = htons(ETH_P_IP); - err = rocker_flow_tbl_term_mac(rocker_port, + err = rocker_flow_tbl_term_mac(rocker_port, trans, rocker_port->pport, in_pport_mask, eth_type, rocker_port->dev->dev_addr, dst_mac_mask, vlan_id, vlan_id_mask, @@ -3553,7 +3689,7 @@ static int rocker_port_router_mac(struct rocker_port *rocker_port, return err; eth_type = htons(ETH_P_IPV6); - err = rocker_flow_tbl_term_mac(rocker_port, + err = rocker_flow_tbl_term_mac(rocker_port, trans, rocker_port->pport, in_pport_mask, eth_type, rocker_port->dev->dev_addr, dst_mac_mask, vlan_id, vlan_id_mask, @@ -3562,13 +3698,14 @@ static int rocker_port_router_mac(struct rocker_port *rocker_port, return err; } -static int rocker_port_fwding(struct rocker_port *rocker_port) +static int rocker_port_fwding(struct rocker_port *rocker_port, + enum switchdev_trans trans) { bool pop_vlan; u32 out_pport; __be16 vlan_id; u16 vid; - int flags = ROCKER_OP_FLAG_NOWAIT; + int flags = 0; int err; /* Port will be forwarding-enabled if its STP state is LEARNING @@ -3588,9 +3725,8 @@ static int rocker_port_fwding(struct rocker_port *rocker_port) continue; vlan_id = htons(vid); pop_vlan = rocker_vlan_id_is_internal(vlan_id); - err = rocker_group_l2_interface(rocker_port, flags, - vlan_id, out_pport, - pop_vlan); + err = rocker_group_l2_interface(rocker_port, trans, flags, + vlan_id, out_pport, pop_vlan); if (err) { netdev_err(rocker_port->dev, "Error (%d) port VLAN l2 group for pport %d\n", @@ -3602,13 +3738,21 @@ static int rocker_port_fwding(struct rocker_port *rocker_port) return 0; } -static int rocker_port_stp_update(struct rocker_port *rocker_port, u8 state) +static int rocker_port_stp_update(struct rocker_port *rocker_port, + enum switchdev_trans trans, u8 state) { bool want[ROCKER_CTRL_MAX] = { 0, }; + bool prev_ctrls[ROCKER_CTRL_MAX]; + u8 prev_state; int flags; int err; int i; + if (trans == SWITCHDEV_TRANS_PREPARE) { + memcpy(prev_ctrls, rocker_port->ctrls, sizeof(prev_ctrls)); + prev_state = rocker_port->stp_state; + } + if (rocker_port->stp_state == state) return 0; @@ -3636,41 +3780,50 @@ static int rocker_port_stp_update(struct rocker_port *rocker_port, u8 state) for (i = 0; i < ROCKER_CTRL_MAX; i++) { if (want[i] != rocker_port->ctrls[i]) { - flags = ROCKER_OP_FLAG_NOWAIT | - (want[i] ? 0 : ROCKER_OP_FLAG_REMOVE); - err = rocker_port_ctrl(rocker_port, flags, + flags = (want[i] ? 0 : ROCKER_OP_FLAG_REMOVE); + err = rocker_port_ctrl(rocker_port, trans, flags, &rocker_ctrls[i]); if (err) - return err; + goto err_out; rocker_port->ctrls[i] = want[i]; } } - err = rocker_port_fdb_flush(rocker_port); + err = rocker_port_fdb_flush(rocker_port, trans); if (err) - return err; + goto err_out; + + err = rocker_port_fwding(rocker_port, trans); + +err_out: + if (trans == SWITCHDEV_TRANS_PREPARE) { + memcpy(rocker_port->ctrls, prev_ctrls, sizeof(prev_ctrls)); + rocker_port->stp_state = prev_state; + } - return rocker_port_fwding(rocker_port); + return err; } -static int rocker_port_fwd_enable(struct rocker_port *rocker_port) +static int rocker_port_fwd_enable(struct rocker_port *rocker_port, + enum switchdev_trans trans) { if (rocker_port_is_bridged(rocker_port)) /* bridge STP will enable port */ return 0; /* port is not bridged, so simulate going to FORWARDING state */ - return rocker_port_stp_update(rocker_port, BR_STATE_FORWARDING); + return rocker_port_stp_update(rocker_port, trans, BR_STATE_FORWARDING); } -static int rocker_port_fwd_disable(struct rocker_port *rocker_port) +static int rocker_port_fwd_disable(struct rocker_port *rocker_port, + enum switchdev_trans trans) { if (rocker_port_is_bridged(rocker_port)) /* bridge STP will disable port */ return 0; /* port is not bridged, so simulate going to DISABLED state */ - return rocker_port_stp_update(rocker_port, BR_STATE_DISABLED); + return rocker_port_stp_update(rocker_port, trans, BR_STATE_DISABLED); } static struct rocker_internal_vlan_tbl_entry * @@ -3688,6 +3841,7 @@ rocker_internal_vlan_tbl_find(struct rocker *rocker, int ifindex) } static __be16 rocker_port_internal_vlan_id_get(struct rocker_port *rocker_port, + enum switchdev_trans trans, int ifindex) { struct rocker *rocker = rocker_port->rocker; @@ -3696,7 +3850,7 @@ static __be16 rocker_port_internal_vlan_id_get(struct rocker_port *rocker_port, unsigned long lock_flags; int i; - entry = kzalloc(sizeof(*entry), GFP_KERNEL); + entry = rocker_port_kzalloc(rocker_port, trans, sizeof(*entry)); if (!entry) return 0; @@ -3706,7 +3860,7 @@ static __be16 rocker_port_internal_vlan_id_get(struct rocker_port *rocker_port, found = rocker_internal_vlan_tbl_find(rocker, ifindex); if (found) { - kfree(entry); + rocker_port_kfree(rocker_port, trans, entry); goto found; } @@ -3730,6 +3884,7 @@ found: } static void rocker_port_internal_vlan_id_put(struct rocker_port *rocker_port, + enum switchdev_trans trans, int ifindex) { struct rocker *rocker = rocker_port->rocker; @@ -3751,14 +3906,15 @@ static void rocker_port_internal_vlan_id_put(struct rocker_port *rocker_port, bit = ntohs(found->vlan_id) - ROCKER_INTERNAL_VLAN_ID_BASE; clear_bit(bit, rocker->internal_vlan_bitmap); hash_del(&found->entry); - kfree(found); + rocker_port_kfree(rocker_port, trans, found); } not_found: spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, lock_flags); } -static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, __be32 dst, +static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, + enum switchdev_trans trans, __be32 dst, int dst_len, struct fib_info *fi, u32 tb_id, int flags) { @@ -3782,7 +3938,7 @@ static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, __be32 dst, has_gw = !!nh->nh_gw; if (has_gw && nh_on_port) { - err = rocker_port_ipv4_nh(rocker_port, flags, + err = rocker_port_ipv4_nh(rocker_port, trans, flags, nh->nh_gw, &index); if (err) return err; @@ -3793,7 +3949,7 @@ static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, __be32 dst, group_id = ROCKER_GROUP_L2_INTERFACE(internal_vlan_id, 0); } - err = rocker_flow_tbl_ucast4_routing(rocker_port, eth_type, dst, + err = rocker_flow_tbl_ucast4_routing(rocker_port, trans, eth_type, dst, dst_mask, priority, goto_tbl, group_id, flags); if (err) @@ -3832,7 +3988,7 @@ static int rocker_port_open(struct net_device *dev) goto err_request_rx_irq; } - err = rocker_port_fwd_enable(rocker_port); + err = rocker_port_fwd_enable(rocker_port, SWITCHDEV_TRANS_NONE); if (err) goto err_fwd_enable; @@ -3859,7 +4015,7 @@ static int rocker_port_stop(struct net_device *dev) rocker_port_set_enable(rocker_port, false); napi_disable(&rocker_port->napi_rx); napi_disable(&rocker_port->napi_tx); - rocker_port_fwd_disable(rocker_port); + rocker_port_fwd_disable(rocker_port, SWITCHDEV_TRANS_NONE); free_irq(rocker_msix_rx_vector(rocker_port), rocker_port); free_irq(rocker_msix_tx_vector(rocker_port), rocker_port); rocker_port_dma_rings_fini(rocker_port); @@ -4012,11 +4168,12 @@ static int rocker_port_vlan_rx_add_vid(struct net_device *dev, struct rocker_port *rocker_port = netdev_priv(dev); int err; - err = rocker_port_vlan(rocker_port, 0, vid); + err = rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, 0, vid); if (err) return err; - return rocker_port_router_mac(rocker_port, 0, htons(vid)); + return rocker_port_router_mac(rocker_port, SWITCHDEV_TRANS_NONE, + 0, htons(vid)); } static int rocker_port_vlan_rx_kill_vid(struct net_device *dev, @@ -4025,12 +4182,13 @@ static int rocker_port_vlan_rx_kill_vid(struct net_device *dev, struct rocker_port *rocker_port = netdev_priv(dev); int err; - err = rocker_port_router_mac(rocker_port, ROCKER_OP_FLAG_REMOVE, - htons(vid)); + err = rocker_port_router_mac(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, htons(vid)); if (err) return err; - return rocker_port_vlan(rocker_port, ROCKER_OP_FLAG_REMOVE, vid); + return rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, vid); } static int rocker_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], @@ -4045,7 +4203,8 @@ static int rocker_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (!rocker_port_is_bridged(rocker_port)) return -EINVAL; - return rocker_port_fdb(rocker_port, addr, vlan_id, flags); + return rocker_port_fdb(rocker_port, SWITCHDEV_TRANS_NONE, + addr, vlan_id, flags); } static int rocker_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], @@ -4059,7 +4218,8 @@ static int rocker_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], if (!rocker_port_is_bridged(rocker_port)) return -EINVAL; - return rocker_port_fdb(rocker_port, addr, vlan_id, flags); + return rocker_port_fdb(rocker_port, SWITCHDEV_TRANS_NONE, + addr, vlan_id, flags); } static int rocker_fdb_fill_info(struct sk_buff *skb, @@ -4135,58 +4295,6 @@ skip: return idx; } -static int rocker_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - struct nlattr *protinfo; - struct nlattr *attr; - int err; - - protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), - IFLA_PROTINFO); - if (protinfo) { - attr = nla_find_nested(protinfo, IFLA_BRPORT_LEARNING); - if (attr) { - if (nla_len(attr) < sizeof(u8)) - return -EINVAL; - - if (nla_get_u8(attr)) - rocker_port->brport_flags |= BR_LEARNING; - else - rocker_port->brport_flags &= ~BR_LEARNING; - err = rocker_port_set_learning(rocker_port); - if (err) - return err; - } - attr = nla_find_nested(protinfo, IFLA_BRPORT_LEARNING_SYNC); - if (attr) { - if (nla_len(attr) < sizeof(u8)) - return -EINVAL; - - if (nla_get_u8(attr)) - rocker_port->brport_flags |= BR_LEARNING_SYNC; - else - rocker_port->brport_flags &= ~BR_LEARNING_SYNC; - } - } - - return 0; -} - -static int rocker_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, - u32 filter_mask, int nlflags) -{ - struct rocker_port *rocker_port = netdev_priv(dev); - u16 mode = BRIDGE_MODE_UNDEF; - u32 mask = BR_LEARNING | BR_LEARNING_SYNC; - - return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, - rocker_port->brport_flags, mask, - nlflags); -} - static int rocker_port_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { @@ -4195,9 +4303,10 @@ static int rocker_port_get_phys_port_name(struct net_device *dev, int err; err = rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_get_port_settings_prep, NULL, rocker_cmd_get_port_settings_phys_name_proc, - &name, false); + &name); return err ? -EOPNOTSUPP : 0; } @@ -4212,8 +4321,9 @@ static const struct net_device_ops rocker_port_netdev_ops = { .ndo_fdb_add = rocker_port_fdb_add, .ndo_fdb_del = rocker_port_fdb_del, .ndo_fdb_dump = rocker_port_fdb_dump, - .ndo_bridge_setlink = rocker_port_bridge_setlink, - .ndo_bridge_getlink = rocker_port_bridge_getlink, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_get_phys_port_name = rocker_port_get_phys_port_name, }; @@ -4221,54 +4331,217 @@ static const struct net_device_ops rocker_port_netdev_ops = { * swdev interface ********************/ -static int rocker_port_swdev_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static int rocker_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) { struct rocker_port *rocker_port = netdev_priv(dev); struct rocker *rocker = rocker_port->rocker; - psid->id_len = sizeof(rocker->hw.id); - memcpy(&psid->id, &rocker->hw.id, psid->id_len); + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_PARENT_ID: + attr->u.ppid.id_len = sizeof(rocker->hw.id); + memcpy(&attr->u.ppid.id, &rocker->hw.id, attr->u.ppid.id_len); + break; + case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS: + attr->u.brport_flags = rocker_port->brport_flags; + break; + default: + return -EOPNOTSUPP; + } + return 0; } -static int rocker_port_swdev_port_stp_update(struct net_device *dev, u8 state) +static void rocker_port_trans_abort(struct rocker_port *rocker_port) +{ + struct list_head *mem, *tmp; + + list_for_each_safe(mem, tmp, &rocker_port->trans_mem) { + list_del(mem); + kfree(mem); + } +} + +static int rocker_port_brport_flags_set(struct rocker_port *rocker_port, + enum switchdev_trans trans, + unsigned long brport_flags) +{ + unsigned long orig_flags; + int err = 0; + + orig_flags = rocker_port->brport_flags; + rocker_port->brport_flags = brport_flags; + if ((orig_flags ^ rocker_port->brport_flags) & BR_LEARNING) + err = rocker_port_set_learning(rocker_port, trans); + + if (trans == SWITCHDEV_TRANS_PREPARE) + rocker_port->brport_flags = orig_flags; + + return err; +} + +static int rocker_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) { struct rocker_port *rocker_port = netdev_priv(dev); + int err = 0; + + switch (attr->trans) { + case SWITCHDEV_TRANS_PREPARE: + BUG_ON(!list_empty(&rocker_port->trans_mem)); + break; + case SWITCHDEV_TRANS_ABORT: + rocker_port_trans_abort(rocker_port); + return 0; + default: + break; + } + + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_STP_STATE: + err = rocker_port_stp_update(rocker_port, attr->trans, + attr->u.stp_state); + break; + case SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS: + err = rocker_port_brport_flags_set(rocker_port, attr->trans, + attr->u.brport_flags); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int rocker_port_vlan_add(struct rocker_port *rocker_port, + enum switchdev_trans trans, u16 vid, u16 flags) +{ + int err; + + /* XXX deal with flags for PVID and untagged */ + + err = rocker_port_vlan(rocker_port, trans, 0, vid); + if (err) + return err; + + return rocker_port_router_mac(rocker_port, trans, 0, htons(vid)); +} + +static int rocker_port_vlans_add(struct rocker_port *rocker_port, + enum switchdev_trans trans, + struct switchdev_obj_vlan *vlan) +{ + u16 vid; + int err; - return rocker_port_stp_update(rocker_port, state); + for (vid = vlan->vid_start; vid <= vlan->vid_end; vid++) { + err = rocker_port_vlan_add(rocker_port, trans, + vid, vlan->flags); + if (err) + return err; + } + + return 0; } -static int rocker_port_swdev_fib_ipv4_add(struct net_device *dev, - __be32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) +static int rocker_port_obj_add(struct net_device *dev, + struct switchdev_obj *obj) { struct rocker_port *rocker_port = netdev_priv(dev); - int flags = 0; + struct switchdev_obj_ipv4_fib *fib4; + int err = 0; + + switch (obj->trans) { + case SWITCHDEV_TRANS_PREPARE: + BUG_ON(!list_empty(&rocker_port->trans_mem)); + break; + case SWITCHDEV_TRANS_ABORT: + rocker_port_trans_abort(rocker_port); + return 0; + default: + break; + } - return rocker_port_fib_ipv4(rocker_port, dst, dst_len, - fi, tb_id, flags); + switch (obj->id) { + case SWITCHDEV_OBJ_PORT_VLAN: + err = rocker_port_vlans_add(rocker_port, obj->trans, + &obj->u.vlan); + break; + case SWITCHDEV_OBJ_IPV4_FIB: + fib4 = &obj->u.ipv4_fib; + err = rocker_port_fib_ipv4(rocker_port, obj->trans, + htonl(fib4->dst), fib4->dst_len, + fib4->fi, fib4->tb_id, 0); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; } -static int rocker_port_swdev_fib_ipv4_del(struct net_device *dev, - __be32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +static int rocker_port_vlan_del(struct rocker_port *rocker_port, + u16 vid, u16 flags) +{ + int err; + + err = rocker_port_router_mac(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, htons(vid)); + if (err) + return err; + + return rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, vid); +} + +static int rocker_port_vlans_del(struct rocker_port *rocker_port, + struct switchdev_obj_vlan *vlan) +{ + u16 vid; + int err; + + for (vid = vlan->vid_start; vid <= vlan->vid_end; vid++) { + err = rocker_port_vlan_del(rocker_port, vid, vlan->flags); + if (err) + return err; + } + + return 0; +} + +static int rocker_port_obj_del(struct net_device *dev, + struct switchdev_obj *obj) { struct rocker_port *rocker_port = netdev_priv(dev); - int flags = ROCKER_OP_FLAG_REMOVE; + struct switchdev_obj_ipv4_fib *fib4; + int err = 0; - return rocker_port_fib_ipv4(rocker_port, dst, dst_len, - fi, tb_id, flags); + switch (obj->id) { + case SWITCHDEV_OBJ_PORT_VLAN: + err = rocker_port_vlans_del(rocker_port, &obj->u.vlan); + break; + case SWITCHDEV_OBJ_IPV4_FIB: + fib4 = &obj->u.ipv4_fib; + err = rocker_port_fib_ipv4(rocker_port, SWITCHDEV_TRANS_NONE, + htonl(fib4->dst), fib4->dst_len, + fib4->fi, fib4->tb_id, + ROCKER_OP_FLAG_REMOVE); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; } -static const struct swdev_ops rocker_port_swdev_ops = { - .swdev_parent_id_get = rocker_port_swdev_parent_id_get, - .swdev_port_stp_update = rocker_port_swdev_port_stp_update, - .swdev_fib_ipv4_add = rocker_port_swdev_fib_ipv4_add, - .swdev_fib_ipv4_del = rocker_port_swdev_fib_ipv4_del, +static const struct switchdev_ops rocker_port_switchdev_ops = { + .switchdev_port_attr_get = rocker_port_attr_get, + .switchdev_port_attr_set = rocker_port_attr_set, + .switchdev_port_obj_add = rocker_port_obj_add, + .switchdev_port_obj_del = rocker_port_obj_del, }; /******************** @@ -4399,9 +4672,10 @@ static int rocker_cmd_get_port_stats_ethtool(struct rocker_port *rocker_port, void *priv) { return rocker_cmd_exec(rocker_port->rocker, rocker_port, + SWITCHDEV_TRANS_NONE, rocker_cmd_get_port_stats_prep, NULL, rocker_cmd_get_port_stats_ethtool_proc, - priv, false); + priv); } static void rocker_port_get_stats(struct net_device *dev, @@ -4415,8 +4689,6 @@ static void rocker_port_get_stats(struct net_device *dev, for (i = 0; i < ARRAY_SIZE(rocker_port_stats); ++i) data[i] = 0; } - - return; } static int rocker_port_get_sset_count(struct net_device *netdev, int sset) @@ -4470,8 +4742,9 @@ static int rocker_port_poll_tx(struct napi_struct *napi, int budget) if (err == 0) { rocker_port->dev->stats.tx_packets++; rocker_port->dev->stats.tx_bytes += skb->len; - } else + } else { rocker_port->dev->stats.tx_errors++; + } dev_kfree_skb_any(skb); credits++; @@ -4583,7 +4856,8 @@ static void rocker_remove_ports(struct rocker *rocker) for (i = 0; i < rocker->port_count; i++) { rocker_port = rocker->ports[i]; - rocker_port_ig_tbl(rocker_port, ROCKER_OP_FLAG_REMOVE); + rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE); unregister_netdev(rocker_port->dev); } kfree(rocker->ports); @@ -4619,11 +4893,12 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_port->port_number = port_number; rocker_port->pport = port_number + 1; rocker_port->brport_flags = BR_LEARNING | BR_LEARNING_SYNC; + INIT_LIST_HEAD(&rocker_port->trans_mem); rocker_port_dev_addr_init(rocker, rocker_port); dev->netdev_ops = &rocker_port_netdev_ops; dev->ethtool_ops = &rocker_port_ethtool_ops; - dev->swdev_ops = &rocker_port_swdev_ops; + dev->switchdev_ops = &rocker_port_switchdev_ops; netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx, NAPI_POLL_WEIGHT); netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx, @@ -4631,8 +4906,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) rocker_carrier_init(rocker_port); dev->features |= NETIF_F_NETNS_LOCAL | - NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_SWITCH_OFFLOAD; + NETIF_F_HW_VLAN_CTAG_FILTER; err = register_netdev(dev); if (err) { @@ -4641,11 +4915,13 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) } rocker->ports[port_number] = rocker_port; - rocker_port_set_learning(rocker_port); + rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE); rocker_port->internal_vlan_id = - rocker_port_internal_vlan_id_get(rocker_port, dev->ifindex); - err = rocker_port_ig_tbl(rocker_port, 0); + rocker_port_internal_vlan_id_get(rocker_port, + SWITCHDEV_TRANS_NONE, + dev->ifindex); + err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0); if (err) { dev_err(&pdev->dev, "install ig port table failed\n"); goto err_port_ig_tbl; @@ -4892,43 +5168,47 @@ static int rocker_port_bridge_join(struct rocker_port *rocker_port, { int err; - rocker_port_internal_vlan_id_put(rocker_port, + rocker_port_internal_vlan_id_put(rocker_port, SWITCHDEV_TRANS_NONE, rocker_port->dev->ifindex); rocker_port->bridge_dev = bridge; /* Use bridge internal VLAN ID for untagged pkts */ - err = rocker_port_vlan(rocker_port, ROCKER_OP_FLAG_REMOVE, 0); + err = rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, 0); if (err) return err; rocker_port->internal_vlan_id = rocker_port_internal_vlan_id_get(rocker_port, + SWITCHDEV_TRANS_NONE, bridge->ifindex); - return rocker_port_vlan(rocker_port, 0, 0); + return rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, 0, 0); } static int rocker_port_bridge_leave(struct rocker_port *rocker_port) { int err; - rocker_port_internal_vlan_id_put(rocker_port, + rocker_port_internal_vlan_id_put(rocker_port, SWITCHDEV_TRANS_NONE, rocker_port->bridge_dev->ifindex); rocker_port->bridge_dev = NULL; /* Use port internal VLAN ID for untagged pkts */ - err = rocker_port_vlan(rocker_port, ROCKER_OP_FLAG_REMOVE, 0); + err = rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, + ROCKER_OP_FLAG_REMOVE, 0); if (err) return err; rocker_port->internal_vlan_id = rocker_port_internal_vlan_id_get(rocker_port, + SWITCHDEV_TRANS_NONE, rocker_port->dev->ifindex); - err = rocker_port_vlan(rocker_port, 0, 0); + err = rocker_port_vlan(rocker_port, SWITCHDEV_TRANS_NONE, 0, 0); if (err) return err; if (rocker_port->dev->flags & IFF_UP) - err = rocker_port_fwd_enable(rocker_port); + err = rocker_port_fwd_enable(rocker_port, SWITCHDEV_TRANS_NONE); return err; } @@ -4990,7 +5270,8 @@ static int rocker_neigh_update(struct net_device *dev, struct neighbour *n) int flags = (n->nud_state & NUD_VALID) ? 0 : ROCKER_OP_FLAG_REMOVE; __be32 ip_addr = *(__be32 *)n->primary_key; - return rocker_port_ipv4_neigh(rocker_port, flags, ip_addr, n->ha); + return rocker_port_ipv4_neigh(rocker_port, SWITCHDEV_TRANS_NONE, + flags, ip_addr, n->ha); } static int rocker_netevent_event(struct notifier_block *unused, diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index a4e9591d7457..c61fbf968036 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -65,9 +65,9 @@ enum { #define ROCKER_TEST_DMA_CTRL 0x0034 /* Rocker test register ctrl */ -#define ROCKER_TEST_DMA_CTRL_CLEAR (1 << 0) -#define ROCKER_TEST_DMA_CTRL_FILL (1 << 1) -#define ROCKER_TEST_DMA_CTRL_INVERT (1 << 2) +#define ROCKER_TEST_DMA_CTRL_CLEAR BIT(0) +#define ROCKER_TEST_DMA_CTRL_FILL BIT(1) +#define ROCKER_TEST_DMA_CTRL_INVERT BIT(2) /* Rocker DMA ring register offsets */ #define ROCKER_DMA_DESC_ADDR(x) (0x1000 + (x) * 32) /* 8-byte */ @@ -79,7 +79,7 @@ enum { #define ROCKER_DMA_DESC_RES1(x) (0x101c + (x) * 32) /* Rocker dma ctrl register bits */ -#define ROCKER_DMA_DESC_CTRL_RESET (1 << 0) +#define ROCKER_DMA_DESC_CTRL_RESET BIT(0) /* Rocker DMA ring types */ enum rocker_dma_type { @@ -111,7 +111,7 @@ struct rocker_desc { u16 comp_err; }; -#define ROCKER_DMA_DESC_COMP_ERR_GEN (1 << 15) +#define ROCKER_DMA_DESC_COMP_ERR_GEN BIT(15) /* Rocker DMA TLV struct */ struct rocker_tlv { @@ -237,14 +237,14 @@ enum { ROCKER_TLV_RX_MAX = __ROCKER_TLV_RX_MAX - 1, }; -#define ROCKER_RX_FLAGS_IPV4 (1 << 0) -#define ROCKER_RX_FLAGS_IPV6 (1 << 1) -#define ROCKER_RX_FLAGS_CSUM_CALC (1 << 2) -#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD (1 << 3) -#define ROCKER_RX_FLAGS_IP_FRAG (1 << 4) -#define ROCKER_RX_FLAGS_TCP (1 << 5) -#define ROCKER_RX_FLAGS_UDP (1 << 6) -#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7) +#define ROCKER_RX_FLAGS_IPV4 BIT(0) +#define ROCKER_RX_FLAGS_IPV6 BIT(1) +#define ROCKER_RX_FLAGS_CSUM_CALC BIT(2) +#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD BIT(3) +#define ROCKER_RX_FLAGS_IP_FRAG BIT(4) +#define ROCKER_RX_FLAGS_TCP BIT(5) +#define ROCKER_RX_FLAGS_UDP BIT(6) +#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD BIT(7) enum { ROCKER_TLV_TX_UNSPEC, @@ -460,6 +460,6 @@ enum rocker_of_dpa_overlay_type { #define ROCKER_SWITCH_ID 0x0320 /* 8-byte */ /* Rocker control bits */ -#define ROCKER_CONTROL_RESET (1 << 0) +#define ROCKER_CONTROL_RESET BIT(0) #endif diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index 3a83c0dca8e6..ce8470fe79d5 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -3,6 +3,6 @@ sfc-y += efx.o nic.o farch.o falcon.o siena.o ef10.o tx.o \ tenxpress.o txc43128_phy.o falcon_boards.o \ mcdi.o mcdi_port.o mcdi_mon.o ptp.o sfc-$(CONFIG_SFC_MTD) += mtd.o -sfc-$(CONFIG_SFC_SRIOV) += siena_sriov.o +sfc-$(CONFIG_SFC_SRIOV) += sriov.o siena_sriov.o ef10_sriov.o obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index fbb6cfa0f5f1..882117a43c3a 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -15,6 +15,7 @@ #include "nic.h" #include "workarounds.h" #include "selftest.h" +#include "ef10_sriov.h" #include <linux/in.h> #include <linux/jhash.h> #include <linux/wait.h> @@ -30,6 +31,9 @@ enum { /* The reserved RSS context value */ #define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff +/* The maximum size of a shared RSS context */ +/* TODO: this should really be from the mcdi protocol export */ +#define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL /* The filter table(s) are managed by firmware and we have write-only * access. When removing filters we must identify them to the @@ -77,7 +81,6 @@ struct efx_ef10_filter_table { /* An arbitrary search limit for the software hash table */ #define EFX_EF10_FILTER_SEARCH_LIMIT 200 -static void efx_ef10_rx_push_rss_config(struct efx_nic *efx); static void efx_ef10_rx_free_indir_table(struct efx_nic *efx); static void efx_ef10_filter_table_remove(struct efx_nic *efx); @@ -92,7 +95,28 @@ static int efx_ef10_get_warm_boot_count(struct efx_nic *efx) static unsigned int efx_ef10_mem_map_size(struct efx_nic *efx) { - return resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]); + int bar; + + bar = efx->type->mem_bar; + return resource_size(&efx->pci_dev->resource[bar]); +} + +static int efx_ef10_get_pf_index(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf, + sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + + nic_data->pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF); + return 0; } static int efx_ef10_init_datapath_caps(struct efx_nic *efx) @@ -117,6 +141,13 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) nic_data->datapath_caps = MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1); + /* record the DPCPU firmware IDs to determine VEB vswitching support. + */ + nic_data->rx_dpcpu_fw_id = + MCDI_WORD(outbuf, GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID); + nic_data->tx_dpcpu_fw_id = + MCDI_WORD(outbuf, GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID); + if (!(nic_data->datapath_caps & (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))) { netif_err(efx, drv, efx->net_dev, @@ -178,7 +209,7 @@ static int efx_ef10_probe(struct efx_nic *efx) efx->max_channels = min_t(unsigned int, EFX_MAX_CHANNELS, - resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]) / + efx_ef10_mem_map_size(efx) / (EFX_VI_PAGE_SIZE * EFX_TXQ_TYPES)); if (WARN_ON(efx->max_channels == 0)) return -EIO; @@ -209,6 +240,8 @@ static int efx_ef10_probe(struct efx_nic *efx) nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; + nic_data->vport_id = EVB_PORT_ID_ASSIGNED; + /* In case we're recovering from a crash (kexec), we want to * cancel any outstanding request by the previous user of this * function. We send a special message using the least @@ -230,6 +263,10 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc) goto fail3; + rc = efx_ef10_get_pf_index(efx); + if (rc) + goto fail3; + rc = efx_ef10_init_datapath_caps(efx); if (rc < 0) goto fail3; @@ -251,10 +288,22 @@ static int efx_ef10_probe(struct efx_nic *efx) goto fail3; efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */ - /* Check whether firmware supports bug 35388 workaround */ + /* Check whether firmware supports bug 35388 workaround. + * First try to enable it, then if we get EPERM, just + * ask if it's already enabled + */ rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true); if (rc == 0) nic_data->workaround_35388 = true; + else if (rc == -EPERM) { + unsigned int enabled; + + rc = efx_mcdi_get_workarounds(efx, NULL, &enabled); + if (rc) + goto fail3; + nic_data->workaround_35388 = enabled & + MC_CMD_GET_WORKAROUNDS_OUT_BUG35388; + } else if (rc != -ENOSYS && rc != -ENOENT) goto fail3; netif_dbg(efx, probe, efx->net_dev, @@ -262,7 +311,7 @@ static int efx_ef10_probe(struct efx_nic *efx) nic_data->workaround_35388 ? "en" : "dis"); rc = efx_mcdi_mon_probe(efx); - if (rc) + if (rc && rc != -EPERM) goto fail3; efx_ptp_probe(efx, NULL); @@ -279,6 +328,23 @@ fail1: return rc; } +static int efx_ef10_probe_pf(struct efx_nic *efx) +{ + return efx_ef10_probe(efx); +} + +#ifdef CONFIG_SFC_SRIOV +static int efx_ef10_probe_vf(struct efx_nic *efx) +{ + return efx_ef10_probe(efx); +} +#else +static int efx_ef10_probe_vf(struct efx_nic *efx __attribute__ ((unused))) +{ + return 0; +} +#endif + static int efx_ef10_free_vis(struct efx_nic *efx) { MCDI_DECLARE_BUF_OUT_OR_ERR(outbuf, 0); @@ -687,7 +753,9 @@ static int efx_ef10_init_nic(struct efx_nic *efx) nic_data->must_restore_piobufs = false; } - efx_ef10_rx_push_rss_config(efx); + /* don't fail init if RSS setup doesn't work */ + efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table); + return 0; } @@ -1044,6 +1112,14 @@ static void efx_ef10_push_irq_moderation(struct efx_channel *channel) } } +static void efx_ef10_get_wol_vf(struct efx_nic *efx, + struct ethtool_wolinfo *wol) {} + +static int efx_ef10_set_wol_vf(struct efx_nic *efx, u32 type) +{ + return -EOPNOTSUPP; +} + static void efx_ef10_get_wol(struct efx_nic *efx, struct ethtool_wolinfo *wol) { wol->supported = 0; @@ -1123,6 +1199,10 @@ static int efx_ef10_mcdi_poll_reboot(struct efx_nic *efx) /* All our allocations have been reset */ efx_ef10_reset_mc_allocations(efx); + /* Driver-created vswitches and vports must be re-created */ + nic_data->must_probe_vswitching = true; + nic_data->vport_id = EVB_PORT_ID_ASSIGNED; + /* The datapath firmware might have been changed */ nic_data->must_check_datapath_caps = true; @@ -1237,6 +1317,7 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE; struct efx_channel *channel = tx_queue->channel; struct efx_nic *efx = tx_queue->efx; + struct efx_ef10_nic_data *nic_data = efx->nic_data; size_t inlen, outlen; dma_addr_t dma_addr; efx_qword_t *txd; @@ -1251,7 +1332,7 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id); dma_addr = tx_queue->txd.buf.dma_addr; @@ -1378,19 +1459,33 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) } } -static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context) +static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context, + bool exclusive, unsigned *context_size) { MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; size_t outlen; int rc; + u32 alloc_type = exclusive ? + MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE : + MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED; + unsigned rss_spread = exclusive ? + efx->rss_spread : + min(rounddown_pow_of_two(efx->rss_spread), + EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE); + + if (!exclusive && rss_spread == 1) { + *context = EFX_EF10_RSS_CONTEXT_INVALID; + if (context_size) + *context_size = 1; + return 0; + } MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID, - EVB_PORT_ID_ASSIGNED); - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, - MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE); - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, - EFX_MAX_CHANNELS); + nic_data->vport_id); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, rss_spread); rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); @@ -1402,6 +1497,9 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context) *context = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID); + if (context_size) + *context_size = rss_spread; + return 0; } @@ -1418,7 +1516,8 @@ static void efx_ef10_free_rss_context(struct efx_nic *efx, u32 context) WARN_ON(rc != 0); } -static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context) +static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context, + const u32 *rx_indir_table) { MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN); MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN); @@ -1432,7 +1531,7 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context) for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); ++i) MCDI_PTR(tablebuf, RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] = - (u8) efx->rx_indir_table[i]; + (u8) rx_indir_table[i]; rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf, sizeof(tablebuf), NULL, 0, NULL); @@ -1460,27 +1559,119 @@ static void efx_ef10_rx_free_indir_table(struct efx_nic *efx) nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; } -static void efx_ef10_rx_push_rss_config(struct efx_nic *efx) +static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx, + unsigned *context_size) { + u32 new_rx_rss_context; struct efx_ef10_nic_data *nic_data = efx->nic_data; - int rc; + int rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context, + false, context_size); - netif_dbg(efx, drv, efx->net_dev, "pushing RSS config\n"); + if (rc != 0) + return rc; - if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID) { - rc = efx_ef10_alloc_rss_context(efx, &nic_data->rx_rss_context); - if (rc != 0) - goto fail; + nic_data->rx_rss_context = new_rx_rss_context; + nic_data->rx_rss_context_exclusive = false; + efx_set_default_rx_indir_table(efx); + return 0; +} + +static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx, + const u32 *rx_indir_table) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + u32 new_rx_rss_context; + + if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID || + !nic_data->rx_rss_context_exclusive) { + rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context, + true, NULL); + if (rc == -EOPNOTSUPP) + return rc; + else if (rc != 0) + goto fail1; + } else { + new_rx_rss_context = nic_data->rx_rss_context; } - rc = efx_ef10_populate_rss_table(efx, nic_data->rx_rss_context); + rc = efx_ef10_populate_rss_table(efx, new_rx_rss_context, + rx_indir_table); if (rc != 0) - goto fail; + goto fail2; - return; + if (nic_data->rx_rss_context != new_rx_rss_context) + efx_ef10_rx_free_indir_table(efx); + nic_data->rx_rss_context = new_rx_rss_context; + nic_data->rx_rss_context_exclusive = true; + if (rx_indir_table != efx->rx_indir_table) + memcpy(efx->rx_indir_table, rx_indir_table, + sizeof(efx->rx_indir_table)); + return 0; -fail: +fail2: + if (new_rx_rss_context != nic_data->rx_rss_context) + efx_ef10_free_rss_context(efx, new_rx_rss_context); +fail1: netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table) +{ + int rc; + + if (efx->rss_spread == 1) + return 0; + + rc = efx_ef10_rx_push_exclusive_rss_config(efx, rx_indir_table); + + if (rc == -ENOBUFS && !user) { + unsigned context_size; + bool mismatch = false; + size_t i; + + for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table) && !mismatch; + i++) + mismatch = rx_indir_table[i] != + ethtool_rxfh_indir_default(i, efx->rss_spread); + + rc = efx_ef10_rx_push_shared_rss_config(efx, &context_size); + if (rc == 0) { + if (context_size != efx->rss_spread) + netif_warn(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + " context; allocated a shared one of" + " different size." + " Wanted %u, got %u.\n", + efx->rss_spread, context_size); + else if (mismatch) + netif_warn(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + " context; allocated a shared one but" + " could not apply custom" + " indirection.\n"); + else + netif_info(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + " context; allocated a shared one.\n"); + } + } + return rc; +} + +static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table + __attribute__ ((unused))) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + if (user) + return -EOPNOTSUPP; + if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID) + return 0; + return efx_ef10_rx_push_shared_rss_config(efx, NULL); } static int efx_ef10_rx_probe(struct efx_rx_queue *rx_queue) @@ -1500,6 +1691,7 @@ static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue) struct efx_channel *channel = efx_rx_queue_channel(rx_queue); size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE; struct efx_nic *efx = rx_queue->efx; + struct efx_ef10_nic_data *nic_data = efx->nic_data; size_t inlen, outlen; dma_addr_t dma_addr; int rc; @@ -1517,7 +1709,7 @@ static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue) INIT_RXQ_IN_FLAG_PREFIX, 1, INIT_RXQ_IN_FLAG_TIMESTAMP, 1); MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id); dma_addr = rx_queue->rxd.buf.dma_addr; @@ -2286,11 +2478,12 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx, match_fields); } - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, nic_data->vport_id); MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST, spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ? MC_CMD_FILTER_OP_IN_RX_DEST_DROP : MC_CMD_FILTER_OP_IN_RX_DEST_HOST); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DOMAIN, 0); MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST, MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT); MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE, @@ -3494,6 +3687,9 @@ static void efx_ef10_ptp_write_host_time(struct efx_nic *efx, u32 host_time) _efx_writed(efx, cpu_to_le32(host_time), ER_DZ_MC_DB_LWRD); } +static void efx_ef10_ptp_write_host_time_vf(struct efx_nic *efx, + u32 host_time) {} + static int efx_ef10_rx_enable_timestamping(struct efx_channel *channel, bool temp) { @@ -3571,6 +3767,12 @@ static int efx_ef10_ptp_set_ts_sync_events(struct efx_nic *efx, bool en, return 0; } +static int efx_ef10_ptp_set_ts_config_vf(struct efx_nic *efx, + struct hwtstamp_config *init) +{ + return -EOPNOTSUPP; +} + static int efx_ef10_ptp_set_ts_config(struct efx_nic *efx, struct hwtstamp_config *init) { @@ -3607,9 +3809,109 @@ static int efx_ef10_ptp_set_ts_config(struct efx_nic *efx, } } +const struct efx_nic_type efx_hunt_a0_vf_nic_type = { + .is_vf = true, + .mem_bar = EFX_MEM_VF_BAR, + .mem_map_size = efx_ef10_mem_map_size, + .probe = efx_ef10_probe_vf, + .remove = efx_ef10_remove, + .dimension_resources = efx_ef10_dimension_resources, + .init = efx_ef10_init_nic, + .fini = efx_port_dummy_op_void, + .map_reset_reason = efx_mcdi_map_reset_reason, + .map_reset_flags = efx_ef10_map_reset_flags, + .reset = efx_ef10_reset, + .probe_port = efx_mcdi_port_probe, + .remove_port = efx_mcdi_port_remove, + .fini_dmaq = efx_ef10_fini_dmaq, + .prepare_flr = efx_ef10_prepare_flr, + .finish_flr = efx_port_dummy_op_void, + .describe_stats = efx_ef10_describe_stats, + .update_stats = efx_ef10_update_stats, + .start_stats = efx_port_dummy_op_void, + .pull_stats = efx_port_dummy_op_void, + .stop_stats = efx_port_dummy_op_void, + .set_id_led = efx_mcdi_set_id_led, + .push_irq_moderation = efx_ef10_push_irq_moderation, + .reconfigure_mac = efx_ef10_mac_reconfigure, + .check_mac_fault = efx_mcdi_mac_check_fault, + .reconfigure_port = efx_mcdi_port_reconfigure, + .get_wol = efx_ef10_get_wol_vf, + .set_wol = efx_ef10_set_wol_vf, + .resume_wol = efx_port_dummy_op_void, + .mcdi_request = efx_ef10_mcdi_request, + .mcdi_poll_response = efx_ef10_mcdi_poll_response, + .mcdi_read_response = efx_ef10_mcdi_read_response, + .mcdi_poll_reboot = efx_ef10_mcdi_poll_reboot, + .irq_enable_master = efx_port_dummy_op_void, + .irq_test_generate = efx_ef10_irq_test_generate, + .irq_disable_non_ev = efx_port_dummy_op_void, + .irq_handle_msi = efx_ef10_msi_interrupt, + .irq_handle_legacy = efx_ef10_legacy_interrupt, + .tx_probe = efx_ef10_tx_probe, + .tx_init = efx_ef10_tx_init, + .tx_remove = efx_ef10_tx_remove, + .tx_write = efx_ef10_tx_write, + .rx_push_rss_config = efx_ef10_vf_rx_push_rss_config, + .rx_probe = efx_ef10_rx_probe, + .rx_init = efx_ef10_rx_init, + .rx_remove = efx_ef10_rx_remove, + .rx_write = efx_ef10_rx_write, + .rx_defer_refill = efx_ef10_rx_defer_refill, + .ev_probe = efx_ef10_ev_probe, + .ev_init = efx_ef10_ev_init, + .ev_fini = efx_ef10_ev_fini, + .ev_remove = efx_ef10_ev_remove, + .ev_process = efx_ef10_ev_process, + .ev_read_ack = efx_ef10_ev_read_ack, + .ev_test_generate = efx_ef10_ev_test_generate, + .filter_table_probe = efx_ef10_filter_table_probe, + .filter_table_restore = efx_ef10_filter_table_restore, + .filter_table_remove = efx_ef10_filter_table_remove, + .filter_update_rx_scatter = efx_ef10_filter_update_rx_scatter, + .filter_insert = efx_ef10_filter_insert, + .filter_remove_safe = efx_ef10_filter_remove_safe, + .filter_get_safe = efx_ef10_filter_get_safe, + .filter_clear_rx = efx_ef10_filter_clear_rx, + .filter_count_rx_used = efx_ef10_filter_count_rx_used, + .filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit, + .filter_get_rx_ids = efx_ef10_filter_get_rx_ids, +#ifdef CONFIG_RFS_ACCEL + .filter_rfs_insert = efx_ef10_filter_rfs_insert, + .filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one, +#endif +#ifdef CONFIG_SFC_MTD + .mtd_probe = efx_port_dummy_op_int, +#endif + .ptp_write_host_time = efx_ef10_ptp_write_host_time_vf, + .ptp_set_ts_config = efx_ef10_ptp_set_ts_config_vf, +#ifdef CONFIG_SFC_SRIOV + .vswitching_probe = efx_ef10_vswitching_probe_vf, + .vswitching_restore = efx_ef10_vswitching_restore_vf, + .vswitching_remove = efx_ef10_vswitching_remove_vf, +#endif + .revision = EFX_REV_HUNT_A0, + .max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH), + .rx_prefix_size = ES_DZ_RX_PREFIX_SIZE, + .rx_hash_offset = ES_DZ_RX_PREFIX_HASH_OFST, + .rx_ts_offset = ES_DZ_RX_PREFIX_TSTAMP_OFST, + .can_rx_scatter = true, + .always_rx_scatter = true, + .max_interrupt_mode = EFX_INT_MODE_MSIX, + .timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH, + .offload_features = (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXHASH | NETIF_F_NTUPLE), + .mcdi_max_ver = 2, + .max_rx_ip_filters = HUNT_FILTER_TBL_ROWS, + .hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE | + 1 << HWTSTAMP_FILTER_ALL, +}; + const struct efx_nic_type efx_hunt_a0_nic_type = { + .is_vf = false, + .mem_bar = EFX_MEM_BAR, .mem_map_size = efx_ef10_mem_map_size, - .probe = efx_ef10_probe, + .probe = efx_ef10_probe_pf, .remove = efx_ef10_remove, .dimension_resources = efx_ef10_dimension_resources, .init = efx_ef10_init_nic, @@ -3650,7 +3952,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .tx_init = efx_ef10_tx_init, .tx_remove = efx_ef10_tx_remove, .tx_write = efx_ef10_tx_write, - .rx_push_rss_config = efx_ef10_rx_push_rss_config, + .rx_push_rss_config = efx_ef10_pf_rx_push_rss_config, .rx_probe = efx_ef10_rx_probe, .rx_init = efx_ef10_rx_init, .rx_remove = efx_ef10_rx_remove, @@ -3689,11 +3991,22 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .ptp_write_host_time = efx_ef10_ptp_write_host_time, .ptp_set_ts_sync_events = efx_ef10_ptp_set_ts_sync_events, .ptp_set_ts_config = efx_ef10_ptp_set_ts_config, +#ifdef CONFIG_SFC_SRIOV + .sriov_configure = efx_ef10_sriov_configure, .sriov_init = efx_ef10_sriov_init, .sriov_fini = efx_ef10_sriov_fini, .sriov_mac_address_changed = efx_ef10_sriov_mac_address_changed, .sriov_wanted = efx_ef10_sriov_wanted, .sriov_reset = efx_ef10_sriov_reset, + .sriov_flr = efx_ef10_sriov_flr, + .sriov_set_vf_mac = efx_ef10_sriov_set_vf_mac, + .sriov_set_vf_vlan = efx_ef10_sriov_set_vf_vlan, + .sriov_set_vf_spoofchk = efx_ef10_sriov_set_vf_spoofchk, + .sriov_get_vf_config = efx_ef10_sriov_get_vf_config, + .vswitching_probe = efx_ef10_vswitching_probe_pf, + .vswitching_restore = efx_ef10_vswitching_restore_pf, + .vswitching_remove = efx_ef10_vswitching_remove_pf, +#endif .revision = EFX_REV_HUNT_A0, .max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH), diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c new file mode 100644 index 000000000000..1b93acf2d28d --- /dev/null +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -0,0 +1,430 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#include <linux/pci.h> +#include <linux/module.h> +#include "net_driver.h" +#include "ef10_sriov.h" +#include "efx.h" +#include "nic.h" +#include "mcdi_pcol.h" + +static int efx_ef10_evb_port_assign(struct efx_nic *efx, unsigned int port_id, + unsigned int vf_fn) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_EVB_PORT_ASSIGN_IN_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + MCDI_SET_DWORD(inbuf, EVB_PORT_ASSIGN_IN_PORT_ID, port_id); + MCDI_POPULATE_DWORD_2(inbuf, EVB_PORT_ASSIGN_IN_FUNCTION, + EVB_PORT_ASSIGN_IN_PF, nic_data->pf_index, + EVB_PORT_ASSIGN_IN_VF, vf_fn); + + return efx_mcdi_rpc(efx, MC_CMD_EVB_PORT_ASSIGN, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_ef10_vport_add_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN); + + MCDI_SET_DWORD(inbuf, VPORT_ADD_MAC_ADDRESS_IN_VPORT_ID, port_id); + ether_addr_copy(MCDI_PTR(inbuf, VPORT_ADD_MAC_ADDRESS_IN_MACADDR), mac); + + return efx_mcdi_rpc(efx, MC_CMD_VPORT_ADD_MAC_ADDRESS, inbuf, + sizeof(inbuf), NULL, 0, NULL); +} + +static int efx_ef10_vport_del_mac(struct efx_nic *efx, + unsigned int port_id, u8 *mac) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN); + + MCDI_SET_DWORD(inbuf, VPORT_DEL_MAC_ADDRESS_IN_VPORT_ID, port_id); + ether_addr_copy(MCDI_PTR(inbuf, VPORT_DEL_MAC_ADDRESS_IN_MACADDR), mac); + + return efx_mcdi_rpc(efx, MC_CMD_VPORT_DEL_MAC_ADDRESS, inbuf, + sizeof(inbuf), NULL, 0, NULL); +} + +static int efx_ef10_vswitch_alloc(struct efx_nic *efx, unsigned int port_id, + unsigned int vswitch_type) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VSWITCH_ALLOC_IN_LEN); + + MCDI_SET_DWORD(inbuf, VSWITCH_ALLOC_IN_UPSTREAM_PORT_ID, port_id); + MCDI_SET_DWORD(inbuf, VSWITCH_ALLOC_IN_TYPE, vswitch_type); + MCDI_SET_DWORD(inbuf, VSWITCH_ALLOC_IN_NUM_VLAN_TAGS, 0); + MCDI_POPULATE_DWORD_1(inbuf, VSWITCH_ALLOC_IN_FLAGS, + VSWITCH_ALLOC_IN_FLAG_AUTO_PORT, 0); + + return efx_mcdi_rpc(efx, MC_CMD_VSWITCH_ALLOC, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_ef10_vswitch_free(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VSWITCH_FREE_IN_LEN); + + MCDI_SET_DWORD(inbuf, VSWITCH_FREE_IN_UPSTREAM_PORT_ID, port_id); + + return efx_mcdi_rpc(efx, MC_CMD_VSWITCH_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_ef10_vport_alloc(struct efx_nic *efx, + unsigned int port_id_in, + unsigned int vport_type, + unsigned int *port_id_out) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ALLOC_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_VPORT_ALLOC_OUT_LEN); + size_t outlen; + int rc; + + EFX_WARN_ON_PARANOID(!port_id_out); + + MCDI_SET_DWORD(inbuf, VPORT_ALLOC_IN_UPSTREAM_PORT_ID, port_id_in); + MCDI_SET_DWORD(inbuf, VPORT_ALLOC_IN_TYPE, vport_type); + MCDI_SET_DWORD(inbuf, VPORT_ALLOC_IN_NUM_VLAN_TAGS, 0); + MCDI_POPULATE_DWORD_1(inbuf, VPORT_ALLOC_IN_FLAGS, + VPORT_ALLOC_IN_FLAG_AUTO_PORT, 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_VPORT_ALLOC, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_VPORT_ALLOC_OUT_LEN) + return -EIO; + + *port_id_out = MCDI_DWORD(outbuf, VPORT_ALLOC_OUT_VPORT_ID); + return 0; +} + +static int efx_ef10_vport_free(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_FREE_IN_LEN); + + MCDI_SET_DWORD(inbuf, VPORT_FREE_IN_VPORT_ID, port_id); + + return efx_mcdi_rpc(efx, MC_CMD_VPORT_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_ALLOC_IN_LEN); + + MCDI_SET_DWORD(inbuf, VADAPTOR_ALLOC_IN_UPSTREAM_PORT_ID, port_id); + return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_ALLOC, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_FREE_IN_LEN); + + MCDI_SET_DWORD(inbuf, VADAPTOR_FREE_IN_UPSTREAM_PORT_ID, port_id); + return efx_mcdi_rpc(efx, MC_CMD_VADAPTOR_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static void efx_ef10_sriov_free_vf_vports(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int i; + + if (!nic_data->vf) + return; + + for (i = 0; i < efx->vf_count; i++) { + struct ef10_vf *vf = nic_data->vf + i; + + if (vf->vport_assigned) { + efx_ef10_evb_port_assign(efx, EVB_PORT_ID_NULL, i); + vf->vport_assigned = 0; + } + + if (!is_zero_ether_addr(vf->mac)) { + efx_ef10_vport_del_mac(efx, vf->vport_id, vf->mac); + eth_zero_addr(vf->mac); + } + + if (vf->vport_id) { + efx_ef10_vport_free(efx, vf->vport_id); + vf->vport_id = 0; + } + } +} + +static void efx_ef10_sriov_free_vf_vswitching(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + efx_ef10_sriov_free_vf_vports(efx); + kfree(nic_data->vf); + nic_data->vf = NULL; +} + +static int efx_ef10_sriov_assign_vf_vport(struct efx_nic *efx, + unsigned int vf_i) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct ef10_vf *vf = nic_data->vf + vf_i; + int rc; + + if (WARN_ON_ONCE(!nic_data->vf)) + return -EOPNOTSUPP; + + rc = efx_ef10_vport_alloc(efx, EVB_PORT_ID_ASSIGNED, + MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL, + &vf->vport_id); + if (rc) + return rc; + + rc = efx_ef10_vport_add_mac(efx, vf->vport_id, vf->mac); + if (rc) { + eth_zero_addr(vf->mac); + return rc; + } + + rc = efx_ef10_evb_port_assign(efx, vf->vport_id, vf_i); + if (rc) + return rc; + + vf->vport_assigned = 1; + return 0; +} + +static int efx_ef10_sriov_alloc_vf_vswitching(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + unsigned int i; + int rc; + + nic_data->vf = kcalloc(efx->vf_count, sizeof(struct ef10_vf), + GFP_KERNEL); + if (!nic_data->vf) + return -ENOMEM; + + for (i = 0; i < efx->vf_count; i++) { + random_ether_addr(nic_data->vf[i].mac); + + rc = efx_ef10_sriov_assign_vf_vport(efx, i); + if (rc) + goto fail; + } + + return 0; +fail: + efx_ef10_sriov_free_vf_vports(efx); + kfree(nic_data->vf); + nic_data->vf = NULL; + return rc; +} + +static int efx_ef10_sriov_restore_vf_vswitching(struct efx_nic *efx) +{ + unsigned int i; + int rc; + + for (i = 0; i < efx->vf_count; i++) { + rc = efx_ef10_sriov_assign_vf_vport(efx, i); + if (rc) + goto fail; + } + + return 0; +fail: + efx_ef10_sriov_free_vf_vswitching(efx); + return rc; +} + +/* On top of the default firmware vswitch setup, create a VEB vswitch and + * expansion vport for use by this function. + */ +int efx_ef10_vswitching_probe_pf(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct net_device *net_dev = efx->net_dev; + int rc; + + if (pci_sriov_get_totalvfs(efx->pci_dev) <= 0) { + /* vswitch not needed as we have no VFs */ + efx_ef10_vadaptor_alloc(efx, nic_data->vport_id); + return 0; + } + + rc = efx_ef10_vswitch_alloc(efx, EVB_PORT_ID_ASSIGNED, + MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEB); + if (rc) + goto fail1; + + rc = efx_ef10_vport_alloc(efx, EVB_PORT_ID_ASSIGNED, + MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL, + &nic_data->vport_id); + if (rc) + goto fail2; + + rc = efx_ef10_vport_add_mac(efx, nic_data->vport_id, net_dev->dev_addr); + if (rc) + goto fail3; + ether_addr_copy(nic_data->vport_mac, net_dev->dev_addr); + + rc = efx_ef10_vadaptor_alloc(efx, nic_data->vport_id); + if (rc) + goto fail4; + + return 0; +fail4: + efx_ef10_vport_del_mac(efx, nic_data->vport_id, nic_data->vport_mac); + eth_zero_addr(nic_data->vport_mac); +fail3: + efx_ef10_vport_free(efx, nic_data->vport_id); + nic_data->vport_id = EVB_PORT_ID_ASSIGNED; +fail2: + efx_ef10_vswitch_free(efx, EVB_PORT_ID_ASSIGNED); +fail1: + return rc; +} + +int efx_ef10_vswitching_probe_vf(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + return efx_ef10_vadaptor_alloc(efx, nic_data->vport_id); +} + +int efx_ef10_vswitching_restore_pf(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + if (!nic_data->must_probe_vswitching) + return 0; + + rc = efx_ef10_vswitching_probe_pf(efx); + if (rc) + goto fail; + + rc = efx_ef10_sriov_restore_vf_vswitching(efx); + if (rc) + goto fail; + + nic_data->must_probe_vswitching = false; +fail: + return rc; +} + +int efx_ef10_vswitching_restore_vf(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + if (!nic_data->must_probe_vswitching) + return 0; + + rc = efx_ef10_vadaptor_free(efx, EVB_PORT_ID_ASSIGNED); + if (rc) + return rc; + + nic_data->must_probe_vswitching = false; + return 0; +} + +void efx_ef10_vswitching_remove_pf(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + efx_ef10_sriov_free_vf_vswitching(efx); + + efx_ef10_vadaptor_free(efx, nic_data->vport_id); + + if (nic_data->vport_id == EVB_PORT_ID_ASSIGNED) + return; /* No vswitch was ever created */ + + if (!is_zero_ether_addr(nic_data->vport_mac)) { + efx_ef10_vport_del_mac(efx, nic_data->vport_id, + efx->net_dev->dev_addr); + eth_zero_addr(nic_data->vport_mac); + } + efx_ef10_vport_free(efx, nic_data->vport_id); + nic_data->vport_id = EVB_PORT_ID_ASSIGNED; + + efx_ef10_vswitch_free(efx, nic_data->vport_id); +} + +void efx_ef10_vswitching_remove_vf(struct efx_nic *efx) +{ + efx_ef10_vadaptor_free(efx, EVB_PORT_ID_ASSIGNED); +} + +static int efx_ef10_pci_sriov_enable(struct efx_nic *efx, int num_vfs) +{ + int rc = 0; + struct pci_dev *dev = efx->pci_dev; + + efx->vf_count = num_vfs; + + rc = efx_ef10_sriov_alloc_vf_vswitching(efx); + if (rc) + goto fail1; + + rc = pci_enable_sriov(dev, num_vfs); + if (rc) + goto fail2; + + return 0; +fail2: + efx_ef10_sriov_free_vf_vswitching(efx); +fail1: + efx->vf_count = 0; + netif_err(efx, probe, efx->net_dev, + "Failed to enable SRIOV VFs\n"); + return rc; +} + +static int efx_ef10_pci_sriov_disable(struct efx_nic *efx) +{ + struct pci_dev *dev = efx->pci_dev; + + pci_disable_sriov(dev); + efx_ef10_sriov_free_vf_vswitching(efx); + efx->vf_count = 0; + return 0; +} + +int efx_ef10_sriov_configure(struct efx_nic *efx, int num_vfs) +{ + if (num_vfs == 0) + return efx_ef10_pci_sriov_disable(efx); + else + return efx_ef10_pci_sriov_enable(efx, num_vfs); +} + +int efx_ef10_sriov_init(struct efx_nic *efx) +{ + return 0; +} + +void efx_ef10_sriov_fini(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + if (!nic_data->vf) + return; + + rc = efx_ef10_pci_sriov_disable(efx); + if (rc) + netif_dbg(efx, drv, efx->net_dev, + "Disabling SRIOV was not successful rc=%d\n", rc); + else + netif_dbg(efx, drv, efx->net_dev, "SRIOV disabled\n"); +} diff --git a/drivers/net/ethernet/sfc/ef10_sriov.h b/drivers/net/ethernet/sfc/ef10_sriov.h new file mode 100644 index 000000000000..86bac7ebb01a --- /dev/null +++ b/drivers/net/ethernet/sfc/ef10_sriov.h @@ -0,0 +1,70 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EF10_SRIOV_H +#define EF10_SRIOV_H + +#include "net_driver.h" + +/** + * struct ef10_vf - PF's store of VF data + * @vport_id: vport ID for the VF + * @vport_assigned: record whether the vport is currently assigned to the VF + * @mac: MAC address for the VF, zero when address is removed from the vport + */ +struct ef10_vf { + unsigned int vport_id; + unsigned int vport_assigned; + u8 mac[ETH_ALEN]; +}; + +static inline bool efx_ef10_sriov_wanted(struct efx_nic *efx) +{ + return false; +} + +int efx_ef10_sriov_configure(struct efx_nic *efx, int num_vfs); +int efx_ef10_sriov_init(struct efx_nic *efx); +static inline void efx_ef10_sriov_mac_address_changed(struct efx_nic *efx) {} +static inline void efx_ef10_sriov_reset(struct efx_nic *efx) {} +void efx_ef10_sriov_fini(struct efx_nic *efx); +static inline void efx_ef10_sriov_flr(struct efx_nic *efx, unsigned vf_i) {} + +static inline int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf, + u8 *mac) +{ + return -EOPNOTSUPP; +} + +static inline int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf, + u16 vlan, u8 qos) +{ + return -EOPNOTSUPP; +} + +static inline int efx_ef10_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf, + bool spoofchk) +{ + return -EOPNOTSUPP; +} + +static inline int efx_ef10_sriov_get_vf_config(struct efx_nic *efx, int vf, + struct ifla_vf_info *ivf) +{ + return -EOPNOTSUPP; +} + +int efx_ef10_vswitching_probe_pf(struct efx_nic *efx); +int efx_ef10_vswitching_probe_vf(struct efx_nic *efx); +int efx_ef10_vswitching_restore_pf(struct efx_nic *efx); +int efx_ef10_vswitching_restore_vf(struct efx_nic *efx); +void efx_ef10_vswitching_remove_pf(struct efx_nic *efx); +void efx_ef10_vswitching_remove_vf(struct efx_nic *efx); + +#endif /* EF10_SRIOV_H */ diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 4b00545a3ace..0f127a01b5e8 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -26,6 +26,7 @@ #include "efx.h" #include "nic.h" #include "selftest.h" +#include "sriov.h" #include "mcdi.h" #include "workarounds.h" @@ -1045,7 +1046,7 @@ static int efx_init_port(struct efx_nic *efx) /* Ensure the PHY advertises the correct flow control settings */ rc = efx->phy_op->reconfigure(efx); - if (rc) + if (rc && rc != -EPERM) goto fail2; mutex_unlock(&efx->mac_lock); @@ -1200,10 +1201,12 @@ static int efx_init_io(struct efx_nic *efx) struct pci_dev *pci_dev = efx->pci_dev; dma_addr_t dma_mask = efx->type->max_dma_mask; unsigned int mem_map_size = efx->type->mem_map_size(efx); - int rc; + int rc, bar; netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); + bar = efx->type->mem_bar; + rc = pci_enable_device(pci_dev); if (rc) { netif_err(efx, probe, efx->net_dev, @@ -1234,8 +1237,8 @@ static int efx_init_io(struct efx_nic *efx) netif_dbg(efx, probe, efx->net_dev, "using DMA mask %llx\n", (unsigned long long) dma_mask); - efx->membase_phys = pci_resource_start(efx->pci_dev, EFX_MEM_BAR); - rc = pci_request_region(pci_dev, EFX_MEM_BAR, "sfc"); + efx->membase_phys = pci_resource_start(efx->pci_dev, bar); + rc = pci_request_region(pci_dev, bar, "sfc"); if (rc) { netif_err(efx, probe, efx->net_dev, "request for memory BAR failed\n"); @@ -1258,7 +1261,7 @@ static int efx_init_io(struct efx_nic *efx) return 0; fail4: - pci_release_region(efx->pci_dev, EFX_MEM_BAR); + pci_release_region(efx->pci_dev, bar); fail3: efx->membase_phys = 0; fail2: @@ -1269,6 +1272,8 @@ static int efx_init_io(struct efx_nic *efx) static void efx_fini_io(struct efx_nic *efx) { + int bar; + netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); if (efx->membase) { @@ -1277,13 +1282,23 @@ static void efx_fini_io(struct efx_nic *efx) } if (efx->membase_phys) { - pci_release_region(efx->pci_dev, EFX_MEM_BAR); + bar = efx->type->mem_bar; + pci_release_region(efx->pci_dev, bar); efx->membase_phys = 0; } pci_disable_device(efx->pci_dev); } +void efx_set_default_rx_indir_table(struct efx_nic *efx) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++) + efx->rx_indir_table[i] = + ethtool_rxfh_indir_default(i, efx->rss_spread); +} + static unsigned int efx_wanted_parallelism(struct efx_nic *efx) { cpumask_var_t thread_mask; @@ -1314,15 +1329,19 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx) /* If RSS is requested for the PF *and* VFs then we can't write RSS * table entries that are inaccessible to VFs */ - if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && - count > efx_vf_size(efx)) { - netif_warn(efx, probe, efx->net_dev, - "Reducing number of RSS channels from %u to %u for " - "VF support. Increase vf-msix-limit to use more " - "channels on the PF.\n", - count, efx_vf_size(efx)); - count = efx_vf_size(efx); +#ifdef CONFIG_SFC_SRIOV + if (efx->type->sriov_wanted) { + if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && + count > efx_vf_size(efx)) { + netif_warn(efx, probe, efx->net_dev, + "Reducing number of RSS channels from %u to %u for " + "VF support. Increase vf-msix-limit to use more " + "channels on the PF.\n", + count, efx_vf_size(efx)); + count = efx_vf_size(efx); + } } +#endif return count; } @@ -1426,10 +1445,15 @@ static int efx_probe_interrupts(struct efx_nic *efx) } /* RSS might be usable on VFs even if it is disabled on the PF */ - - efx->rss_spread = ((efx->n_rx_channels > 1 || - !efx->type->sriov_wanted(efx)) ? - efx->n_rx_channels : efx_vf_size(efx)); +#ifdef CONFIG_SFC_SRIOV + if (efx->type->sriov_wanted) { + efx->rss_spread = ((efx->n_rx_channels > 1 || + !efx->type->sriov_wanted(efx)) ? + efx->n_rx_channels : efx_vf_size(efx)); + return 0; + } +#endif + efx->rss_spread = efx->n_rx_channels; return 0; } @@ -1593,7 +1617,6 @@ static void efx_set_channels(struct efx_nic *efx) static int efx_probe_nic(struct efx_nic *efx) { - size_t i; int rc; netif_dbg(efx, probe, efx->net_dev, "creating NIC\n"); @@ -1616,10 +1639,9 @@ static int efx_probe_nic(struct efx_nic *efx) goto fail2; if (efx->n_channels > 1) - netdev_rss_key_fill(&efx->rx_hash_key, sizeof(efx->rx_hash_key)); - for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++) - efx->rx_indir_table[i] = - ethtool_rxfh_indir_default(i, efx->rss_spread); + netdev_rss_key_fill(&efx->rx_hash_key, + sizeof(efx->rx_hash_key)); + efx_set_default_rx_indir_table(efx); netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels); @@ -1712,21 +1734,33 @@ static int efx_probe_all(struct efx_nic *efx) } efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; +#ifdef CONFIG_SFC_SRIOV + rc = efx->type->vswitching_probe(efx); + if (rc) /* not fatal; the PF will still work fine */ + netif_warn(efx, probe, efx->net_dev, + "failed to setup vswitching rc=%d;" + " VFs may not function\n", rc); +#endif + rc = efx_probe_filters(efx); if (rc) { netif_err(efx, probe, efx->net_dev, "failed to create filter tables\n"); - goto fail3; + goto fail4; } rc = efx_probe_channels(efx); if (rc) - goto fail4; + goto fail5; return 0; - fail4: + fail5: efx_remove_filters(efx); + fail4: +#ifdef CONFIG_SFC_SRIOV + efx->type->vswitching_remove(efx); +#endif fail3: efx_remove_port(efx); fail2: @@ -1816,6 +1850,9 @@ static void efx_remove_all(struct efx_nic *efx) { efx_remove_channels(efx); efx_remove_filters(efx); +#ifdef CONFIG_SFC_SRIOV + efx->type->vswitching_remove(efx); +#endif efx_remove_port(efx); efx_remove_nic(efx); } @@ -2168,7 +2205,8 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data) } ether_addr_copy(net_dev->dev_addr, new_addr); - efx->type->sriov_mac_address_changed(efx); + if (efx->type->sriov_mac_address_changed) + efx->type->sriov_mac_address_changed(efx); /* Reconfigure the MAC */ mutex_lock(&efx->mac_lock); @@ -2199,7 +2237,7 @@ static int efx_set_features(struct net_device *net_dev, netdev_features_t data) return 0; } -static const struct net_device_ops efx_farch_netdev_ops = { +static const struct net_device_ops efx_netdev_ops = { .ndo_open = efx_net_open, .ndo_stop = efx_net_stop, .ndo_get_stats64 = efx_net_stats, @@ -2212,10 +2250,10 @@ static const struct net_device_ops efx_farch_netdev_ops = { .ndo_set_rx_mode = efx_set_rx_mode, .ndo_set_features = efx_set_features, #ifdef CONFIG_SFC_SRIOV - .ndo_set_vf_mac = efx_siena_sriov_set_vf_mac, - .ndo_set_vf_vlan = efx_siena_sriov_set_vf_vlan, - .ndo_set_vf_spoofchk = efx_siena_sriov_set_vf_spoofchk, - .ndo_get_vf_config = efx_siena_sriov_get_vf_config, + .ndo_set_vf_mac = efx_sriov_set_vf_mac, + .ndo_set_vf_vlan = efx_sriov_set_vf_vlan, + .ndo_set_vf_spoofchk = efx_sriov_set_vf_spoofchk, + .ndo_get_vf_config = efx_sriov_get_vf_config, #endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = efx_netpoll, @@ -2229,29 +2267,6 @@ static const struct net_device_ops efx_farch_netdev_ops = { #endif }; -static const struct net_device_ops efx_ef10_netdev_ops = { - .ndo_open = efx_net_open, - .ndo_stop = efx_net_stop, - .ndo_get_stats64 = efx_net_stats, - .ndo_tx_timeout = efx_watchdog, - .ndo_start_xmit = efx_hard_start_xmit, - .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = efx_ioctl, - .ndo_change_mtu = efx_change_mtu, - .ndo_set_mac_address = efx_set_mac_address, - .ndo_set_rx_mode = efx_set_rx_mode, - .ndo_set_features = efx_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = efx_netpoll, -#endif -#ifdef CONFIG_NET_RX_BUSY_POLL - .ndo_busy_poll = efx_busy_poll, -#endif -#ifdef CONFIG_RFS_ACCEL - .ndo_rx_flow_steer = efx_filter_rfs, -#endif -}; - static void efx_update_name(struct efx_nic *efx) { strcpy(efx->name, efx->net_dev->name); @@ -2264,8 +2279,7 @@ static int efx_netdev_event(struct notifier_block *this, { struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); - if ((net_dev->netdev_ops == &efx_farch_netdev_ops || - net_dev->netdev_ops == &efx_ef10_netdev_ops) && + if ((net_dev->netdev_ops == &efx_netdev_ops) && event == NETDEV_CHANGENAME) efx_update_name(netdev_priv(net_dev)); @@ -2292,12 +2306,9 @@ static int efx_register_netdev(struct efx_nic *efx) net_dev->watchdog_timeo = 5 * HZ; net_dev->irq = efx->pci_dev->irq; - if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) { - net_dev->netdev_ops = &efx_ef10_netdev_ops; + net_dev->netdev_ops = &efx_netdev_ops; + if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) net_dev->priv_flags |= IFF_UNICAST_FLT; - } else { - net_dev->netdev_ops = &efx_farch_netdev_ops; - } net_dev->ethtool_ops = &efx_ethtool_ops; net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; @@ -2426,7 +2437,8 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) rc = efx->phy_op->init(efx); if (rc) goto fail; - if (efx->phy_op->reconfigure(efx)) + rc = efx->phy_op->reconfigure(efx); + if (rc && rc != -EPERM) netif_err(efx, drv, efx->net_dev, "could not restore PHY settings\n"); } @@ -2434,8 +2446,18 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) rc = efx_enable_interrupts(efx); if (rc) goto fail; + +#ifdef CONFIG_SFC_SRIOV + rc = efx->type->vswitching_restore(efx); + if (rc) /* not fatal; the PF will still work fine */ + netif_warn(efx, probe, efx->net_dev, + "failed to restore vswitching rc=%d;" + " VFs may not function\n", rc); +#endif + efx_restore_filters(efx); - efx->type->sriov_reset(efx); + if (efx->type->sriov_reset) + efx->type->sriov_reset(efx); mutex_unlock(&efx->mac_lock); @@ -2655,6 +2677,8 @@ static const struct pci_device_id efx_pci_table[] = { .driver_data = (unsigned long) &siena_a0_nic_type}, {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903), /* SFC9120 PF */ .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, + {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903), /* SFC9120 VF */ + .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923), /* SFC9140 PF */ .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, {0} /* end of list */ @@ -2826,7 +2850,9 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_disable_interrupts(efx); rtnl_unlock(); - efx->type->sriov_fini(efx); + if (efx->type->sriov_fini) + efx->type->sriov_fini(efx); + efx_unregister_netdev(efx); efx_mtd_remove(efx); @@ -3008,7 +3034,8 @@ static int efx_pci_probe(struct pci_dev *pci_dev, netif_info(efx, probe, efx->net_dev, "Solarflare NIC detected\n"); - efx_probe_vpd_strings(efx); + if (!efx->type->is_vf) + efx_probe_vpd_strings(efx); /* Set up basic I/O (BAR mappings etc) */ rc = efx_init_io(efx); @@ -3023,10 +3050,12 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (rc) goto fail4; - rc = efx->type->sriov_init(efx); - if (rc) - netif_err(efx, probe, efx->net_dev, - "SR-IOV can't be enabled rc %d\n", rc); + if (efx->type->sriov_init) { + rc = efx->type->sriov_init(efx); + if (rc) + netif_err(efx, probe, efx->net_dev, + "SR-IOV can't be enabled rc %d\n", rc); + } netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n"); @@ -3058,6 +3087,26 @@ static int efx_pci_probe(struct pci_dev *pci_dev, return rc; } +/* efx_pci_sriov_configure returns the actual number of Virtual Functions + * enabled on success + */ +#ifdef CONFIG_SFC_SRIOV +static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs) +{ + int rc; + struct efx_nic *efx = pci_get_drvdata(dev); + + if (efx->type->sriov_configure) { + rc = efx->type->sriov_configure(efx, num_vfs); + if (rc) + return rc; + else + return num_vfs; + } else + return -EOPNOTSUPP; +} +#endif + static int efx_pm_freeze(struct device *dev) { struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); @@ -3280,6 +3329,9 @@ static struct pci_driver efx_pci_driver = { .remove = efx_pci_remove, .driver.pm = &efx_pm_ops, .err_handler = &efx_err_handlers, +#ifdef CONFIG_SFC_SRIOV + .sriov_configure = efx_pci_sriov_configure, +#endif }; /************************************************************************** @@ -3302,9 +3354,11 @@ static int __init efx_init_module(void) if (rc) goto err_notifier; +#ifdef CONFIG_SFC_SRIOV rc = efx_init_sriov(); if (rc) goto err_sriov; +#endif reset_workqueue = create_singlethread_workqueue("sfc_reset"); if (!reset_workqueue) { @@ -3321,8 +3375,10 @@ static int __init efx_init_module(void) err_pci: destroy_workqueue(reset_workqueue); err_reset: +#ifdef CONFIG_SFC_SRIOV efx_fini_sriov(); err_sriov: +#endif unregister_netdevice_notifier(&efx_netdev_notifier); err_notifier: return rc; @@ -3334,7 +3390,9 @@ static void __exit efx_exit_module(void) pci_unregister_driver(&efx_pci_driver); destroy_workqueue(reset_workqueue); +#ifdef CONFIG_SFC_SRIOV efx_fini_sriov(); +#endif unregister_netdevice_notifier(&efx_netdev_notifier); } diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 2587c582a821..9097906ecfb4 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -15,7 +15,9 @@ #include "filter.h" /* All controllers use BAR 0 for I/O space and BAR 2(&3) for memory */ +/* All VFs use BAR 0/1 for memory */ #define EFX_MEM_BAR 2 +#define EFX_MEM_VF_BAR 0 /* TX */ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue); @@ -32,6 +34,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); extern unsigned int efx_piobuf_size; /* RX */ +void efx_set_default_rx_indir_table(struct efx_nic *efx); void efx_rx_config_page_split(struct efx_nic *efx); int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); @@ -220,6 +223,13 @@ static inline void efx_mtd_rename(struct efx_nic *efx) {} static inline void efx_mtd_remove(struct efx_nic *efx) {} #endif +#ifdef CONFIG_SFC_SRIOV +static inline unsigned int efx_vf_size(struct efx_nic *efx) +{ + return 1 << efx->vi_scale; +} +#endif + static inline void efx_schedule_channel(struct efx_channel *channel) { netif_vdbg(channel->efx, intr, channel->efx->net_dev, diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 4835bc0d0de8..03829b48547a 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -1109,9 +1109,8 @@ static int efx_ethtool_set_rxfh(struct net_device *net_dev, const u32 *indir, return -EOPNOTSUPP; if (!indir) return 0; - memcpy(efx->rx_indir_table, indir, sizeof(efx->rx_indir_table)); - efx->type->rx_push_rss_config(efx); - return 0; + + return efx->type->rx_push_rss_config(efx, true, indir); } static int efx_ethtool_get_ts_info(struct net_device *net_dev, diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c index f166c8ef38a3..80e69af21642 100644 --- a/drivers/net/ethernet/sfc/falcon.c +++ b/drivers/net/ethernet/sfc/falcon.c @@ -477,16 +477,29 @@ static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id) * ************************************************************************** */ +static int dummy_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table) +{ + (void) efx; + (void) user; + (void) rx_indir_table; + return -ENOSYS; +} -static void falcon_b0_rx_push_rss_config(struct efx_nic *efx) +static int falcon_b0_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table) { efx_oword_t temp; + (void) user; /* Set hash key for IPv4 */ memcpy(&temp, efx->rx_hash_key, sizeof(temp)); efx_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY); + memcpy(efx->rx_indir_table, rx_indir_table, + sizeof(efx->rx_indir_table)); efx_farch_rx_push_indir_table(efx); + return 0; } /************************************************************************** @@ -2507,7 +2520,7 @@ static int falcon_init_nic(struct efx_nic *efx) falcon_init_rx_cfg(efx); if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) { - falcon_b0_rx_push_rss_config(efx); + falcon_b0_rx_push_rss_config(efx, false, efx->rx_indir_table); /* Set destination of both TX and RX Flush events */ EFX_POPULATE_OWORD_1(temp, FRF_BZ_FLS_EVQ_ID, 0); @@ -2687,6 +2700,8 @@ static int falcon_set_wol(struct efx_nic *efx, u32 type) */ const struct efx_nic_type falcon_a1_nic_type = { + .is_vf = false, + .mem_bar = EFX_MEM_BAR, .mem_map_size = falcon_a1_mem_map_size, .probe = falcon_probe_nic, .remove = falcon_remove_nic, @@ -2729,7 +2744,7 @@ const struct efx_nic_type falcon_a1_nic_type = { .tx_init = efx_farch_tx_init, .tx_remove = efx_farch_tx_remove, .tx_write = efx_farch_tx_write, - .rx_push_rss_config = efx_port_dummy_op_void, + .rx_push_rss_config = dummy_rx_push_rss_config, .rx_probe = efx_farch_rx_probe, .rx_init = efx_farch_rx_init, .rx_remove = efx_farch_rx_remove, @@ -2766,11 +2781,6 @@ const struct efx_nic_type falcon_a1_nic_type = { .mtd_write = falcon_mtd_write, .mtd_sync = falcon_mtd_sync, #endif - .sriov_init = efx_falcon_sriov_init, - .sriov_fini = efx_falcon_sriov_fini, - .sriov_mac_address_changed = efx_falcon_sriov_mac_address_changed, - .sriov_wanted = efx_falcon_sriov_wanted, - .sriov_reset = efx_falcon_sriov_reset, .revision = EFX_REV_FALCON_A1, .txd_ptr_tbl_base = FR_AA_TX_DESC_PTR_TBL_KER, @@ -2788,6 +2798,8 @@ const struct efx_nic_type falcon_a1_nic_type = { }; const struct efx_nic_type falcon_b0_nic_type = { + .is_vf = false, + .mem_bar = EFX_MEM_BAR, .mem_map_size = falcon_b0_mem_map_size, .probe = falcon_probe_nic, .remove = falcon_remove_nic, @@ -2867,11 +2879,6 @@ const struct efx_nic_type falcon_b0_nic_type = { .mtd_write = falcon_mtd_write, .mtd_sync = falcon_mtd_sync, #endif - .sriov_init = efx_falcon_sriov_init, - .sriov_fini = efx_falcon_sriov_fini, - .sriov_mac_address_changed = efx_falcon_sriov_mac_address_changed, - .sriov_wanted = efx_falcon_sriov_wanted, - .sriov_reset = efx_falcon_sriov_reset, .revision = EFX_REV_FALCON_B0, .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL, diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index bb89e96a125e..f08266f0eca2 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -20,6 +20,8 @@ #include "efx.h" #include "nic.h" #include "farch_regs.h" +#include "sriov.h" +#include "siena_sriov.h" #include "io.h" #include "workarounds.h" @@ -1198,13 +1200,17 @@ efx_farch_handle_driver_event(struct efx_channel *channel, efx_qword_t *event) netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n", channel->channel, ev_sub_data); efx_farch_handle_tx_flush_done(efx, event); +#ifdef CONFIG_SFC_SRIOV efx_siena_sriov_tx_flush_done(efx, event); +#endif break; case FSE_AZ_RX_DESCQ_FLS_DONE_EV: netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n", channel->channel, ev_sub_data); efx_farch_handle_rx_flush_done(efx, event); +#ifdef CONFIG_SFC_SRIOV efx_siena_sriov_rx_flush_done(efx, event); +#endif break; case FSE_AZ_EVQ_INIT_DONE_EV: netif_dbg(efx, hw, efx->net_dev, @@ -1242,8 +1248,11 @@ efx_farch_handle_driver_event(struct efx_channel *channel, efx_qword_t *event) " RX Q %d is disabled.\n", ev_sub_data, ev_sub_data); efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR); - } else + } +#ifdef CONFIG_SFC_SRIOV + else efx_siena_sriov_desc_fetch_err(efx, ev_sub_data); +#endif break; case FSE_BZ_TX_DSC_ERROR_EV: if (ev_sub_data < EFX_VI_BASE) { @@ -1252,8 +1261,11 @@ efx_farch_handle_driver_event(struct efx_channel *channel, efx_qword_t *event) " TX Q %d is disabled.\n", ev_sub_data, ev_sub_data); efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR); - } else + } +#ifdef CONFIG_SFC_SRIOV + else efx_siena_sriov_desc_fetch_err(efx, ev_sub_data); +#endif break; default: netif_vdbg(efx, hw, efx->net_dev, @@ -1317,9 +1329,11 @@ int efx_farch_ev_process(struct efx_channel *channel, int budget) case FSE_AZ_EV_CODE_DRIVER_EV: efx_farch_handle_driver_event(channel, &event); break; +#ifdef CONFIG_SFC_SRIOV case FSE_CZ_EV_CODE_USER_EV: efx_siena_sriov_event(channel, &event); break; +#endif case FSE_CZ_EV_CODE_MCDI_EV: efx_mcdi_process_event(channel, &event); break; @@ -1685,28 +1699,32 @@ void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw) vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); #ifdef CONFIG_SFC_SRIOV - if (efx->type->sriov_wanted(efx)) { - unsigned vi_dc_entries, buftbl_free, entries_per_vf, vf_limit; - - nic_data->vf_buftbl_base = buftbl_min; - - vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES; - vi_count = max(vi_count, EFX_VI_BASE); - buftbl_free = (sram_lim_qw - buftbl_min - - vi_count * vi_dc_entries); - - entries_per_vf = ((vi_dc_entries + EFX_VF_BUFTBL_PER_VI) * - efx_vf_size(efx)); - vf_limit = min(buftbl_free / entries_per_vf, - (1024U - EFX_VI_BASE) >> efx->vi_scale); - - if (efx->vf_count > vf_limit) { - netif_err(efx, probe, efx->net_dev, - "Reducing VF count from from %d to %d\n", - efx->vf_count, vf_limit); - efx->vf_count = vf_limit; + if (efx->type->sriov_wanted) { + if (efx->type->sriov_wanted(efx)) { + unsigned vi_dc_entries, buftbl_free; + unsigned entries_per_vf, vf_limit; + + nic_data->vf_buftbl_base = buftbl_min; + + vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES; + vi_count = max(vi_count, EFX_VI_BASE); + buftbl_free = (sram_lim_qw - buftbl_min - + vi_count * vi_dc_entries); + + entries_per_vf = ((vi_dc_entries + + EFX_VF_BUFTBL_PER_VI) * + efx_vf_size(efx)); + vf_limit = min(buftbl_free / entries_per_vf, + (1024U - EFX_VI_BASE) >> efx->vi_scale); + + if (efx->vf_count > vf_limit) { + netif_err(efx, probe, efx->net_dev, + "Reducing VF count from from %d to %d\n", + efx->vf_count, vf_limit); + efx->vf_count = vf_limit; + } + vi_count += efx->vf_count * efx_vf_size(efx); } - vi_count += efx->vf_count * efx_vf_size(efx); } #endif diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index d37928f01949..b44ee31f1a7a 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -1035,7 +1035,9 @@ void efx_mcdi_process_event(struct efx_channel *channel, /* MAC stats are gather lazily. We can ignore this. */ break; case MCDI_EVENT_CODE_FLR: - efx_siena_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF)); + if (efx->type->sriov_flr) + efx->type->sriov_flr(efx, + MCDI_EVENT_FIELD(*event, FLR_VF)); break; case MCDI_EVENT_CODE_PTP_RX: case MCDI_EVENT_CODE_PTP_FAULT: @@ -1081,9 +1083,7 @@ void efx_mcdi_process_event(struct efx_channel *channel, void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len) { - MCDI_DECLARE_BUF(outbuf, - max(MC_CMD_GET_VERSION_OUT_LEN, - MC_CMD_GET_CAPABILITIES_OUT_LEN)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_VERSION_OUT_LEN); size_t outlength; const __le16 *ver_words; size_t offset; @@ -1108,19 +1108,11 @@ void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len) * single version. Report which variants are running. */ if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) { - BUILD_BUG_ON(MC_CMD_GET_CAPABILITIES_IN_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_CAPABILITIES, NULL, 0, - outbuf, sizeof(outbuf), &outlength); - if (rc || outlength < MC_CMD_GET_CAPABILITIES_OUT_LEN) - offset += snprintf( - buf + offset, len - offset, " rx? tx?"); - else - offset += snprintf( - buf + offset, len - offset, " rx%x tx%x", - MCDI_WORD(outbuf, - GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID), - MCDI_WORD(outbuf, - GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID)); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + offset += snprintf(buf + offset, len - offset, " rx%x tx%x", + nic_data->rx_dpcpu_fw_id, + nic_data->tx_dpcpu_fw_id); /* It's theoretically possible for the string to exceed 31 * characters, though in practice the first three version @@ -1150,10 +1142,26 @@ static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_UPDATE, 1); MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_FIRMWARE_ID, MC_CMD_FW_LOW_LATENCY); - rc = efx_mcdi_rpc(efx, MC_CMD_DRV_ATTACH, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - if (rc) + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_DRV_ATTACH, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + /* If we're not the primary PF, trying to ATTACH with a FIRMWARE_ID + * specified will fail with EPERM, and we have to tell the MC we don't + * care what firmware we get. + */ + if (rc == -EPERM) { + netif_dbg(efx, probe, efx->net_dev, + "efx_mcdi_drv_attach with fw-variant setting failed EPERM, trying without it\n"); + MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_FIRMWARE_ID, + MC_CMD_FW_DONT_CARE); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_DRV_ATTACH, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), + &outlen); + } + if (rc) { + efx_mcdi_display_error(efx, MC_CMD_DRV_ATTACH, sizeof(inbuf), + outbuf, outlen, rc); goto fail; + } if (outlen < MC_CMD_DRV_ATTACH_OUT_LEN) { rc = -EIO; goto fail; @@ -1178,16 +1186,6 @@ static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, * and are completely trusted by firmware. Abort probing * if that's not true for this function. */ - if (driver_operating && - (efx->mcdi->fn_flags & - (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL | - 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED)) != - (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL | - 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED)) { - netif_err(efx, probe, efx->net_dev, - "This driver version only supports one function per port\n"); - return -ENODEV; - } if (was_attached != NULL) *was_attached = MCDI_DWORD(outbuf, DRV_ATTACH_OUT_OLD_STATE); @@ -1385,6 +1383,9 @@ fail1: return rc; } +/* Returns 1 if an assertion was read, 0 if no assertion had fired, + * negative on error. + */ static int efx_mcdi_read_assertion(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_ASSERTS_IN_LEN); @@ -1406,6 +1407,8 @@ static int efx_mcdi_read_assertion(struct efx_nic *efx) rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_ASSERTS, inbuf, MC_CMD_GET_ASSERTS_IN_LEN, outbuf, sizeof(outbuf), &outlen); + if (rc == -EPERM) + return 0; } while ((rc == -EINTR || rc == -EIO) && retry-- > 0); if (rc) { @@ -1443,24 +1446,31 @@ static int efx_mcdi_read_assertion(struct efx_nic *efx) MCDI_ARRAY_DWORD(outbuf, GET_ASSERTS_OUT_GP_REGS_OFFS, index)); - return 0; + return 1; } -static void efx_mcdi_exit_assertion(struct efx_nic *efx) +static int efx_mcdi_exit_assertion(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_REBOOT_IN_LEN); + int rc; /* If the MC is running debug firmware, it might now be * waiting for a debugger to attach, but we just want it to * reboot. We set a flag that makes the command a no-op if it - * has already done so. We don't know what return code to - * expect (0 or -EIO), so ignore it. + * has already done so. + * The MCDI will thus return either 0 or -EIO. */ BUILD_BUG_ON(MC_CMD_REBOOT_OUT_LEN != 0); MCDI_SET_DWORD(inbuf, REBOOT_IN_FLAGS, MC_CMD_REBOOT_FLAGS_AFTER_ASSERTION); - (void) efx_mcdi_rpc(efx, MC_CMD_REBOOT, inbuf, MC_CMD_REBOOT_IN_LEN, - NULL, 0, NULL); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_REBOOT, inbuf, MC_CMD_REBOOT_IN_LEN, + NULL, 0, NULL); + if (rc == -EIO) + rc = 0; + if (rc) + efx_mcdi_display_error(efx, MC_CMD_REBOOT, MC_CMD_REBOOT_IN_LEN, + NULL, 0, rc); + return rc; } int efx_mcdi_handle_assertion(struct efx_nic *efx) @@ -1468,12 +1478,10 @@ int efx_mcdi_handle_assertion(struct efx_nic *efx) int rc; rc = efx_mcdi_read_assertion(efx); - if (rc) + if (rc <= 0) return rc; - efx_mcdi_exit_assertion(efx); - - return 0; + return efx_mcdi_exit_assertion(efx); } void efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode) @@ -1688,6 +1696,36 @@ int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled) NULL, 0, NULL); } +int efx_mcdi_get_workarounds(struct efx_nic *efx, unsigned int *impl_out, + unsigned int *enabled_out) +{ + MCDI_DECLARE_BUF_OUT_OR_ERR(outbuf, MC_CMD_GET_WORKAROUNDS_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_WORKAROUNDS, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < MC_CMD_GET_WORKAROUNDS_OUT_LEN) { + rc = -EIO; + goto fail; + } + + if (impl_out) + *impl_out = MCDI_DWORD(outbuf, GET_WORKAROUNDS_OUT_IMPLEMENTED); + + if (enabled_out) + *enabled_out = MCDI_DWORD(outbuf, GET_WORKAROUNDS_OUT_ENABLED); + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + #ifdef CONFIG_SFC_MTD #define EFX_MCDI_NVRAM_LEN_MAX 128 diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 56465f7465a2..5df1e986e39e 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -339,6 +339,8 @@ bool efx_mcdi_mac_check_fault(struct efx_nic *efx); enum reset_type efx_mcdi_map_reset_reason(enum reset_type reason); int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method); int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled); +int efx_mcdi_get_workarounds(struct efx_nic *efx, unsigned int *impl_out, + unsigned int *enabled_out); #ifdef CONFIG_SFC_MCDI_MON int efx_mcdi_mon_probe(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index e028de10e1b7..4fa6eb27cc56 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -1875,6 +1875,8 @@ #define MC_CMD_FW_FULL_FEATURED 0x0 /* enum: Prefer to use firmware with fewer features but lower latency */ #define MC_CMD_FW_LOW_LATENCY 0x1 +/* enum: Only this option is allowed for non-admin functions */ +#define MC_CMD_FW_DONT_CARE 0xffffffff /* MC_CMD_DRV_ATTACH_OUT msgresponse */ #define MC_CMD_DRV_ATTACH_OUT_LEN 4 @@ -4087,6 +4089,27 @@ /***********************************/ +/* MC_CMD_GET_WORKAROUNDS + * Read the list of all implemented and all currently enabled workarounds. The + * enums here must correspond with those in MC_CMD_WORKAROUND. + */ +#define MC_CMD_GET_WORKAROUNDS 0x59 + +/* MC_CMD_GET_WORKAROUNDS_OUT msgresponse */ +#define MC_CMD_GET_WORKAROUNDS_OUT_LEN 8 +/* Each workaround is represented by a single bit according to the enums below. + */ +#define MC_CMD_GET_WORKAROUNDS_OUT_IMPLEMENTED_OFST 0 +#define MC_CMD_GET_WORKAROUNDS_OUT_ENABLED_OFST 4 +/* enum: Bug 17230 work around. */ +#define MC_CMD_GET_WORKAROUNDS_OUT_BUG17230 0x2 +/* enum: Bug 35388 work around (unsafe EVQ writes). */ +#define MC_CMD_GET_WORKAROUNDS_OUT_BUG35388 0x4 +/* enum: Bug35017 workaround (A64 tables must be identity map) */ +#define MC_CMD_GET_WORKAROUNDS_OUT_BUG35017 0x8 + + +/***********************************/ /* MC_CMD_READ_REGS * Get a dump of the MCPU registers */ diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 325dd94bca46..031a3385ad4b 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -793,7 +793,6 @@ union efx_multicast_hash { efx_oword_t oword[EFX_MCAST_HASH_ENTRIES / sizeof(efx_oword_t) / 8]; }; -struct efx_vf; struct vfdi_status; /** @@ -909,7 +908,6 @@ struct vfdi_status; * completed (either success or failure). Not used when MCDI is used to * flush receive queues. * @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions. - * @vf: Array of &struct efx_vf objects. * @vf_count: Number of VFs intended to be enabled. * @vf_init_count: Number of VFs that have been fully initialised. * @vi_scale: log2 number of vnics per VF. @@ -1053,7 +1051,6 @@ struct efx_nic { wait_queue_head_t flush_wq; #ifdef CONFIG_SFC_SRIOV - struct efx_vf *vf; unsigned vf_count; unsigned vf_init_count; unsigned vi_scale; @@ -1092,6 +1089,7 @@ struct efx_mtd_partition { /** * struct efx_nic_type - Efx device type definition + * @mem_bar: Get the memory BAR * @mem_map_size: Get memory BAR mapped size * @probe: Probe the controller * @remove: Free resources allocated by probe() @@ -1226,6 +1224,8 @@ struct efx_mtd_partition { * @hwtstamp_filters: Mask of hardware timestamp filter types supported */ struct efx_nic_type { + bool is_vf; + unsigned int mem_bar; unsigned int (*mem_map_size)(struct efx_nic *efx); int (*probe)(struct efx_nic *efx); void (*remove)(struct efx_nic *efx); @@ -1277,7 +1277,8 @@ struct efx_nic_type { void (*tx_init)(struct efx_tx_queue *tx_queue); void (*tx_remove)(struct efx_tx_queue *tx_queue); void (*tx_write)(struct efx_tx_queue *tx_queue); - void (*rx_push_rss_config)(struct efx_nic *efx); + int (*rx_push_rss_config)(struct efx_nic *efx, bool user, + const u32 *rx_indir_table); int (*rx_probe)(struct efx_rx_queue *rx_queue); void (*rx_init)(struct efx_rx_queue *rx_queue); void (*rx_remove)(struct efx_rx_queue *rx_queue); @@ -1330,11 +1331,23 @@ struct efx_nic_type { int (*ptp_set_ts_sync_events)(struct efx_nic *efx, bool en, bool temp); int (*ptp_set_ts_config)(struct efx_nic *efx, struct hwtstamp_config *init); + int (*sriov_configure)(struct efx_nic *efx, int num_vfs); int (*sriov_init)(struct efx_nic *efx); void (*sriov_fini)(struct efx_nic *efx); void (*sriov_mac_address_changed)(struct efx_nic *efx); bool (*sriov_wanted)(struct efx_nic *efx); void (*sriov_reset)(struct efx_nic *efx); + void (*sriov_flr)(struct efx_nic *efx, unsigned vf_i); + int (*sriov_set_vf_mac)(struct efx_nic *efx, int vf_i, u8 *mac); + int (*sriov_set_vf_vlan)(struct efx_nic *efx, int vf_i, u16 vlan, + u8 qos); + int (*sriov_set_vf_spoofchk)(struct efx_nic *efx, int vf_i, + bool spoofchk); + int (*sriov_get_vf_config)(struct efx_nic *efx, int vf_i, + struct ifla_vf_info *ivi); + int (*vswitching_probe)(struct efx_nic *efx); + int (*vswitching_restore)(struct efx_nic *efx); + void (*vswitching_remove)(struct efx_nic *efx); int revision; unsigned int txd_ptr_tbl_base; diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 93d10cbbd1cf..2fd30556e6c3 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -381,6 +381,7 @@ enum { * @efx: Pointer back to main interface structure * @wol_filter_id: Wake-on-LAN packet filter id * @stats: Hardware statistics + * @vf: Array of &struct siena_vf objects * @vf_buftbl_base: The zeroth buffer table index used to back VF queues. * @vfdi_status: Common VFDI status page to be dmad to VF address space. * @local_addr_list: List of local addresses. Protected by %local_lock. @@ -394,6 +395,7 @@ struct siena_nic_data { int wol_filter_id; u64 stats[SIENA_STAT_COUNT]; #ifdef CONFIG_SFC_SRIOV + struct siena_vf *vf; struct efx_channel *vfdi_channel; unsigned vf_buftbl_base; struct efx_buffer vfdi_status; @@ -483,12 +485,21 @@ enum { * @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC * reboot * @rx_rss_context: Firmware handle for our RSS context + * @rx_rss_context_exclusive: Whether our RSS context is exclusive or shared * @stats: Hardware statistics * @workaround_35388: Flag: firmware supports workaround for bug 35388 * @must_check_datapath_caps: Flag: @datapath_caps needs to be revalidated * after MC reboot * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of * %MC_CMD_GET_CAPABILITIES response) + * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU + * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU + * @vport_id: The function's vport ID, only relevant for PFs + * @must_probe_vswitching: Flag: vswitching has yet to be setup after MC reboot + * @pf_index: The number for this PF, or the parent PF if this is a VF +#ifdef CONFIG_SFC_SRIOV + * @vf: Pointer to VF data structure +#endif */ struct efx_ef10_nic_data { struct efx_buffer mcdi_buf; @@ -503,126 +514,25 @@ struct efx_ef10_nic_data { unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT]; bool must_restore_piobufs; u32 rx_rss_context; + bool rx_rss_context_exclusive; u64 stats[EF10_STAT_COUNT]; bool workaround_35388; bool must_check_datapath_caps; u32 datapath_caps; -}; - -/* - * On the SFC9000 family each port is associated with 1 PCI physical - * function (PF) handled by sfc and a configurable number of virtual - * functions (VFs) that may be handled by some other driver, often in - * a VM guest. The queue pointer registers are mapped in both PF and - * VF BARs such that an 8K region provides access to a single RX, TX - * and event queue (collectively a Virtual Interface, VI or VNIC). - * - * The PF has access to all 1024 VIs while VFs are mapped to VIs - * according to VI_BASE and VI_SCALE: VF i has access to VIs numbered - * in range [VI_BASE + i << VI_SCALE, VI_BASE + i + 1 << VI_SCALE). - * The number of VIs and the VI_SCALE value are configurable but must - * be established at boot time by firmware. - */ - -/* Maximum VI_SCALE parameter supported by Siena */ -#define EFX_VI_SCALE_MAX 6 -/* Base VI to use for SR-IOV. Must be aligned to (1 << EFX_VI_SCALE_MAX), - * so this is the smallest allowed value. */ -#define EFX_VI_BASE 128U -/* Maximum number of VFs allowed */ -#define EFX_VF_COUNT_MAX 127 -/* Limit EVQs on VFs to be only 8k to reduce buffer table reservation */ -#define EFX_MAX_VF_EVQ_SIZE 8192UL -/* The number of buffer table entries reserved for each VI on a VF */ -#define EFX_VF_BUFTBL_PER_VI \ - ((EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE) * \ - sizeof(efx_qword_t) / EFX_BUF_SIZE) - + unsigned int rx_dpcpu_fw_id; + unsigned int tx_dpcpu_fw_id; + unsigned int vport_id; + bool must_probe_vswitching; + unsigned int pf_index; #ifdef CONFIG_SFC_SRIOV - -/* SIENA */ -static inline bool efx_siena_sriov_wanted(struct efx_nic *efx) -{ - return efx->vf_count != 0; -} - -static inline bool efx_siena_sriov_enabled(struct efx_nic *efx) -{ - return efx->vf_init_count != 0; -} - -static inline unsigned int efx_vf_size(struct efx_nic *efx) -{ - return 1 << efx->vi_scale; -} + struct ef10_vf *vf; +#endif + u8 vport_mac[ETH_ALEN]; +}; int efx_init_sriov(void); -void efx_siena_sriov_probe(struct efx_nic *efx); -int efx_siena_sriov_init(struct efx_nic *efx); -void efx_siena_sriov_mac_address_changed(struct efx_nic *efx); -void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event); -void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event); -void efx_siena_sriov_event(struct efx_channel *channel, efx_qword_t *event); -void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq); -void efx_siena_sriov_flr(struct efx_nic *efx, unsigned flr); -void efx_siena_sriov_reset(struct efx_nic *efx); -void efx_siena_sriov_fini(struct efx_nic *efx); void efx_fini_sriov(void); -/* EF10 */ -static inline bool efx_ef10_sriov_wanted(struct efx_nic *efx) { return false; } -static inline int efx_ef10_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; } -static inline void efx_ef10_sriov_mac_address_changed(struct efx_nic *efx) {} -static inline void efx_ef10_sriov_reset(struct efx_nic *efx) {} -static inline void efx_ef10_sriov_fini(struct efx_nic *efx) {} - -#else - -/* SIENA */ -static inline bool efx_siena_sriov_wanted(struct efx_nic *efx) { return false; } -static inline bool efx_siena_sriov_enabled(struct efx_nic *efx) { return false; } -static inline unsigned int efx_vf_size(struct efx_nic *efx) { return 0; } -static inline int efx_init_sriov(void) { return 0; } -static inline void efx_siena_sriov_probe(struct efx_nic *efx) {} -static inline int efx_siena_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; } -static inline void efx_siena_sriov_mac_address_changed(struct efx_nic *efx) {} -static inline void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, - efx_qword_t *event) {} -static inline void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, - efx_qword_t *event) {} -static inline void efx_siena_sriov_event(struct efx_channel *channel, - efx_qword_t *event) {} -static inline void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, - unsigned dmaq) {} -static inline void efx_siena_sriov_flr(struct efx_nic *efx, unsigned flr) {} -static inline void efx_siena_sriov_reset(struct efx_nic *efx) {} -static inline void efx_siena_sriov_fini(struct efx_nic *efx) {} -static inline void efx_fini_sriov(void) {} - -/* EF10 */ -static inline bool efx_ef10_sriov_wanted(struct efx_nic *efx) { return false; } -static inline int efx_ef10_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; } -static inline void efx_ef10_sriov_mac_address_changed(struct efx_nic *efx) {} -static inline void efx_ef10_sriov_reset(struct efx_nic *efx) {} -static inline void efx_ef10_sriov_fini(struct efx_nic *efx) {} - -#endif - -/* FALCON */ -static inline bool efx_falcon_sriov_wanted(struct efx_nic *efx) { return false; } -static inline int efx_falcon_sriov_init(struct efx_nic *efx) { return -EOPNOTSUPP; } -static inline void efx_falcon_sriov_mac_address_changed(struct efx_nic *efx) {} -static inline void efx_falcon_sriov_reset(struct efx_nic *efx) {} -static inline void efx_falcon_sriov_fini(struct efx_nic *efx) {} - -int efx_siena_sriov_set_vf_mac(struct net_device *dev, int vf, u8 *mac); -int efx_siena_sriov_set_vf_vlan(struct net_device *dev, int vf, - u16 vlan, u8 qos); -int efx_siena_sriov_get_vf_config(struct net_device *dev, int vf, - struct ifla_vf_info *ivf); -int efx_siena_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf, - bool spoofchk); - struct ethtool_ts_info; int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel); void efx_ptp_defer_probe_with_channel(struct efx_nic *efx); @@ -654,6 +564,7 @@ extern const struct efx_nic_type falcon_a1_nic_type; extern const struct efx_nic_type falcon_b0_nic_type; extern const struct efx_nic_type siena_a0_nic_type; extern const struct efx_nic_type efx_hunt_a0_nic_type; +extern const struct efx_nic_type efx_hunt_a0_vf_nic_type; /************************************************************************** * diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index f12c811938d2..8b4130abad66 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -25,6 +25,7 @@ #include "mcdi.h" #include "mcdi_pcol.h" #include "selftest.h" +#include "siena_sriov.h" /* Hardware control for SFC9000 family including SFL9021 (aka Siena). */ @@ -306,7 +307,9 @@ static int siena_probe_nic(struct efx_nic *efx) if (rc) goto fail5; +#ifdef CONFIG_SFC_SRIOV efx_siena_sriov_probe(efx); +#endif efx_ptp_defer_probe_with_channel(efx); return 0; @@ -321,7 +324,8 @@ fail1: return rc; } -static void siena_rx_push_rss_config(struct efx_nic *efx) +static int siena_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table) { efx_oword_t temp; @@ -343,7 +347,11 @@ static void siena_rx_push_rss_config(struct efx_nic *efx) FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8); efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3); + memcpy(efx->rx_indir_table, rx_indir_table, + sizeof(efx->rx_indir_table)); efx_farch_rx_push_indir_table(efx); + + return 0; } /* This call performs hardware-specific global initialisation, such as @@ -386,7 +394,7 @@ static int siena_init_nic(struct efx_nic *efx) EFX_RX_USR_BUF_SIZE >> 5); efx_writeo(efx, &temp, FR_AZ_RX_CFG); - siena_rx_push_rss_config(efx); + siena_rx_push_rss_config(efx, false, efx->rx_indir_table); /* Enable event logging */ rc = efx_mcdi_log_ctrl(efx, true, false, 0); @@ -909,6 +917,8 @@ fail: */ const struct efx_nic_type siena_a0_nic_type = { + .is_vf = false, + .mem_bar = EFX_MEM_BAR, .mem_map_size = siena_mem_map_size, .probe = siena_probe_nic, .remove = siena_remove_nic, @@ -996,11 +1006,22 @@ const struct efx_nic_type siena_a0_nic_type = { #endif .ptp_write_host_time = siena_ptp_write_host_time, .ptp_set_ts_config = siena_ptp_set_ts_config, +#ifdef CONFIG_SFC_SRIOV + .sriov_configure = efx_siena_sriov_configure, .sriov_init = efx_siena_sriov_init, .sriov_fini = efx_siena_sriov_fini, .sriov_mac_address_changed = efx_siena_sriov_mac_address_changed, .sriov_wanted = efx_siena_sriov_wanted, .sriov_reset = efx_siena_sriov_reset, + .sriov_flr = efx_siena_sriov_flr, + .sriov_set_vf_mac = efx_siena_sriov_set_vf_mac, + .sriov_set_vf_vlan = efx_siena_sriov_set_vf_vlan, + .sriov_set_vf_spoofchk = efx_siena_sriov_set_vf_spoofchk, + .sriov_get_vf_config = efx_siena_sriov_get_vf_config, + .vswitching_probe = efx_port_dummy_op_int, + .vswitching_restore = efx_port_dummy_op_int, + .vswitching_remove = efx_port_dummy_op_void, +#endif .revision = EFX_REV_SIENA_A0, .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL, diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index fe83430796fd..caf701ac274f 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -16,6 +16,7 @@ #include "filter.h" #include "mcdi_pcol.h" #include "farch_regs.h" +#include "siena_sriov.h" #include "vfdi.h" /* Number of longs required to track all the VIs in a VF */ @@ -38,7 +39,7 @@ enum efx_vf_tx_filter_mode { }; /** - * struct efx_vf - Back-end resource and protocol state for a PCI VF + * struct siena_vf - Back-end resource and protocol state for a PCI VF * @efx: The Efx NIC owning this VF * @pci_rid: The PCI requester ID for this VF * @pci_name: The PCI name (formatted address) of this VF @@ -83,7 +84,7 @@ enum efx_vf_tx_filter_mode { * @rxq_retry_count: Number of receive queues in @rxq_retry_mask. * @reset_work: Work item to schedule a VF reset. */ -struct efx_vf { +struct siena_vf { struct efx_nic *efx; unsigned int pci_rid; char pci_name[13]; /* dddd:bb:dd.f */ @@ -189,7 +190,7 @@ MODULE_PARM_DESC(max_vfs, */ static struct workqueue_struct *vfdi_workqueue; -static unsigned abs_index(struct efx_vf *vf, unsigned index) +static unsigned abs_index(struct siena_vf *vf, unsigned index) { return EFX_VI_BASE + vf->index * efx_vf_size(vf->efx) + index; } @@ -299,7 +300,7 @@ out: /* The TX filter is entirely controlled by this driver, and is modified * underneath the feet of the VF */ -static void efx_siena_sriov_reset_tx_filter(struct efx_vf *vf) +static void efx_siena_sriov_reset_tx_filter(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct efx_filter_spec filter; @@ -343,7 +344,7 @@ static void efx_siena_sriov_reset_tx_filter(struct efx_vf *vf) } /* The RX filter is managed here on behalf of the VF driver */ -static void efx_siena_sriov_reset_rx_filter(struct efx_vf *vf) +static void efx_siena_sriov_reset_rx_filter(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct efx_filter_spec filter; @@ -382,7 +383,7 @@ static void efx_siena_sriov_reset_rx_filter(struct efx_vf *vf) } } -static void __efx_siena_sriov_update_vf_addr(struct efx_vf *vf) +static void __efx_siena_sriov_update_vf_addr(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct siena_nic_data *nic_data = efx->nic_data; @@ -397,7 +398,7 @@ static void __efx_siena_sriov_update_vf_addr(struct efx_vf *vf) * local_page_list, either by acquiring local_lock or by running from * efx_siena_sriov_peer_work() */ -static void __efx_siena_sriov_push_vf_status(struct efx_vf *vf) +static void __efx_siena_sriov_push_vf_status(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct siena_nic_data *nic_data = efx->nic_data; @@ -509,8 +510,9 @@ static bool bad_buf_count(unsigned buf_count, unsigned max_entry_count) * Optionally set VF index and VI index within the VF. */ static bool map_vi_index(struct efx_nic *efx, unsigned abs_index, - struct efx_vf **vf_out, unsigned *rel_index_out) + struct siena_vf **vf_out, unsigned *rel_index_out) { + struct siena_nic_data *nic_data = efx->nic_data; unsigned vf_i; if (abs_index < EFX_VI_BASE) @@ -520,13 +522,13 @@ static bool map_vi_index(struct efx_nic *efx, unsigned abs_index, return true; if (vf_out) - *vf_out = efx->vf + vf_i; + *vf_out = nic_data->vf + vf_i; if (rel_index_out) *rel_index_out = abs_index % efx_vf_size(efx); return false; } -static int efx_vfdi_init_evq(struct efx_vf *vf) +static int efx_vfdi_init_evq(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct vfdi_req *req = vf->buf.addr; @@ -567,7 +569,7 @@ static int efx_vfdi_init_evq(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -static int efx_vfdi_init_rxq(struct efx_vf *vf) +static int efx_vfdi_init_rxq(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct vfdi_req *req = vf->buf.addr; @@ -608,7 +610,7 @@ static int efx_vfdi_init_rxq(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -static int efx_vfdi_init_txq(struct efx_vf *vf) +static int efx_vfdi_init_txq(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct vfdi_req *req = vf->buf.addr; @@ -655,7 +657,7 @@ static int efx_vfdi_init_txq(struct efx_vf *vf) } /* Returns true when efx_vfdi_fini_all_queues should wake */ -static bool efx_vfdi_flush_wake(struct efx_vf *vf) +static bool efx_vfdi_flush_wake(struct siena_vf *vf) { /* Ensure that all updates are visible to efx_vfdi_fini_all_queues() */ smp_mb(); @@ -664,7 +666,7 @@ static bool efx_vfdi_flush_wake(struct efx_vf *vf) atomic_read(&vf->rxq_retry_count); } -static void efx_vfdi_flush_clear(struct efx_vf *vf) +static void efx_vfdi_flush_clear(struct siena_vf *vf) { memset(vf->txq_mask, 0, sizeof(vf->txq_mask)); vf->txq_count = 0; @@ -674,7 +676,7 @@ static void efx_vfdi_flush_clear(struct efx_vf *vf) atomic_set(&vf->rxq_retry_count, 0); } -static int efx_vfdi_fini_all_queues(struct efx_vf *vf) +static int efx_vfdi_fini_all_queues(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; efx_oword_t reg; @@ -757,7 +759,7 @@ static int efx_vfdi_fini_all_queues(struct efx_vf *vf) return timeout ? 0 : VFDI_RC_ETIMEDOUT; } -static int efx_vfdi_insert_filter(struct efx_vf *vf) +static int efx_vfdi_insert_filter(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct siena_nic_data *nic_data = efx->nic_data; @@ -789,7 +791,7 @@ static int efx_vfdi_insert_filter(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -static int efx_vfdi_remove_all_filters(struct efx_vf *vf) +static int efx_vfdi_remove_all_filters(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct siena_nic_data *nic_data = efx->nic_data; @@ -801,7 +803,7 @@ static int efx_vfdi_remove_all_filters(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -static int efx_vfdi_set_status_page(struct efx_vf *vf) +static int efx_vfdi_set_status_page(struct siena_vf *vf) { struct efx_nic *efx = vf->efx; struct siena_nic_data *nic_data = efx->nic_data; @@ -846,7 +848,7 @@ static int efx_vfdi_set_status_page(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -static int efx_vfdi_clear_status_page(struct efx_vf *vf) +static int efx_vfdi_clear_status_page(struct siena_vf *vf) { mutex_lock(&vf->status_lock); vf->status_addr = 0; @@ -855,7 +857,7 @@ static int efx_vfdi_clear_status_page(struct efx_vf *vf) return VFDI_RC_SUCCESS; } -typedef int (*efx_vfdi_op_t)(struct efx_vf *vf); +typedef int (*efx_vfdi_op_t)(struct siena_vf *vf); static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = { [VFDI_OP_INIT_EVQ] = efx_vfdi_init_evq, @@ -870,7 +872,7 @@ static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = { static void efx_siena_sriov_vfdi(struct work_struct *work) { - struct efx_vf *vf = container_of(work, struct efx_vf, req); + struct siena_vf *vf = container_of(work, struct siena_vf, req); struct efx_nic *efx = vf->efx; struct vfdi_req *req = vf->buf.addr; struct efx_memcpy_req copy[2]; @@ -936,7 +938,8 @@ static void efx_siena_sriov_vfdi(struct work_struct *work) * event ring in guest memory with VFDI reset events, then (re-initialise) the * event queue to raise an interrupt. The guest driver will then recover. */ -static void efx_siena_sriov_reset_vf(struct efx_vf *vf, + +static void efx_siena_sriov_reset_vf(struct siena_vf *vf, struct efx_buffer *buffer) { struct efx_nic *efx = vf->efx; @@ -1006,7 +1009,7 @@ static void efx_siena_sriov_reset_vf(struct efx_vf *vf, static void efx_siena_sriov_reset_vf_work(struct work_struct *work) { - struct efx_vf *vf = container_of(work, struct efx_vf, req); + struct siena_vf *vf = container_of(work, struct siena_vf, req); struct efx_nic *efx = vf->efx; struct efx_buffer buf; @@ -1077,7 +1080,7 @@ static void efx_siena_sriov_peer_work(struct work_struct *data) peer_work); struct efx_nic *efx = nic_data->efx; struct vfdi_status *vfdi_status = nic_data->vfdi_status.addr; - struct efx_vf *vf; + struct siena_vf *vf; struct efx_local_addr *local_addr; struct vfdi_endpoint *peer; struct efx_endpoint_page *epp; @@ -1099,7 +1102,7 @@ static void efx_siena_sriov_peer_work(struct work_struct *data) peer_space = ARRAY_SIZE(vfdi_status->peers) - 1; peer_count = 1; for (pos = 0; pos < efx->vf_count; ++pos) { - vf = efx->vf + pos; + vf = nic_data->vf + pos; mutex_lock(&vf->status_lock); if (vf->rx_filtering && !is_zero_ether_addr(vf->addr.mac_addr)) { @@ -1155,7 +1158,7 @@ static void efx_siena_sriov_peer_work(struct work_struct *data) /* Finally, push the pages */ for (pos = 0; pos < efx->vf_count; ++pos) { - vf = efx->vf + pos; + vf = nic_data->vf + pos; mutex_lock(&vf->status_lock); if (vf->status_addr) @@ -1190,14 +1193,16 @@ static void efx_siena_sriov_free_local(struct efx_nic *efx) static int efx_siena_sriov_vf_alloc(struct efx_nic *efx) { unsigned index; - struct efx_vf *vf; + struct siena_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; - efx->vf = kzalloc(sizeof(struct efx_vf) * efx->vf_count, GFP_KERNEL); - if (!efx->vf) + nic_data->vf = kcalloc(efx->vf_count, sizeof(*nic_data->vf), + GFP_KERNEL); + if (!nic_data->vf) return -ENOMEM; for (index = 0; index < efx->vf_count; ++index) { - vf = efx->vf + index; + vf = nic_data->vf + index; vf->efx = efx; vf->index = index; @@ -1216,11 +1221,12 @@ static int efx_siena_sriov_vf_alloc(struct efx_nic *efx) static void efx_siena_sriov_vfs_fini(struct efx_nic *efx) { - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; unsigned int pos; for (pos = 0; pos < efx->vf_count; ++pos) { - vf = efx->vf + pos; + vf = nic_data->vf + pos; efx_nic_free_buffer(efx, &vf->buf); kfree(vf->peer_page_addrs); @@ -1237,7 +1243,7 @@ static int efx_siena_sriov_vfs_init(struct efx_nic *efx) struct siena_nic_data *nic_data = efx->nic_data; unsigned index, devfn, sriov, buftbl_base; u16 offset, stride; - struct efx_vf *vf; + struct siena_vf *vf; int rc; sriov = pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV); @@ -1250,7 +1256,7 @@ static int efx_siena_sriov_vfs_init(struct efx_nic *efx) buftbl_base = nic_data->vf_buftbl_base; devfn = pci_dev->devfn + offset; for (index = 0; index < efx->vf_count; ++index) { - vf = efx->vf + index; + vf = nic_data->vf + index; /* Reserve buffer entries */ vf->buftbl_base = buftbl_base; @@ -1350,7 +1356,7 @@ fail_pci: fail_vfs: cancel_work_sync(&nic_data->peer_work); efx_siena_sriov_free_local(efx); - kfree(efx->vf); + kfree(nic_data->vf); fail_alloc: efx_nic_free_buffer(efx, &nic_data->vfdi_status); fail_status: @@ -1361,7 +1367,7 @@ fail_cmd: void efx_siena_sriov_fini(struct efx_nic *efx) { - struct efx_vf *vf; + struct siena_vf *vf; unsigned int pos; struct siena_nic_data *nic_data = efx->nic_data; @@ -1377,7 +1383,7 @@ void efx_siena_sriov_fini(struct efx_nic *efx) /* Flush all reconfiguration work */ for (pos = 0; pos < efx->vf_count; ++pos) { - vf = efx->vf + pos; + vf = nic_data->vf + pos; cancel_work_sync(&vf->req); cancel_work_sync(&vf->reset_work); } @@ -1388,7 +1394,7 @@ void efx_siena_sriov_fini(struct efx_nic *efx) /* Tear down back-end state */ efx_siena_sriov_vfs_fini(efx); efx_siena_sriov_free_local(efx); - kfree(efx->vf); + kfree(nic_data->vf); efx_nic_free_buffer(efx, &nic_data->vfdi_status); efx_siena_sriov_cmd(efx, false, NULL, NULL); } @@ -1396,7 +1402,7 @@ void efx_siena_sriov_fini(struct efx_nic *efx) void efx_siena_sriov_event(struct efx_channel *channel, efx_qword_t *event) { struct efx_nic *efx = channel->efx; - struct efx_vf *vf; + struct siena_vf *vf; unsigned qid, seq, type, data; qid = EFX_QWORD_FIELD(*event, FSF_CZ_USER_QID); @@ -1452,11 +1458,12 @@ error: void efx_siena_sriov_flr(struct efx_nic *efx, unsigned vf_i) { - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; if (vf_i > efx->vf_init_count) return; - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; netif_info(efx, hw, efx->net_dev, "FLR on VF %s\n", vf->pci_name); @@ -1481,7 +1488,7 @@ void efx_siena_sriov_mac_address_changed(struct efx_nic *efx) void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) { - struct efx_vf *vf; + struct siena_vf *vf; unsigned queue, qid; queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA); @@ -1500,7 +1507,7 @@ void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event) { - struct efx_vf *vf; + struct siena_vf *vf; unsigned ev_failed, queue, qid; queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID); @@ -1525,7 +1532,7 @@ void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event) /* Called from napi. Schedule the reset work item */ void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq) { - struct efx_vf *vf; + struct siena_vf *vf; unsigned int rel; if (map_vi_index(efx, dmaq, &vf, &rel)) @@ -1541,9 +1548,10 @@ void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq) /* Reset all VFs */ void efx_siena_sriov_reset(struct efx_nic *efx) { + struct siena_nic_data *nic_data = efx->nic_data; unsigned int vf_i; struct efx_buffer buf; - struct efx_vf *vf; + struct siena_vf *vf; ASSERT_RTNL(); @@ -1557,7 +1565,7 @@ void efx_siena_sriov_reset(struct efx_nic *efx) return; for (vf_i = 0; vf_i < efx->vf_init_count; ++vf_i) { - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; efx_siena_sriov_reset_vf(vf, &buf); } @@ -1573,7 +1581,6 @@ int efx_init_sriov(void) vfdi_workqueue = create_singlethread_workqueue("sfc_vfdi"); if (!vfdi_workqueue) return -ENOMEM; - return 0; } @@ -1582,14 +1589,14 @@ void efx_fini_sriov(void) destroy_workqueue(vfdi_workqueue); } -int efx_siena_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) +int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac) { - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; if (vf_i >= efx->vf_init_count) return -EINVAL; - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; mutex_lock(&vf->status_lock); ether_addr_copy(vf->addr.mac_addr, mac); @@ -1599,16 +1606,16 @@ int efx_siena_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) return 0; } -int efx_siena_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, +int efx_siena_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i, u16 vlan, u8 qos) { - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; u16 tci; if (vf_i >= efx->vf_init_count) return -EINVAL; - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; mutex_lock(&vf->status_lock); tci = (vlan & VLAN_VID_MASK) | ((qos & 0x7) << VLAN_PRIO_SHIFT); @@ -1619,16 +1626,16 @@ int efx_siena_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, return 0; } -int efx_siena_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, +int efx_siena_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf_i, bool spoofchk) { - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; int rc; if (vf_i >= efx->vf_init_count) return -EINVAL; - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; mutex_lock(&vf->txq_lock); if (vf->txq_count == 0) { @@ -1643,16 +1650,16 @@ int efx_siena_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, return rc; } -int efx_siena_sriov_get_vf_config(struct net_device *net_dev, int vf_i, +int efx_siena_sriov_get_vf_config(struct efx_nic *efx, int vf_i, struct ifla_vf_info *ivi) { - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_vf *vf; + struct siena_nic_data *nic_data = efx->nic_data; + struct siena_vf *vf; u16 tci; if (vf_i >= efx->vf_init_count) return -EINVAL; - vf = efx->vf + vf_i; + vf = nic_data->vf + vf_i; ivi->vf = vf_i; ether_addr_copy(ivi->mac, vf->addr.mac_addr); @@ -1666,3 +1673,12 @@ int efx_siena_sriov_get_vf_config(struct net_device *net_dev, int vf_i, return 0; } +bool efx_siena_sriov_wanted(struct efx_nic *efx) +{ + return efx->vf_count != 0; +} + +int efx_siena_sriov_configure(struct efx_nic *efx, int num_vfs) +{ + return 0; +} diff --git a/drivers/net/ethernet/sfc/siena_sriov.h b/drivers/net/ethernet/sfc/siena_sriov.h new file mode 100644 index 000000000000..64e3e018929e --- /dev/null +++ b/drivers/net/ethernet/sfc/siena_sriov.h @@ -0,0 +1,79 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef SIENA_SRIOV_H +#define SIENA_SRIOV_H + +#include "net_driver.h" + +/* On the SFC9000 family each port is associated with 1 PCI physical + * function (PF) handled by sfc and a configurable number of virtual + * functions (VFs) that may be handled by some other driver, often in + * a VM guest. The queue pointer registers are mapped in both PF and + * VF BARs such that an 8K region provides access to a single RX, TX + * and event queue (collectively a Virtual Interface, VI or VNIC). + * + * The PF has access to all 1024 VIs while VFs are mapped to VIs + * according to VI_BASE and VI_SCALE: VF i has access to VIs numbered + * in range [VI_BASE + i << VI_SCALE, VI_BASE + i + 1 << VI_SCALE). + * The number of VIs and the VI_SCALE value are configurable but must + * be established at boot time by firmware. + */ + +/* Maximum VI_SCALE parameter supported by Siena */ +#define EFX_VI_SCALE_MAX 6 +/* Base VI to use for SR-IOV. Must be aligned to (1 << EFX_VI_SCALE_MAX), + * so this is the smallest allowed value. + */ +#define EFX_VI_BASE 128U +/* Maximum number of VFs allowed */ +#define EFX_VF_COUNT_MAX 127 +/* Limit EVQs on VFs to be only 8k to reduce buffer table reservation */ +#define EFX_MAX_VF_EVQ_SIZE 8192UL +/* The number of buffer table entries reserved for each VI on a VF */ +#define EFX_VF_BUFTBL_PER_VI \ + ((EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE) * \ + sizeof(efx_qword_t) / EFX_BUF_SIZE) + +int efx_siena_sriov_configure(struct efx_nic *efx, int num_vfs); +int efx_siena_sriov_init(struct efx_nic *efx); +void efx_siena_sriov_fini(struct efx_nic *efx); +void efx_siena_sriov_mac_address_changed(struct efx_nic *efx); +bool efx_siena_sriov_wanted(struct efx_nic *efx); +void efx_siena_sriov_reset(struct efx_nic *efx); +void efx_siena_sriov_flr(struct efx_nic *efx, unsigned flr); + +int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf, u8 *mac); +int efx_siena_sriov_set_vf_vlan(struct efx_nic *efx, int vf, + u16 vlan, u8 qos); +int efx_siena_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf, + bool spoofchk); +int efx_siena_sriov_get_vf_config(struct efx_nic *efx, int vf, + struct ifla_vf_info *ivf); + +#ifdef CONFIG_SFC_SRIOV + +static inline bool efx_siena_sriov_enabled(struct efx_nic *efx) +{ + return efx->vf_init_count != 0; +} +#else /* !CONFIG_SFC_SRIOV */ +static inline bool efx_siena_sriov_enabled(struct efx_nic *efx) +{ + return false; +} +#endif /* CONFIG_SFC_SRIOV */ + +void efx_siena_sriov_probe(struct efx_nic *efx); +void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event); +void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event); +void efx_siena_sriov_event(struct efx_channel *channel, efx_qword_t *event); +void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq); + +#endif /* SIENA_SRIOV_H */ diff --git a/drivers/net/ethernet/sfc/sriov.c b/drivers/net/ethernet/sfc/sriov.c new file mode 100644 index 000000000000..d4b74452a677 --- /dev/null +++ b/drivers/net/ethernet/sfc/sriov.c @@ -0,0 +1,60 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2014-2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#include <linux/module.h> +#include "net_driver.h" +#include "nic.h" +#include "sriov.h" + +int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_mac) + return efx->type->sriov_set_vf_mac(efx, vf_i, mac); + else + return -EOPNOTSUPP; +} + +int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, + u8 qos) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_vlan) { + if ((vlan & ~VLAN_VID_MASK) || + (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT))) + return -EINVAL; + + return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos); + } else { + return -EOPNOTSUPP; + } +} + +int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, + bool spoofchk) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_spoofchk) + return efx->type->sriov_set_vf_spoofchk(efx, vf_i, spoofchk); + else + return -EOPNOTSUPP; +} + +int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, + struct ifla_vf_info *ivi) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_get_vf_config) + return efx->type->sriov_get_vf_config(efx, vf_i, ivi); + else + return -EOPNOTSUPP; +} diff --git a/drivers/net/ethernet/sfc/sriov.h b/drivers/net/ethernet/sfc/sriov.h new file mode 100644 index 000000000000..0b9f0f6acf3b --- /dev/null +++ b/drivers/net/ethernet/sfc/sriov.h @@ -0,0 +1,27 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2014-2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_SRIOV_H +#define EFX_SRIOV_H + +#include "net_driver.h" + +#ifdef CONFIG_SFC_SRIOV + +int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac); +int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, + u8 qos); +int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, + bool spoofchk); +int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, + struct ifla_vf_info *ivi); + +#endif /* CONFIG_SFC_SRIOV */ + +#endif /* EFX_SRIOV_H */ diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 43efc3a0cda5..0a28c07361cf 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -537,7 +537,7 @@ int netcp_unregister_rxhook(struct netcp_intf *netcp_priv, int order, static void netcp_frag_free(bool is_frag, void *ptr) { if (is_frag) - put_page(virt_to_head_page(ptr)); + skb_free_frag(ptr); else kfree(ptr); } diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index de2850497c09..725106f75d42 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -472,8 +472,7 @@ struct rhine_private { /* Frequently used values: keep some adjacent for cache effect. */ u32 quirks; - struct rx_desc *rx_head_desc; - unsigned int cur_rx, dirty_rx; /* Producer/consumer ring indices */ + unsigned int cur_rx; unsigned int cur_tx, dirty_tx; unsigned int rx_buf_sz; /* Based on MTU+slack. */ struct rhine_stats rx_stats; @@ -1213,17 +1212,61 @@ static void free_ring(struct net_device* dev) } -static void alloc_rbufs(struct net_device *dev) +struct rhine_skb_dma { + struct sk_buff *skb; + dma_addr_t dma; +}; + +static inline int rhine_skb_dma_init(struct net_device *dev, + struct rhine_skb_dma *sd) { struct rhine_private *rp = netdev_priv(dev); struct device *hwdev = dev->dev.parent; - dma_addr_t next; + const int size = rp->rx_buf_sz; + + sd->skb = netdev_alloc_skb(dev, size); + if (!sd->skb) + return -ENOMEM; + + sd->dma = dma_map_single(hwdev, sd->skb->data, size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(hwdev, sd->dma))) { + netif_err(rp, drv, dev, "Rx DMA mapping failure\n"); + dev_kfree_skb_any(sd->skb); + return -EIO; + } + + return 0; +} + +static void rhine_reset_rbufs(struct rhine_private *rp) +{ int i; - rp->dirty_rx = rp->cur_rx = 0; + rp->cur_rx = 0; + + for (i = 0; i < RX_RING_SIZE; i++) + rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn); +} + +static inline void rhine_skb_dma_nic_store(struct rhine_private *rp, + struct rhine_skb_dma *sd, int entry) +{ + rp->rx_skbuff_dma[entry] = sd->dma; + rp->rx_skbuff[entry] = sd->skb; + + rp->rx_ring[entry].addr = cpu_to_le32(sd->dma); + dma_wmb(); +} + +static void free_rbufs(struct net_device* dev); + +static int alloc_rbufs(struct net_device *dev) +{ + struct rhine_private *rp = netdev_priv(dev); + dma_addr_t next; + int rc, i; rp->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); - rp->rx_head_desc = &rp->rx_ring[0]; next = rp->rx_ring_dma; /* Init the ring entries */ @@ -1239,23 +1282,20 @@ static void alloc_rbufs(struct net_device *dev) /* Fill in the Rx buffers. Handle allocation failure gracefully. */ for (i = 0; i < RX_RING_SIZE; i++) { - struct sk_buff *skb = netdev_alloc_skb(dev, rp->rx_buf_sz); - rp->rx_skbuff[i] = skb; - if (skb == NULL) - break; + struct rhine_skb_dma sd; - rp->rx_skbuff_dma[i] = - dma_map_single(hwdev, skb->data, rp->rx_buf_sz, - DMA_FROM_DEVICE); - if (dma_mapping_error(hwdev, rp->rx_skbuff_dma[i])) { - rp->rx_skbuff_dma[i] = 0; - dev_kfree_skb(skb); - break; + rc = rhine_skb_dma_init(dev, &sd); + if (rc < 0) { + free_rbufs(dev); + goto out; } - rp->rx_ring[i].addr = cpu_to_le32(rp->rx_skbuff_dma[i]); - rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn); + + rhine_skb_dma_nic_store(rp, &sd, i); } - rp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); + + rhine_reset_rbufs(rp); +out: + return rc; } static void free_rbufs(struct net_device* dev) @@ -1659,16 +1699,18 @@ static int rhine_open(struct net_device *dev) rc = request_irq(rp->irq, rhine_interrupt, IRQF_SHARED, dev->name, dev); if (rc) - return rc; + goto out; netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->irq); rc = alloc_ring(dev); - if (rc) { - free_irq(rp->irq, dev); - return rc; - } - alloc_rbufs(dev); + if (rc < 0) + goto out_free_irq; + + rc = alloc_rbufs(dev); + if (rc < 0) + goto out_free_ring; + alloc_tbufs(dev); rhine_chip_reset(dev); rhine_task_enable(rp); @@ -1680,7 +1722,14 @@ static int rhine_open(struct net_device *dev) netif_start_queue(dev); - return 0; +out: + return rc; + +out_free_ring: + free_ring(dev); +out_free_irq: + free_irq(rp->irq, dev); + goto out; } static void rhine_reset_task(struct work_struct *work) @@ -1700,9 +1749,9 @@ static void rhine_reset_task(struct work_struct *work) /* clear all descriptors */ free_tbufs(dev); - free_rbufs(dev); alloc_tbufs(dev); - alloc_rbufs(dev); + + rhine_reset_rbufs(rp); /* Reinitialize the hardware. */ rhine_chip_reset(dev); @@ -1730,6 +1779,11 @@ static void rhine_tx_timeout(struct net_device *dev) schedule_work(&rp->reset_task); } +static inline bool rhine_tx_queue_full(struct rhine_private *rp) +{ + return (rp->cur_tx - rp->dirty_tx) >= TX_QUEUE_LEN; +} + static netdev_tx_t rhine_start_tx(struct sk_buff *skb, struct net_device *dev) { @@ -1800,11 +1854,17 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, netdev_sent_queue(dev, skb->len); /* lock eth irq */ - wmb(); + dma_wmb(); rp->tx_ring[entry].tx_status |= cpu_to_le32(DescOwn); wmb(); rp->cur_tx++; + /* + * Nobody wants cur_tx write to rot for ages after the NIC will have + * seen the transmit request, especially as the transmit completion + * handler could miss it. + */ + smp_wmb(); /* Non-x86 Todo: explicitly flush cache lines here. */ @@ -1817,8 +1877,14 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, ioaddr + ChipCmd1); IOSYNC; - if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN) + /* dirty_tx may be pessimistically out-of-sync. See rhine_tx. */ + if (rhine_tx_queue_full(rp)) { netif_stop_queue(dev); + smp_rmb(); + /* Rejuvenate. */ + if (!rhine_tx_queue_full(rp)) + netif_wake_queue(dev); + } netif_dbg(rp, tx_queued, dev, "Transmit frame #%d queued in slot %d\n", rp->cur_tx - 1, entry); @@ -1866,13 +1932,24 @@ static void rhine_tx(struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); struct device *hwdev = dev->dev.parent; - int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE; unsigned int pkts_compl = 0, bytes_compl = 0; + unsigned int dirty_tx = rp->dirty_tx; + unsigned int cur_tx; struct sk_buff *skb; + /* + * The race with rhine_start_tx does not matter here as long as the + * driver enforces a value of cur_tx that was relevant when the + * packet was scheduled to the network chipset. + * Executive summary: smp_rmb() balances smp_wmb() in rhine_start_tx. + */ + smp_rmb(); + cur_tx = rp->cur_tx; /* find and cleanup dirty tx descriptors */ - while (rp->dirty_tx != rp->cur_tx) { - txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status); + while (dirty_tx != cur_tx) { + unsigned int entry = dirty_tx % TX_RING_SIZE; + u32 txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status); + netif_dbg(rp, tx_done, dev, "Tx scavenge %d status %08x\n", entry, txstatus); if (txstatus & DescOwn) @@ -1921,12 +1998,23 @@ static void rhine_tx(struct net_device *dev) pkts_compl++; dev_consume_skb_any(skb); rp->tx_skbuff[entry] = NULL; - entry = (++rp->dirty_tx) % TX_RING_SIZE; + dirty_tx++; } + rp->dirty_tx = dirty_tx; + /* Pity we can't rely on the nearby BQL completion implicit barrier. */ + smp_wmb(); + netdev_completed_queue(dev, pkts_compl, bytes_compl); - if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4) + + /* cur_tx may be optimistically out-of-sync. See rhine_start_tx. */ + if (!rhine_tx_queue_full(rp) && netif_queue_stopped(dev)) { netif_wake_queue(dev); + smp_rmb(); + /* Rejuvenate. */ + if (rhine_tx_queue_full(rp)) + netif_stop_queue(dev); + } } /** @@ -1944,22 +2032,33 @@ static inline u16 rhine_get_vlan_tci(struct sk_buff *skb, int data_size) return be16_to_cpup((__be16 *)trailer); } +static inline void rhine_rx_vlan_tag(struct sk_buff *skb, struct rx_desc *desc, + int data_size) +{ + dma_rmb(); + if (unlikely(desc->desc_length & cpu_to_le32(DescTag))) { + u16 vlan_tci; + + vlan_tci = rhine_get_vlan_tci(skb, data_size); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + } +} + /* Process up to limit frames from receive ring */ static int rhine_rx(struct net_device *dev, int limit) { struct rhine_private *rp = netdev_priv(dev); struct device *hwdev = dev->dev.parent; - int count; int entry = rp->cur_rx % RX_RING_SIZE; + int count; netif_dbg(rp, rx_status, dev, "%s(), entry %d status %08x\n", __func__, - entry, le32_to_cpu(rp->rx_head_desc->rx_status)); + entry, le32_to_cpu(rp->rx_ring[entry].rx_status)); /* If EOP is set on the next entry, it's a new packet. Send it up. */ for (count = 0; count < limit; ++count) { - struct rx_desc *desc = rp->rx_head_desc; + struct rx_desc *desc = rp->rx_ring + entry; u32 desc_status = le32_to_cpu(desc->rx_status); - u32 desc_length = le32_to_cpu(desc->desc_length); int data_size = desc_status >> 16; if (desc_status & DescOwn) @@ -1975,10 +2074,6 @@ static int rhine_rx(struct net_device *dev, int limit) "entry %#x length %d status %08x!\n", entry, data_size, desc_status); - netdev_warn(dev, - "Oversized Ethernet frame %p vs %p\n", - rp->rx_head_desc, - &rp->rx_ring[entry]); dev->stats.rx_length_errors++; } else if (desc_status & RxErr) { /* There was a error. */ @@ -2000,16 +2095,17 @@ static int rhine_rx(struct net_device *dev, int limit) } } } else { - struct sk_buff *skb = NULL; /* Length should omit the CRC */ int pkt_len = data_size - 4; - u16 vlan_tci = 0; + struct sk_buff *skb; /* Check if the packet is long enough to accept without copying to a minimally-sized skbuff. */ - if (pkt_len < rx_copybreak) + if (pkt_len < rx_copybreak) { skb = netdev_alloc_skb_ip_align(dev, pkt_len); - if (skb) { + if (unlikely(!skb)) + goto drop; + dma_sync_single_for_cpu(hwdev, rp->rx_skbuff_dma[entry], rp->rx_buf_sz, @@ -2018,32 +2114,31 @@ static int rhine_rx(struct net_device *dev, int limit) skb_copy_to_linear_data(skb, rp->rx_skbuff[entry]->data, pkt_len); - skb_put(skb, pkt_len); + dma_sync_single_for_device(hwdev, rp->rx_skbuff_dma[entry], rp->rx_buf_sz, DMA_FROM_DEVICE); } else { + struct rhine_skb_dma sd; + + if (unlikely(rhine_skb_dma_init(dev, &sd) < 0)) + goto drop; + skb = rp->rx_skbuff[entry]; - if (skb == NULL) { - netdev_err(dev, "Inconsistent Rx descriptor chain\n"); - break; - } - rp->rx_skbuff[entry] = NULL; - skb_put(skb, pkt_len); + dma_unmap_single(hwdev, rp->rx_skbuff_dma[entry], rp->rx_buf_sz, DMA_FROM_DEVICE); + rhine_skb_dma_nic_store(rp, &sd, entry); } - if (unlikely(desc_length & DescTag)) - vlan_tci = rhine_get_vlan_tci(skb, data_size); - + skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, dev); - if (unlikely(desc_length & DescTag)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + rhine_rx_vlan_tag(skb, desc, data_size); + netif_receive_skb(skb); u64_stats_update_begin(&rp->rx_stats.syncp); @@ -2051,35 +2146,16 @@ static int rhine_rx(struct net_device *dev, int limit) rp->rx_stats.packets++; u64_stats_update_end(&rp->rx_stats.syncp); } +give_descriptor_to_nic: + desc->rx_status = cpu_to_le32(DescOwn); entry = (++rp->cur_rx) % RX_RING_SIZE; - rp->rx_head_desc = &rp->rx_ring[entry]; - } - - /* Refill the Rx ring buffers. */ - for (; rp->cur_rx - rp->dirty_rx > 0; rp->dirty_rx++) { - struct sk_buff *skb; - entry = rp->dirty_rx % RX_RING_SIZE; - if (rp->rx_skbuff[entry] == NULL) { - skb = netdev_alloc_skb(dev, rp->rx_buf_sz); - rp->rx_skbuff[entry] = skb; - if (skb == NULL) - break; /* Better luck next round. */ - rp->rx_skbuff_dma[entry] = - dma_map_single(hwdev, skb->data, - rp->rx_buf_sz, - DMA_FROM_DEVICE); - if (dma_mapping_error(hwdev, - rp->rx_skbuff_dma[entry])) { - dev_kfree_skb(skb); - rp->rx_skbuff_dma[entry] = 0; - break; - } - rp->rx_ring[entry].addr = cpu_to_le32(rp->rx_skbuff_dma[entry]); - } - rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn); } return count; + +drop: + dev->stats.rx_dropped++; + goto give_descriptor_to_nic; } static void rhine_restart_tx(struct net_device *dev) { @@ -2484,9 +2560,8 @@ static int rhine_resume(struct device *device) enable_mmio(rp->pioaddr, rp->quirks); rhine_power_init(dev); free_tbufs(dev); - free_rbufs(dev); alloc_tbufs(dev); - alloc_rbufs(dev); + rhine_reset_rbufs(rp); rhine_task_enable(rp); spin_lock_bh(&rp->lock); init_registers(dev); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index af2694dc6f90..3b99a4df71f8 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -688,10 +688,8 @@ static int temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; if (temac_check_tx_bd_space(lp, num_frag)) { - if (!netif_queue_stopped(ndev)) { + if (!netif_queue_stopped(ndev)) netif_stop_queue(ndev); - return NETDEV_TX_BUSY; - } return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 4c9b4fa1d3c1..7cb9abac95c8 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -11,16 +11,16 @@ #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/if_vlan.h> /* Packet size info */ #define XAE_HDR_SIZE 14 /* Size of Ethernet header */ -#define XAE_HDR_VLAN_SIZE 18 /* Size of an Ethernet hdr + VLAN */ #define XAE_TRL_SIZE 4 /* Size of Ethernet trailer (FCS) */ #define XAE_MTU 1500 /* Max MTU of an Ethernet frame */ #define XAE_JUMBO_MTU 9000 /* Max MTU of a jumbo Eth. frame */ #define XAE_MAX_FRAME_SIZE (XAE_MTU + XAE_HDR_SIZE + XAE_TRL_SIZE) -#define XAE_MAX_VLAN_FRAME_SIZE (XAE_MTU + XAE_HDR_VLAN_SIZE + XAE_TRL_SIZE) +#define XAE_MAX_VLAN_FRAME_SIZE (XAE_MTU + VLAN_ETH_HLEN + XAE_TRL_SIZE) #define XAE_MAX_JUMBO_FRAME_SIZE (XAE_JUMBO_MTU + XAE_HDR_SIZE + XAE_TRL_SIZE) /* Configuration options */ @@ -38,18 +38,21 @@ #define XAE_OPTION_FLOW_CONTROL (1 << 4) /* Strip FCS and PAD from incoming frames. Note: PAD from VLAN frames is not - * stripped. Default: disabled (set) */ + * stripped. Default: disabled (set) + */ #define XAE_OPTION_FCS_STRIP (1 << 5) /* Generate FCS field and add PAD automatically for outgoing frames. - * Default: enabled (set) */ + * Default: enabled (set) + */ #define XAE_OPTION_FCS_INSERT (1 << 6) /* Enable Length/Type error checking for incoming frames. When this option is * set, the MAC will filter frames that have a mismatched type/length field * and if XAE_OPTION_REPORT_RXERR is set, the user is notified when these * types of frames are encountered. When this option is cleared, the MAC will - * allow these types of frames to be received. Default: enabled (set) */ + * allow these types of frames to be received. Default: enabled (set) + */ #define XAE_OPTION_LENTYPE_ERR (1 << 7) /* Enable the transmitter. Default: enabled (set) */ @@ -159,12 +162,12 @@ #define XAE_MDIO_MWD_OFFSET 0x00000508 /* MII Management Write Data */ #define XAE_MDIO_MRD_OFFSET 0x0000050C /* MII Management Read Data */ #define XAE_MDIO_MIS_OFFSET 0x00000600 /* MII Management Interrupt Status */ -#define XAE_MDIO_MIP_OFFSET 0x00000620 /* MII Mgmt Interrupt Pending - * register offset */ -#define XAE_MDIO_MIE_OFFSET 0x00000640 /* MII Management Interrupt Enable - * register offset */ -#define XAE_MDIO_MIC_OFFSET 0x00000660 /* MII Management Interrupt Clear - * register offset. */ +/* MII Mgmt Interrupt Pending register offset */ +#define XAE_MDIO_MIP_OFFSET 0x00000620 +/* MII Management Interrupt Enable register offset */ +#define XAE_MDIO_MIE_OFFSET 0x00000640 +/* MII Management Interrupt Clear register offset. */ +#define XAE_MDIO_MIC_OFFSET 0x00000660 #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ #define XAE_FMI_OFFSET 0x00000708 /* Filter Mask Index */ @@ -176,18 +179,17 @@ #define XAE_MCAST_TABLE_OFFSET 0x00020000 /* Multicast table address */ /* Bit Masks for Axi Ethernet RAF register */ -#define XAE_RAF_MCSTREJ_MASK 0x00000002 /* Reject receive multicast - * destination address */ -#define XAE_RAF_BCSTREJ_MASK 0x00000004 /* Reject receive broadcast - * destination address */ +/* Reject receive multicast destination address */ +#define XAE_RAF_MCSTREJ_MASK 0x00000002 +/* Reject receive broadcast destination address */ +#define XAE_RAF_BCSTREJ_MASK 0x00000004 #define XAE_RAF_TXVTAGMODE_MASK 0x00000018 /* Tx VLAN TAG mode */ #define XAE_RAF_RXVTAGMODE_MASK 0x00000060 /* Rx VLAN TAG mode */ #define XAE_RAF_TXVSTRPMODE_MASK 0x00000180 /* Tx VLAN STRIP mode */ #define XAE_RAF_RXVSTRPMODE_MASK 0x00000600 /* Rx VLAN STRIP mode */ #define XAE_RAF_NEWFNCENBL_MASK 0x00000800 /* New function mode */ -#define XAE_RAF_EMULTIFLTRENBL_MASK 0x00001000 /* Exteneded Multicast - * Filtering mode - */ +/* Exteneded Multicast Filtering mode */ +#define XAE_RAF_EMULTIFLTRENBL_MASK 0x00001000 #define XAE_RAF_STATSRST_MASK 0x00002000 /* Stats. Counter Reset */ #define XAE_RAF_RXBADFRMEN_MASK 0x00004000 /* Recv Bad Frame Enable */ #define XAE_RAF_TXVTAGMODE_SHIFT 3 /* Tx Tag mode shift bits */ @@ -197,15 +199,16 @@ /* Bit Masks for Axi Ethernet TPF and IFGP registers */ #define XAE_TPF_TPFV_MASK 0x0000FFFF /* Tx pause frame value */ -#define XAE_IFGP0_IFGP_MASK 0x0000007F /* Transmit inter-frame - * gap adjustment value */ +/* Transmit inter-frame gap adjustment value */ +#define XAE_IFGP0_IFGP_MASK 0x0000007F /* Bit Masks for Axi Ethernet IS, IE and IP registers, Same masks apply - * for all 3 registers. */ -#define XAE_INT_HARDACSCMPLT_MASK 0x00000001 /* Hard register access - * complete */ -#define XAE_INT_AUTONEG_MASK 0x00000002 /* Auto negotiation - * complete */ + * for all 3 registers. + */ +/* Hard register access complete */ +#define XAE_INT_HARDACSCMPLT_MASK 0x00000001 +/* Auto negotiation complete */ +#define XAE_INT_AUTONEG_MASK 0x00000002 #define XAE_INT_RXCMPIT_MASK 0x00000004 /* Rx complete */ #define XAE_INT_RXRJECT_MASK 0x00000008 /* Rx frame rejected */ #define XAE_INT_RXFIFOOVR_MASK 0x00000010 /* Rx fifo overrun */ @@ -215,10 +218,9 @@ #define XAE_INT_PHYRSTCMPLT_MASK 0x00000100 /* Phy Reset complete */ #define XAE_INT_ALL_MASK 0x0000003F /* All the ints */ +/* INT bits that indicate receive errors */ #define XAE_INT_RECV_ERROR_MASK \ - (XAE_INT_RXRJECT_MASK | XAE_INT_RXFIFOOVR_MASK) /* INT bits that - * indicate receive - * errors */ + (XAE_INT_RXRJECT_MASK | XAE_INT_RXFIFOOVR_MASK) /* Bit masks for Axi Ethernet VLAN TPID Word 0 register */ #define XAE_TPID_0_MASK 0x0000FFFF /* TPID 0 */ @@ -231,27 +233,28 @@ /* Bit masks for Axi Ethernet RCW1 register */ #define XAE_RCW1_RST_MASK 0x80000000 /* Reset */ #define XAE_RCW1_JUM_MASK 0x40000000 /* Jumbo frame enable */ -#define XAE_RCW1_FCS_MASK 0x20000000 /* In-Band FCS enable - * (FCS not stripped) */ +/* In-Band FCS enable (FCS not stripped) */ +#define XAE_RCW1_FCS_MASK 0x20000000 #define XAE_RCW1_RX_MASK 0x10000000 /* Receiver enable */ #define XAE_RCW1_VLAN_MASK 0x08000000 /* VLAN frame enable */ -#define XAE_RCW1_LT_DIS_MASK 0x02000000 /* Length/type field valid check - * disable */ -#define XAE_RCW1_CL_DIS_MASK 0x01000000 /* Control frame Length check - * disable */ -#define XAE_RCW1_PAUSEADDR_MASK 0x0000FFFF /* Pause frame source address - * bits [47:32]. Bits [31:0] are - * stored in register RCW0 */ +/* Length/type field valid check disable */ +#define XAE_RCW1_LT_DIS_MASK 0x02000000 +/* Control frame Length check disable */ +#define XAE_RCW1_CL_DIS_MASK 0x01000000 +/* Pause frame source address bits [47:32]. Bits [31:0] are + * stored in register RCW0 + */ +#define XAE_RCW1_PAUSEADDR_MASK 0x0000FFFF /* Bit masks for Axi Ethernet TC register */ #define XAE_TC_RST_MASK 0x80000000 /* Reset */ #define XAE_TC_JUM_MASK 0x40000000 /* Jumbo frame enable */ -#define XAE_TC_FCS_MASK 0x20000000 /* In-Band FCS enable - * (FCS not generated) */ +/* In-Band FCS enable (FCS not generated) */ +#define XAE_TC_FCS_MASK 0x20000000 #define XAE_TC_TX_MASK 0x10000000 /* Transmitter enable */ #define XAE_TC_VLAN_MASK 0x08000000 /* VLAN frame enable */ -#define XAE_TC_IFG_MASK 0x02000000 /* Inter-frame gap adjustment - * enable */ +/* Inter-frame gap adjustment enable */ +#define XAE_TC_IFG_MASK 0x02000000 /* Bit masks for Axi Ethernet FCC register */ #define XAE_FCC_FCRX_MASK 0x20000000 /* Rx flow control enable */ @@ -301,10 +304,10 @@ #define XAE_MDIO_INT_MIIM_RDY_MASK 0x00000001 /* MIIM Interrupt */ /* Bit masks for Axi Ethernet UAW1 register */ -#define XAE_UAW1_UNICASTADDR_MASK 0x0000FFFF /* Station address bits - * [47:32]; Station address - * bits [31:0] are stored in - * register UAW0 */ +/* Station address bits [47:32]; Station address + * bits [31:0] are stored in register UAW0 + */ +#define XAE_UAW1_UNICASTADDR_MASK 0x0000FFFF /* Bit masks for Axi Ethernet FMI register */ #define XAE_FMI_PM_MASK 0x80000000 /* Promis. mode enable */ @@ -320,8 +323,8 @@ #define XAE_PHY_TYPE_SGMII 4 #define XAE_PHY_TYPE_1000BASE_X 5 -#define XAE_MULTICAST_CAM_TABLE_NUM 4 /* Total number of entries in the - * hardware multicast table. */ + /* Total number of entries in the hardware multicast table. */ +#define XAE_MULTICAST_CAM_TABLE_NUM 4 /* Axi Ethernet Synthesis features */ #define XAE_FEATURE_PARTIAL_RX_CSUM (1 << 0) @@ -407,8 +410,11 @@ struct axidma_bd { * Txed/Rxed in the existing hardware. If jumbo option is * supported, the maximum frame size would be 9k. Else it is * 1522 bytes (assuming support for basic VLAN) - * @jumbo_support: Stores hardware configuration for jumbo support. If hardware - * can handle jumbo packets, this entry will be 1, else 0. + * @rxmem: Stores rx memory size for jumbo frame handling. + * @csum_offload_on_tx_path: Stores the checksum selection on TX side. + * @csum_offload_on_rx_path: Stores the checksum selection on RX side. + * @coalesce_count_rx: Store the irq coalesce on RX side. + * @coalesce_count_tx: Store the irq coalesce on TX side. */ struct axienet_local { struct net_device *ndev; @@ -446,7 +452,7 @@ struct axienet_local { u32 rx_bd_ci; u32 max_frm_size; - u32 jumbo_support; + u32 rxmem; int csum_offload_on_tx_path; int csum_offload_on_rx_path; @@ -472,7 +478,7 @@ struct axienet_option { * @lp: Pointer to axienet local structure * @offset: Address offset from the base address of Axi Ethernet core * - * returns: The contents of the Axi Ethernet register + * Return: The contents of the Axi Ethernet register * * This function returns the contents of the corresponding register. */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 28b7e7d9c272..4208dd7ef101 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -117,7 +117,7 @@ static struct axienet_option axienet_options[] = { * @lp: Pointer to axienet local structure * @reg: Address offset from the base address of the Axi DMA core * - * returns: The contents of the Axi DMA register + * Return: The contents of the Axi DMA register * * This function returns the contents of the corresponding Axi DMA register. */ @@ -179,8 +179,7 @@ static void axienet_dma_bd_release(struct net_device *ndev) * axienet_dma_bd_init - Setup buffer descriptor rings for Axi DMA * @ndev: Pointer to the net_device structure * - * returns: 0, on success - * -ENOMEM, on failure + * Return: 0, on success -ENOMEM, on failure * * This function is called to initialize the Rx and Tx DMA descriptor * rings. This initializes the descriptors with required default values @@ -198,9 +197,7 @@ static int axienet_dma_bd_init(struct net_device *ndev) lp->tx_bd_tail = 0; lp->rx_bd_ci = 0; - /* - * Allocate the Tx and Rx buffer descriptors. - */ + /* Allocate the Tx and Rx buffer descriptors. */ lp->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent, sizeof(*lp->tx_bd_v) * TX_BD_NUM, &lp->tx_bd_p, GFP_KERNEL); @@ -263,7 +260,8 @@ static int axienet_dma_bd_init(struct net_device *ndev) axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); /* Populate the tail pointer and bring the Rx Axi DMA engine out of - * halted state. This will make the Rx side ready for reception.*/ + * halted state. This will make the Rx side ready for reception. + */ axienet_dma_out32(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, @@ -273,7 +271,8 @@ static int axienet_dma_bd_init(struct net_device *ndev) /* Write to the RS (Run-stop) bit in the Tx channel control register. * Tx channel is now ready to run. But only after we write to the - * tail pointer register that the Tx channel will start transmitting */ + * tail pointer register that the Tx channel will start transmitting. + */ axienet_dma_out32(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, @@ -320,7 +319,7 @@ static void axienet_set_mac_address(struct net_device *ndev, void *address) * @ndev: Pointer to the net_device structure * @p: 6 byte Address to be written as MAC address * - * returns: 0 for all conditions. Presently, there is no failure case. + * Return: 0 for all conditions. Presently, there is no failure case. * * This function is called to initialize the MAC address of the Axi Ethernet * core. It calls the core specific axienet_set_mac_address. This is the @@ -354,7 +353,8 @@ static void axienet_set_multicast_list(struct net_device *ndev) netdev_mc_count(ndev) > XAE_MULTICAST_CAM_TABLE_NUM) { /* We must make the kernel realize we had to move into * promiscuous mode. If it was a promiscuous mode request - * the flag is already set. If not we set it. */ + * the flag is already set. If not we set it. + */ ndev->flags |= IFF_PROMISC; reg = axienet_ior(lp, XAE_FMI_OFFSET); reg |= XAE_FMI_PM_MASK; @@ -438,14 +438,15 @@ static void __axienet_device_reset(struct axienet_local *lp, /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset * process of Axi DMA takes a while to complete as all pending * commands/transfers will be flushed or completed during this - * reset process. */ + * reset process. + */ axienet_dma_out32(lp, offset, XAXIDMA_CR_RESET_MASK); timeout = DELAY_OF_ONE_MILLISEC; while (axienet_dma_in32(lp, offset) & XAXIDMA_CR_RESET_MASK) { udelay(1); if (--timeout == 0) { - dev_err(dev, "axienet_device_reset DMA " - "reset timeout!\n"); + netdev_err(lp->ndev, "%s: DMA reset timeout!\n", + __func__); break; } } @@ -471,19 +472,21 @@ static void axienet_device_reset(struct net_device *ndev) __axienet_device_reset(lp, &ndev->dev, XAXIDMA_RX_CR_OFFSET); lp->max_frm_size = XAE_MAX_VLAN_FRAME_SIZE; + lp->options |= XAE_OPTION_VLAN; lp->options &= (~XAE_OPTION_JUMBO); if ((ndev->mtu > XAE_MTU) && - (ndev->mtu <= XAE_JUMBO_MTU) && - (lp->jumbo_support)) { - lp->max_frm_size = ndev->mtu + XAE_HDR_VLAN_SIZE + - XAE_TRL_SIZE; - lp->options |= XAE_OPTION_JUMBO; + (ndev->mtu <= XAE_JUMBO_MTU)) { + lp->max_frm_size = ndev->mtu + VLAN_ETH_HLEN + + XAE_TRL_SIZE; + + if (lp->max_frm_size <= lp->rxmem) + lp->options |= XAE_OPTION_JUMBO; } if (axienet_dma_bd_init(ndev)) { - dev_err(&ndev->dev, "axienet_device_reset descriptor " - "allocation failed\n"); + netdev_err(ndev, "%s: descriptor allocation failed\n", + __func__); } axienet_status = axienet_ior(lp, XAE_RCW1_OFFSET); @@ -497,7 +500,8 @@ static void axienet_device_reset(struct net_device *ndev) axienet_iow(lp, XAE_FCC_OFFSET, XAE_FCC_FCRX_MASK); /* Sync default options with HW but leave receiver and - * transmitter disabled.*/ + * transmitter disabled. + */ axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); axienet_set_mac_address(ndev, NULL); @@ -558,8 +562,8 @@ static void axienet_adjust_link(struct net_device *ndev) lp->last_link = link_state; phy_print_status(phy); } else { - dev_err(&ndev->dev, "Error setting Axi Ethernet " - "mac speed\n"); + netdev_err(ndev, + "Error setting Axi Ethernet mac speed\n"); } } } @@ -617,7 +621,7 @@ static void axienet_start_xmit_done(struct net_device *ndev) * @lp: Pointer to the axienet_local structure * @num_frag: The number of BDs to check for * - * returns: 0, on success + * Return: 0, on success * NETDEV_TX_BUSY, if any of the descriptors are not free * * This function is invoked before BDs are allocated and transmission starts. @@ -640,7 +644,7 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp, * @skb: sk_buff pointer that contains data to be Txed. * @ndev: Pointer to net_device structure. * - * returns: NETDEV_TX_OK, on success + * Return: NETDEV_TX_OK, on success * NETDEV_TX_BUSY, if any of the descriptors are not free * * This function is invoked from upper layers to initiate transmission. The @@ -726,15 +730,15 @@ static void axienet_recv(struct net_device *ndev) u32 csumstatus; u32 size = 0; u32 packets = 0; - dma_addr_t tail_p; + dma_addr_t tail_p = 0; struct axienet_local *lp = netdev_priv(ndev); struct sk_buff *skb, *new_skb; struct axidma_bd *cur_p; - tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) { + tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; skb = (struct sk_buff *) (cur_p->sw_id_offset); length = cur_p->app4 & 0x0000FFFF; @@ -786,7 +790,8 @@ static void axienet_recv(struct net_device *ndev) ndev->stats.rx_packets += packets; ndev->stats.rx_bytes += size; - axienet_dma_out32(lp, XAXIDMA_RX_TDESC_OFFSET, tail_p); + if (tail_p) + axienet_dma_out32(lp, XAXIDMA_RX_TDESC_OFFSET, tail_p); } /** @@ -794,7 +799,7 @@ static void axienet_recv(struct net_device *ndev) * @irq: irq number * @_ndev: net_device pointer * - * returns: IRQ_HANDLED for all cases. + * Return: IRQ_HANDLED for all cases. * * This is the Axi DMA Tx done Isr. It invokes "axienet_start_xmit_done" * to complete the BD processing. @@ -808,6 +813,7 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) status = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { + axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); axienet_start_xmit_done(lp->ndev); goto out; } @@ -831,9 +837,9 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); tasklet_schedule(&lp->dma_err_tasklet); + axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); } out: - axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status); return IRQ_HANDLED; } @@ -842,7 +848,7 @@ out: * @irq: irq number * @_ndev: net_device pointer * - * returns: IRQ_HANDLED for all cases. + * Return: IRQ_HANDLED for all cases. * * This is the Axi DMA Rx Isr. It invokes "axienet_recv" to complete the BD * processing. @@ -856,6 +862,7 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) status = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { + axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); axienet_recv(lp->ndev); goto out; } @@ -879,9 +886,9 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); tasklet_schedule(&lp->dma_err_tasklet); + axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); } out: - axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status); return IRQ_HANDLED; } @@ -891,7 +898,7 @@ static void axienet_dma_err_handler(unsigned long data); * axienet_open - Driver open routine. * @ndev: Pointer to net_device structure * - * returns: 0, on success. + * Return: 0, on success. * -ENODEV, if PHY cannot be connected to * non-zero error value on failure * @@ -914,7 +921,8 @@ static int axienet_open(struct net_device *ndev) /* Disable the MDIO interface till Axi Ethernet Reset is completed. * When we do an Axi Ethernet reset, it resets the complete core * including the MDIO. If MDIO is not disabled when the reset - * process is started, MDIO will be broken afterwards. */ + * process is started, MDIO will be broken afterwards. + */ axienet_iow(lp, XAE_MDIO_MC_OFFSET, (mdio_mcreg & (~XAE_MDIO_MC_MDIOEN_MASK))); axienet_device_reset(ndev); @@ -925,14 +933,20 @@ static int axienet_open(struct net_device *ndev) return ret; if (lp->phy_node) { - lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node, + if (lp->phy_type == XAE_PHY_TYPE_GMII) { + lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node, axienet_adjust_link, 0, PHY_INTERFACE_MODE_GMII); - if (!lp->phy_dev) { - dev_err(lp->dev, "of_phy_connect() failed\n"); - return -ENODEV; + } else if (lp->phy_type == XAE_PHY_TYPE_RGMII_2_0) { + lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node, + axienet_adjust_link, 0, + PHY_INTERFACE_MODE_RGMII_ID); } - phy_start(lp->phy_dev); + + if (!lp->phy_dev) + dev_err(lp->dev, "of_phy_connect() failed\n"); + else + phy_start(lp->phy_dev); } /* Enable tasklets for Axi DMA error handling */ @@ -965,7 +979,7 @@ err_tx_irq: * axienet_stop - Driver stop routine. * @ndev: Pointer to net_device structure * - * returns: 0, on success. + * Return: 0, on success. * * This is the driver stop routine. It calls phy_disconnect to stop the PHY * device. It also removes the interrupt handlers and disables the interrupts. @@ -1005,7 +1019,7 @@ static int axienet_stop(struct net_device *ndev) * @ndev: Pointer to net_device structure * @new_mtu: New mtu value to be applied * - * returns: Always returns 0 (success). + * Return: Always returns 0 (success). * * This is the change mtu driver routine. It checks if the Axi Ethernet * hardware supports jumbo frames before changing the mtu. This can be @@ -1017,15 +1031,15 @@ static int axienet_change_mtu(struct net_device *ndev, int new_mtu) if (netif_running(ndev)) return -EBUSY; - if (lp->jumbo_support) { - if ((new_mtu > XAE_JUMBO_MTU) || (new_mtu < 64)) - return -EINVAL; - ndev->mtu = new_mtu; - } else { - if ((new_mtu > XAE_MTU) || (new_mtu < 64)) - return -EINVAL; - ndev->mtu = new_mtu; - } + + if ((new_mtu + VLAN_ETH_HLEN + + XAE_TRL_SIZE) > lp->rxmem) + return -EINVAL; + + if ((new_mtu > XAE_JUMBO_MTU) || (new_mtu < 64)) + return -EINVAL; + + ndev->mtu = new_mtu; return 0; } @@ -1072,6 +1086,8 @@ static const struct net_device_ops axienet_netdev_ops = { * not be found, the function returns -ENODEV. This function calls the * relevant PHY ethtool API to get the PHY settings. * Issue "ethtool ethX" under linux prompt to execute this function. + * + * Return: 0 on success, -ENODEV if PHY doesn't exist */ static int axienet_ethtools_get_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) @@ -1093,6 +1109,8 @@ static int axienet_ethtools_get_settings(struct net_device *ndev, * relevant PHY ethtool API to set the PHY. * Issue e.g. "ethtool -s ethX speed 1000" under linux prompt to execute this * function. + * + * Return: 0 on success, -ENODEV if PHY doesn't exist */ static int axienet_ethtools_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) @@ -1127,6 +1145,8 @@ static void axienet_ethtools_get_drvinfo(struct net_device *ndev, * * This implements ethtool command for getting the total register length * information. + * + * Return: the total regs length */ static int axienet_ethtools_get_regs_len(struct net_device *ndev) { @@ -1213,11 +1233,13 @@ axienet_ethtools_get_pauseparam(struct net_device *ndev, * axienet_ethtools_set_pauseparam - Set device pause parameter(flow control) * settings. * @ndev: Pointer to net_device structure - * @epauseparam:Pointer to ethtool_pauseparam structure + * @epauseparm:Pointer to ethtool_pauseparam structure * * This implements ethtool command for enabling flow control on Rx and Tx * paths. Issue "ethtool -A ethX tx on|off" under linux prompt to execute this * function. + * + * Return: 0 on success, -EFAULT if device is running */ static int axienet_ethtools_set_pauseparam(struct net_device *ndev, @@ -1227,8 +1249,8 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev, struct axienet_local *lp = netdev_priv(ndev); if (netif_running(ndev)) { - printk(KERN_ERR "%s: Please stop netif before applying " - "configruation\n", ndev->name); + netdev_err(ndev, + "Please stop netif before applying configuration\n"); return -EFAULT; } @@ -1254,6 +1276,8 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev, * This implements ethtool command for getting the DMA interrupt coalescing * count on Tx and Rx paths. Issue "ethtool -c ethX" under linux prompt to * execute this function. + * + * Return: 0 always */ static int axienet_ethtools_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *ecoalesce) @@ -1277,6 +1301,8 @@ static int axienet_ethtools_get_coalesce(struct net_device *ndev, * This implements ethtool command for setting the DMA interrupt coalescing * count on Tx and Rx paths. Issue "ethtool -C ethX rx-frames 5" under linux * prompt to execute this function. + * + * Return: 0, on success, Non-zero error value on failure. */ static int axienet_ethtools_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ecoalesce) @@ -1284,8 +1310,8 @@ static int axienet_ethtools_set_coalesce(struct net_device *ndev, struct axienet_local *lp = netdev_priv(ndev); if (netif_running(ndev)) { - printk(KERN_ERR "%s: Please stop netif before applying " - "configruation\n", ndev->name); + netdev_err(ndev, + "Please stop netif before applying configuration\n"); return -EFAULT; } @@ -1354,7 +1380,8 @@ static void axienet_dma_err_handler(unsigned long data) /* Disable the MDIO interface till Axi Ethernet Reset is completed. * When we do an Axi Ethernet reset, it resets the complete core * including the MDIO. So if MDIO is not disabled when the reset - * process is started, MDIO will be broken afterwards. */ + * process is started, MDIO will be broken afterwards. + */ axienet_iow(lp, XAE_MDIO_MC_OFFSET, (mdio_mcreg & ~XAE_MDIO_MC_MDIOEN_MASK)); @@ -1425,7 +1452,8 @@ static void axienet_dma_err_handler(unsigned long data) axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); /* Populate the tail pointer and bring the Rx Axi DMA engine out of - * halted state. This will make the Rx side ready for reception.*/ + * halted state. This will make the Rx side ready for reception. + */ axienet_dma_out32(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, @@ -1435,7 +1463,8 @@ static void axienet_dma_err_handler(unsigned long data) /* Write to the RS (Run-stop) bit in the Tx channel control register. * Tx channel is now ready to run. But only after we write to the - * tail pointer register that the Tx channel will start transmitting */ + * tail pointer register that the Tx channel will start transmitting + */ axienet_dma_out32(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, @@ -1451,7 +1480,8 @@ static void axienet_dma_err_handler(unsigned long data) axienet_iow(lp, XAE_FCC_OFFSET, XAE_FCC_FCRX_MASK); /* Sync default options with HW but leave receiver and - * transmitter disabled.*/ + * transmitter disabled. + */ axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); axienet_set_mac_address(ndev, NULL); @@ -1460,11 +1490,10 @@ static void axienet_dma_err_handler(unsigned long data) } /** - * axienet_of_probe - Axi Ethernet probe function. - * @op: Pointer to platform device structure. - * @match: Pointer to device id structure + * axienet_probe - Axi Ethernet probe function. + * @pdev: Pointer to platform device structure. * - * returns: 0, on success + * Return: 0, on success * Non-zero error value on failure. * * This is the probe routine for Axi Ethernet driver. This is called before @@ -1472,22 +1501,23 @@ static void axienet_dma_err_handler(unsigned long data) * device. Parses through device tree and populates fields of * axienet_local. It registers the Ethernet device. */ -static int axienet_of_probe(struct platform_device *op) +static int axienet_probe(struct platform_device *pdev) { - __be32 *p; - int size, ret = 0; + int ret; struct device_node *np; struct axienet_local *lp; struct net_device *ndev; - const void *addr; + u8 mac_addr[6]; + struct resource *ethres, dmares; + u32 value; ndev = alloc_etherdev(sizeof(*lp)); if (!ndev) return -ENOMEM; - platform_set_drvdata(op, ndev); + platform_set_drvdata(pdev, ndev); - SET_NETDEV_DEV(ndev, &op->dev); + SET_NETDEV_DEV(ndev, &pdev->dev); ndev->flags &= ~IFF_MULTICAST; /* clear multicast */ ndev->features = NETIF_F_SG; ndev->netdev_ops = &axienet_netdev_ops; @@ -1495,21 +1525,23 @@ static int axienet_of_probe(struct platform_device *op) lp = netdev_priv(ndev); lp->ndev = ndev; - lp->dev = &op->dev; + lp->dev = &pdev->dev; lp->options = XAE_OPTION_DEFAULTS; /* Map device registers */ - lp->regs = of_iomap(op->dev.of_node, 0); + ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); + lp->regs = devm_ioremap_resource(&pdev->dev, ethres); if (!lp->regs) { - dev_err(&op->dev, "could not map Axi Ethernet regs.\n"); + dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n"); ret = -ENOMEM; - goto nodev; + goto free_netdev; } + /* Setup checksum offload, but default to off if not specified */ lp->features = 0; - p = (__be32 *) of_get_property(op->dev.of_node, "xlnx,txcsum", NULL); - if (p) { - switch (be32_to_cpup(p)) { + ret = of_property_read_u32(pdev->dev.of_node, "xlnx,txcsum", &value); + if (!ret) { + switch (value) { case 1: lp->csum_offload_on_tx_path = XAE_FEATURE_PARTIAL_TX_CSUM; @@ -1528,9 +1560,9 @@ static int axienet_of_probe(struct platform_device *op) lp->csum_offload_on_tx_path = XAE_NO_CSUM_OFFLOAD; } } - p = (__be32 *) of_get_property(op->dev.of_node, "xlnx,rxcsum", NULL); - if (p) { - switch (be32_to_cpup(p)) { + ret = of_property_read_u32(pdev->dev.of_node, "xlnx,rxcsum", &value); + if (!ret) { + switch (value) { case 1: lp->csum_offload_on_rx_path = XAE_FEATURE_PARTIAL_RX_CSUM; @@ -1546,82 +1578,77 @@ static int axienet_of_probe(struct platform_device *op) } } /* For supporting jumbo frames, the Axi Ethernet hardware must have - * a larger Rx/Tx Memory. Typically, the size must be more than or - * equal to 16384 bytes, so that we can enable jumbo option and start - * supporting jumbo frames. Here we check for memory allocated for - * Rx/Tx in the hardware from the device-tree and accordingly set - * flags. */ - p = (__be32 *) of_get_property(op->dev.of_node, "xlnx,rxmem", NULL); - if (p) { - if ((be32_to_cpup(p)) >= 0x4000) - lp->jumbo_support = 1; - } - p = (__be32 *) of_get_property(op->dev.of_node, "xlnx,phy-type", NULL); - if (p) - lp->phy_type = be32_to_cpup(p); + * a larger Rx/Tx Memory. Typically, the size must be large so that + * we can enable jumbo option and start supporting jumbo frames. + * Here we check for memory allocated for Rx/Tx in the hardware from + * the device-tree and accordingly set flags. + */ + of_property_read_u32(pdev->dev.of_node, "xlnx,rxmem", &lp->rxmem); + of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &lp->phy_type); /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */ - np = of_parse_phandle(op->dev.of_node, "axistream-connected", 0); - if (!np) { - dev_err(&op->dev, "could not find DMA node\n"); - ret = -ENODEV; - goto err_iounmap; + np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0); + if (IS_ERR(np)) { + dev_err(&pdev->dev, "could not find DMA node\n"); + ret = PTR_ERR(np); + goto free_netdev; } - lp->dma_regs = of_iomap(np, 0); - if (lp->dma_regs) { - dev_dbg(&op->dev, "MEM base: %p\n", lp->dma_regs); - } else { - dev_err(&op->dev, "unable to map DMA registers\n"); - of_node_put(np); + ret = of_address_to_resource(np, 0, &dmares); + if (ret) { + dev_err(&pdev->dev, "unable to get DMA resource\n"); + goto free_netdev; + } + lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares); + if (!lp->dma_regs) { + dev_err(&pdev->dev, "could not map DMA regs\n"); + ret = -ENOMEM; + goto free_netdev; } lp->rx_irq = irq_of_parse_and_map(np, 1); lp->tx_irq = irq_of_parse_and_map(np, 0); of_node_put(np); if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) { - dev_err(&op->dev, "could not determine irqs\n"); + dev_err(&pdev->dev, "could not determine irqs\n"); ret = -ENOMEM; - goto err_iounmap_2; + goto free_netdev; } /* Retrieve the MAC address */ - addr = of_get_property(op->dev.of_node, "local-mac-address", &size); - if ((!addr) || (size != 6)) { - dev_err(&op->dev, "could not find MAC address\n"); - ret = -ENODEV; - goto err_iounmap_2; + ret = of_property_read_u8_array(pdev->dev.of_node, + "local-mac-address", mac_addr, 6); + if (ret) { + dev_err(&pdev->dev, "could not find MAC address\n"); + goto free_netdev; } - axienet_set_mac_address(ndev, (void *) addr); + axienet_set_mac_address(ndev, (void *)mac_addr); lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; - lp->phy_node = of_parse_phandle(op->dev.of_node, "phy-handle", 0); - ret = axienet_mdio_setup(lp, op->dev.of_node); - if (ret) - dev_warn(&op->dev, "error registering MDIO bus\n"); + lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + if (lp->phy_node) { + ret = axienet_mdio_setup(lp, pdev->dev.of_node); + if (ret) + dev_warn(&pdev->dev, "error registering MDIO bus\n"); + } ret = register_netdev(lp->ndev); if (ret) { dev_err(lp->dev, "register_netdev() error (%i)\n", ret); - goto err_iounmap_2; + goto free_netdev; } return 0; -err_iounmap_2: - if (lp->dma_regs) - iounmap(lp->dma_regs); -err_iounmap: - iounmap(lp->regs); -nodev: +free_netdev: free_netdev(ndev); - ndev = NULL; + return ret; } -static int axienet_of_remove(struct platform_device *op) +static int axienet_remove(struct platform_device *pdev) { - struct net_device *ndev = platform_get_drvdata(op); + struct net_device *ndev = platform_get_drvdata(pdev); struct axienet_local *lp = netdev_priv(ndev); axienet_mdio_teardown(lp); @@ -1630,24 +1657,21 @@ static int axienet_of_remove(struct platform_device *op) of_node_put(lp->phy_node); lp->phy_node = NULL; - iounmap(lp->regs); - if (lp->dma_regs) - iounmap(lp->dma_regs); free_netdev(ndev); return 0; } -static struct platform_driver axienet_of_driver = { - .probe = axienet_of_probe, - .remove = axienet_of_remove, +static struct platform_driver axienet_driver = { + .probe = axienet_probe, + .remove = axienet_remove, .driver = { .name = "xilinx_axienet", .of_match_table = axienet_of_match, }, }; -module_platform_driver(axienet_of_driver); +module_platform_driver(axienet_driver); MODULE_DESCRIPTION("Xilinx Axi Ethernet driver"); MODULE_AUTHOR("Xilinx"); diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c index 3b67d60d4378..2a5a16834c01 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c @@ -37,7 +37,7 @@ int axienet_mdio_wait_until_ready(struct axienet_local *lp) * @phy_id: Address of the PHY device * @reg: PHY register to read * - * returns: The register contents on success, -ETIMEDOUT on a timeout + * Return: The register contents on success, -ETIMEDOUT on a timeout * * Reads the contents of the requested register from the requested PHY * address by first writing the details into MCR register. After a while @@ -80,7 +80,7 @@ static int axienet_mdio_read(struct mii_bus *bus, int phy_id, int reg) * @reg: PHY register to write to * @val: Value to be written into the register * - * returns: 0 on success, -ETIMEDOUT on a timeout + * Return: 0 on success, -ETIMEDOUT on a timeout * * Writes the value to the requested register by first writing the value * into MWD register. The the MCR register is then appropriately setup @@ -119,7 +119,7 @@ static int axienet_mdio_write(struct mii_bus *bus, int phy_id, int reg, * @lp: Pointer to axienet local data structure. * @np: Pointer to device node * - * returns: 0 on success, -ETIMEDOUT on a timeout, -ENOMEM when + * Return: 0 on success, -ETIMEDOUT on a timeout, -ENOMEM when * mdiobus_alloc (to allocate memory for mii bus structure) fails. * * Sets up the MDIO interface by initializing the MDIO clock and enabling the @@ -161,19 +161,19 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np) np1 = of_find_node_by_name(NULL, "cpu"); if (!np1) { - printk(KERN_WARNING "%s(): Could not find CPU device node.", - __func__); - printk(KERN_WARNING "Setting MDIO clock divisor to " - "default %d\n", DEFAULT_CLOCK_DIVISOR); + netdev_warn(lp->ndev, "Could not find CPU device node.\n"); + netdev_warn(lp->ndev, + "Setting MDIO clock divisor to default %d\n", + DEFAULT_CLOCK_DIVISOR); clk_div = DEFAULT_CLOCK_DIVISOR; goto issue; } property_p = (u32 *) of_get_property(np1, "clock-frequency", NULL); if (!property_p) { - printk(KERN_WARNING "%s(): Could not find CPU property: " - "clock-frequency.", __func__); - printk(KERN_WARNING "Setting MDIO clock divisor to " - "default %d\n", DEFAULT_CLOCK_DIVISOR); + netdev_warn(lp->ndev, "clock-frequency property not found.\n"); + netdev_warn(lp->ndev, + "Setting MDIO clock divisor to default %d\n", + DEFAULT_CLOCK_DIVISOR); clk_div = DEFAULT_CLOCK_DIVISOR; of_node_put(np1); goto issue; @@ -183,12 +183,14 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np) clk_div = (host_clock / (MAX_MDIO_FREQ * 2)) - 1; /* If there is any remainder from the division of * fHOST / (MAX_MDIO_FREQ * 2), then we need to add - * 1 to the clock divisor or we will surely be above 2.5 MHz */ + * 1 to the clock divisor or we will surely be above 2.5 MHz + */ if (host_clock % (MAX_MDIO_FREQ * 2)) clk_div++; - printk(KERN_DEBUG "%s(): Setting MDIO clock divisor to %u based " - "on %u Hz host clock.\n", __func__, clk_div, host_clock); + netdev_dbg(lp->ndev, + "Setting MDIO clock divisor to %u/%u Hz host clock.\n", + clk_div, host_clock); of_node_put(np1); issue: diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ea091bc5ff09..b0249685139c 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -743,6 +743,7 @@ static inline int netvsc_send_pkt( u64 req_id; int ret; struct hv_page_buffer *pgbuf; + u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound); nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; if (packet->is_data_pkt) { @@ -769,32 +770,42 @@ static inline int netvsc_send_pkt( if (out_channel->rescind) return -ENODEV; + /* + * It is possible that once we successfully place this packet + * on the ringbuffer, we may stop the queue. In that case, we want + * to notify the host independent of the xmit_more flag. We don't + * need to be precise here; in the worst case we may signal the host + * unnecessarily. + */ + if (ring_avail < (RING_AVAIL_PERCENT_LOWATER + 1)) + packet->xmit_more = false; + if (packet->page_buf_cnt) { pgbuf = packet->cp_partial ? packet->page_buf + packet->rmsg_pgcnt : packet->page_buf; - ret = vmbus_sendpacket_pagebuffer(out_channel, - pgbuf, - packet->page_buf_cnt, - &nvmsg, - sizeof(struct nvsp_message), - req_id); + ret = vmbus_sendpacket_pagebuffer_ctl(out_channel, + pgbuf, + packet->page_buf_cnt, + &nvmsg, + sizeof(struct nvsp_message), + req_id, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED, + !packet->xmit_more); } else { - ret = vmbus_sendpacket( - out_channel, &nvmsg, - sizeof(struct nvsp_message), - req_id, - VM_PKT_DATA_INBAND, - VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + ret = vmbus_sendpacket_ctl(out_channel, &nvmsg, + sizeof(struct nvsp_message), + req_id, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED, + !packet->xmit_more); } if (ret == 0) { atomic_inc(&net_device->num_outstanding_sends); atomic_inc(&net_device->queue_sends[q_idx]); - if (hv_ringbuf_avail_percent(&out_channel->outbound) < - RING_AVAIL_PERCENT_LOWATER) { - netif_tx_stop_queue(netdev_get_tx_queue( - ndev, q_idx)); + if (ring_avail < RING_AVAIL_PERCENT_LOWATER) { + netif_tx_stop_queue(netdev_get_tx_queue(ndev, q_idx)); if (atomic_read(&net_device-> queue_sends[q_idx]) < 1) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 54549a6223dd..953a97492fab 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -39,6 +39,8 @@ #define IPVLAN_MAC_FILTER_SIZE (1 << IPVLAN_MAC_FILTER_BITS) #define IPVLAN_MAC_FILTER_MASK (IPVLAN_MAC_FILTER_SIZE - 1) +#define IPVLAN_QBACKLOG_LIMIT 1000 + typedef enum { IPVL_IPV6 = 0, IPVL_ICMPV6, @@ -93,6 +95,8 @@ struct ipvl_port { struct hlist_head hlhead[IPVLAN_HASH_SIZE]; struct list_head ipvlans; struct rcu_head rcu; + struct work_struct wq; + struct sk_buff_head backlog; int count; u16 mode; }; @@ -112,6 +116,7 @@ void ipvlan_set_port_mode(struct ipvl_port *port, u32 nval); void ipvlan_init_secret(void); unsigned int ipvlan_mac_hash(const unsigned char *addr); rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb); +void ipvlan_process_multicast(struct work_struct *work); int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev); void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr); struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index c30b5c300c05..8afbedad620d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -189,62 +189,69 @@ unsigned int ipvlan_mac_hash(const unsigned char *addr) return hash & IPVLAN_MAC_FILTER_MASK; } -static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb, - const struct ipvl_dev *in_dev, bool local) +void ipvlan_process_multicast(struct work_struct *work) { - struct ethhdr *eth = eth_hdr(skb); + struct ipvl_port *port = container_of(work, struct ipvl_port, wq); + struct ethhdr *ethh; struct ipvl_dev *ipvlan; - struct sk_buff *nskb; + struct sk_buff *skb, *nskb; + struct sk_buff_head list; unsigned int len; unsigned int mac_hash; int ret; + u8 pkt_type; + bool hlocal, dlocal; - if (skb->protocol == htons(ETH_P_PAUSE)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { - if (local && (ipvlan == in_dev)) - continue; + __skb_queue_head_init(&list); - mac_hash = ipvlan_mac_hash(eth->h_dest); - if (!test_bit(mac_hash, ipvlan->mac_filters)) - continue; + spin_lock_bh(&port->backlog.lock); + skb_queue_splice_tail_init(&port->backlog, &list); + spin_unlock_bh(&port->backlog.lock); - ret = NET_RX_DROP; - len = skb->len + ETH_HLEN; - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - goto mcast_acct; + while ((skb = __skb_dequeue(&list)) != NULL) { + ethh = eth_hdr(skb); + hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr); + mac_hash = ipvlan_mac_hash(ethh->h_dest); - if (ether_addr_equal(eth->h_dest, ipvlan->phy_dev->broadcast)) - nskb->pkt_type = PACKET_BROADCAST; + if (ether_addr_equal(ethh->h_dest, port->dev->broadcast)) + pkt_type = PACKET_BROADCAST; else - nskb->pkt_type = PACKET_MULTICAST; - - nskb->dev = ipvlan->dev; - if (local) - ret = dev_forward_skb(ipvlan->dev, nskb); - else - ret = netif_rx(nskb); -mcast_acct: - ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); - } - rcu_read_unlock(); - - /* Locally generated? ...Forward a copy to the main-device as - * well. On the RX side we'll ignore it (wont give it to any - * of the virtual devices. - */ - if (local) { - nskb = skb_clone(skb, GFP_ATOMIC); - if (nskb) { - if (ether_addr_equal(eth->h_dest, port->dev->broadcast)) - nskb->pkt_type = PACKET_BROADCAST; + pkt_type = PACKET_MULTICAST; + + dlocal = false; + rcu_read_lock(); + list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { + if (hlocal && (ipvlan->dev == skb->dev)) { + dlocal = true; + continue; + } + if (!test_bit(mac_hash, ipvlan->mac_filters)) + continue; + + ret = NET_RX_DROP; + len = skb->len + ETH_HLEN; + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + goto acct; + + nskb->pkt_type = pkt_type; + nskb->dev = ipvlan->dev; + if (hlocal) + ret = dev_forward_skb(ipvlan->dev, nskb); else - nskb->pkt_type = PACKET_MULTICAST; - - dev_forward_skb(port->dev, nskb); + ret = netif_rx(nskb); +acct: + ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); + } + rcu_read_unlock(); + + if (dlocal) { + /* If the packet originated here, send it out. */ + skb->dev = port->dev; + skb->pkt_type = pkt_type; + dev_queue_xmit(skb); + } else { + kfree_skb(skb); } } } @@ -446,6 +453,26 @@ out: return ret; } +static void ipvlan_multicast_enqueue(struct ipvl_port *port, + struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_PAUSE)) { + kfree_skb(skb); + return; + } + + spin_lock(&port->backlog.lock); + if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) { + __skb_queue_tail(&port->backlog, skb); + spin_unlock(&port->backlog.lock); + schedule_work(&port->wq); + } else { + spin_unlock(&port->backlog.lock); + atomic_long_inc(&skb->dev->rx_dropped); + kfree_skb(skb); + } +} + static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) { const struct ipvl_dev *ipvlan = netdev_priv(dev); @@ -493,11 +520,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) return dev_forward_skb(ipvlan->phy_dev, skb); } else if (is_multicast_ether_addr(eth->h_dest)) { - u8 ip_summed = skb->ip_summed; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - ipvlan_multicast_frame(ipvlan->port, skb, ipvlan, true); - skb->ip_summed = ip_summed; + ipvlan_multicast_enqueue(ipvlan->port, skb); + return NET_XMIT_SUCCESS; } skb->dev = ipvlan->phy_dev; @@ -581,8 +605,18 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, int addr_type; if (is_multicast_ether_addr(eth->h_dest)) { - if (ipvlan_external_frame(skb, port)) - ipvlan_multicast_frame(port, skb, NULL, false); + if (ipvlan_external_frame(skb, port)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + /* External frames are queued for device local + * distribution, but a copy is given to master + * straight away to avoid sending duplicates later + * when work-queue processes this frame. This is + * achieved by returning RX_HANDLER_PASS. + */ + if (nskb) + ipvlan_multicast_enqueue(port, nskb); + } } else { struct ipvl_addr *addr; diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 77b92a0fe557..1acc283160d9 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -54,6 +54,9 @@ static int ipvlan_port_create(struct net_device *dev) for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) INIT_HLIST_HEAD(&port->hlhead[idx]); + skb_queue_head_init(&port->backlog); + INIT_WORK(&port->wq, ipvlan_process_multicast); + err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port); if (err) goto err; @@ -72,6 +75,8 @@ static void ipvlan_port_destroy(struct net_device *dev) dev->priv_flags &= ~IFF_IPVLAN_MASTER; netdev_rx_handler_unregister(dev); + cancel_work_sync(&port->wq); + __skb_queue_purge(&port->backlog); kfree_rcu(port, rcu); } @@ -213,17 +218,6 @@ static void ipvlan_change_rx_flags(struct net_device *dev, int change) dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1); } -static void ipvlan_set_broadcast_mac_filter(struct ipvl_dev *ipvlan, bool set) -{ - struct net_device *dev = ipvlan->dev; - unsigned int hashbit = ipvlan_mac_hash(dev->broadcast); - - if (set && !test_bit(hashbit, ipvlan->mac_filters)) - __set_bit(hashbit, ipvlan->mac_filters); - else if (!set && test_bit(hashbit, ipvlan->mac_filters)) - __clear_bit(hashbit, ipvlan->mac_filters); -} - static void ipvlan_set_multicast_mac_filter(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); @@ -238,6 +232,12 @@ static void ipvlan_set_multicast_mac_filter(struct net_device *dev) netdev_for_each_mc_addr(ha, dev) __set_bit(ipvlan_mac_hash(ha->addr), mc_filters); + /* Turn-on broadcast bit irrespective of address family, + * since broadcast is deferred to a work-queue, hence no + * impact on fast-path processing. + */ + __set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters); + bitmap_copy(ipvlan->mac_filters, mc_filters, IPVLAN_MAC_FILTER_SIZE); } @@ -705,7 +705,6 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) */ if (netif_running(ipvlan->dev)) ipvlan_ht_addr_add(ipvlan, addr); - ipvlan_set_broadcast_mac_filter(ipvlan, true); return 0; } @@ -722,8 +721,6 @@ static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) list_del(&addr->anode); ipvlan->ipv4cnt--; WARN_ON(ipvlan->ipv4cnt < 0); - if (!ipvlan->ipv4cnt) - ipvlan_set_broadcast_mac_filter(ipvlan, false); kfree_rcu(addr, rcu); return; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 8c350c5d54ad..483afb19596c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -476,7 +476,7 @@ static int macvtap_open(struct inode *inode, struct file *file) err = -ENOMEM; q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &macvtap_proto); + &macvtap_proto, 0); if (!q) goto out; @@ -1006,6 +1006,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, unsigned int __user *up = argp; unsigned short u; int __user *sp = argp; + struct sockaddr sa; int s; int ret; @@ -1101,6 +1102,37 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, rtnl_unlock(); return ret; + case SIOCGIFHWADDR: + rtnl_lock(); + vlan = macvtap_get_vlan(q); + if (!vlan) { + rtnl_unlock(); + return -ENOLINK; + } + ret = 0; + u = vlan->dev->type; + if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) || + copy_to_user(&ifr->ifr_hwaddr.sa_data, vlan->dev->dev_addr, ETH_ALEN) || + put_user(u, &ifr->ifr_hwaddr.sa_family)) + ret = -EFAULT; + macvtap_put_vlan(vlan); + rtnl_unlock(); + return ret; + + case SIOCSIFHWADDR: + if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa))) + return -EFAULT; + rtnl_lock(); + vlan = macvtap_get_vlan(q); + if (!vlan) { + rtnl_unlock(); + return -ENOLINK; + } + ret = dev_set_mac_address(vlan->dev, &sa); + macvtap_put_vlan(vlan); + rtnl_unlock(); + return ret; + default: return -EINVAL; } diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index b62a5e3a1c65..3837ae344f63 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -550,11 +550,11 @@ static struct proto pppoe_sk_proto __read_mostly = { * Initialize a new struct sock. * **********************************************************************/ -static int pppoe_create(struct net *net, struct socket *sock) +static int pppoe_create(struct net *net, struct socket *sock, int kern) { struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, kern); if (!sk) return -ENOMEM; diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index 2940e9fe351b..0e1b30622477 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -118,7 +118,7 @@ static int pppox_create(struct net *net, struct socket *sock, int protocol, !try_module_get(pppox_protos[protocol]->owner)) goto out; - rc = pppox_protos[protocol]->create(net, sock); + rc = pppox_protos[protocol]->create(net, sock, kern); module_put(pppox_protos[protocol]->owner); out: diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index e3bfbd4d0136..14839bc0aaf5 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -561,14 +561,14 @@ static void pptp_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); } -static int pptp_create(struct net *net, struct socket *sock) +static int pptp_create(struct net *net, struct socket *sock, int kern) { int error = -ENOMEM; struct sock *sk; struct pppox_sock *po; struct pptp_opt *opt; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto, kern); if (!sk) goto out; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6928448f6b7f..1ec035a53c3d 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1924,7 +1924,7 @@ static netdev_features_t team_fix_features(struct net_device *dev, struct team *team = netdev_priv(dev); netdev_features_t mask; - mask = features | NETIF_F_HW_SWITCH_OFFLOAD; + mask = features; features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; @@ -1977,8 +1977,9 @@ static const struct net_device_ops team_netdev_ops = { .ndo_del_slave = team_del_slave, .ndo_fix_features = team_fix_features, .ndo_change_carrier = team_change_carrier, - .ndo_bridge_setlink = ndo_dflt_netdev_switch_port_bridge_setlink, - .ndo_bridge_dellink = ndo_dflt_netdev_switch_port_bridge_dellink, + .ndo_bridge_setlink = switchdev_port_bridge_setlink, + .ndo_bridge_getlink = switchdev_port_bridge_getlink, + .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_features_check = passthru_features_check, }; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e470ae59d405..1a1c4f7b3ec5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -146,7 +146,6 @@ struct tun_file { struct socket socket; struct socket_wq wq; struct tun_struct __rcu *tun; - struct net *net; struct fasync_struct *fasync; /* only used for fasnyc */ unsigned int flags; @@ -493,10 +492,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - - BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, - &tfile->socket.flags)); - sk_release_kernel(&tfile->sk); + sock_put(&tfile->sk); } } @@ -1492,18 +1488,10 @@ out: return ret; } -static int tun_release(struct socket *sock) -{ - if (sock->sk) - sock_put(sock->sk); - return 0; -} - /* Ops structure to mimic raw sockets with tun */ static const struct proto_ops tun_socket_ops = { .sendmsg = tun_sendmsg, .recvmsg = tun_recvmsg, - .release = tun_release, }; static struct proto tun_proto = { @@ -1865,7 +1853,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, if (cmd == TUNSETIFF && !tun) { ifr.ifr_name[IFNAMSIZ-1] = '\0'; - ret = tun_set_iff(tfile->net, file, &ifr); + ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); if (ret) goto unlock; @@ -2154,16 +2142,16 @@ out: static int tun_chr_open(struct inode *inode, struct file * file) { + struct net *net = current->nsproxy->net_ns; struct tun_file *tfile; DBG1(KERN_INFO, "tunX: tun_chr_open\n"); - tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, - &tun_proto); + tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &tun_proto, 0); if (!tfile) return -ENOMEM; RCU_INIT_POINTER(tfile->tun, NULL); - tfile->net = get_net(current->nsproxy->net_ns); tfile->flags = 0; tfile->ifindex = 0; @@ -2174,13 +2162,11 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->socket.ops = &tun_socket_ops; sock_init_data(&tfile->socket, &tfile->sk); - sk_change_net(&tfile->sk, tfile->net); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; file->private_data = tfile; - set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags); INIT_LIST_HEAD(&tfile->next); sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); @@ -2191,10 +2177,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) static int tun_chr_close(struct inode *inode, struct file *file) { struct tun_file *tfile = file->private_data; - struct net *net = tfile->net; tun_detach(tfile, true); - put_net(net); return 0; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 27a5f954f8e9..48341ae49012 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -336,7 +336,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (!net_eq(dev_net(vxlan->dev), vxlan->net) && nla_put_s32(skb, NDA_LINK_NETNSID, - peernet2id(dev_net(vxlan->dev), vxlan->net))) + peernet2id_alloc(dev_net(vxlan->dev), vxlan->net))) goto nla_put_failure; if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) @@ -1921,6 +1921,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, memset(&fl4, 0, sizeof(fl4)); fl4.flowi4_oif = rdst->remote_ifindex; fl4.flowi4_tos = RT_TOS(tos); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; fl4.daddr = dst->sin.sin_addr.s_addr; fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr; @@ -1981,6 +1983,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, fl6.flowi6_oif = rdst->remote_ifindex; fl6.daddr = dst->sin6.sin6_addr; fl6.saddr = vxlan->saddr.sin6.sin6_addr; + fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; if (ipv6_stub->ipv6_dst_lookup(sk, &ndst, &fl6)) { diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index f07a61899545..413528295d72 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1353,12 +1353,7 @@ static void adm8211_configure_filter(struct ieee80211_hw *dev, new_flags = 0; - if (*total_flags & FIF_PROMISC_IN_BSS) { - new_flags |= FIF_PROMISC_IN_BSS; - priv->nar |= ADM8211_NAR_PR; - priv->nar &= ~ADM8211_NAR_MM; - mc_filter[1] = mc_filter[0] = ~0; - } else if (*total_flags & FIF_ALLMULTI || multicast == ~(0ULL)) { + if (*total_flags & FIF_ALLMULTI || multicast == ~(0ULL)) { new_flags |= FIF_ALLMULTI; priv->nar &= ~ADM8211_NAR_PR; priv->nar |= ADM8211_NAR_MM; diff --git a/drivers/net/wireless/at76c50x-usb.h b/drivers/net/wireless/at76c50x-usb.h index 55090a38ac95..ae03271f878e 100644 --- a/drivers/net/wireless/at76c50x-usb.h +++ b/drivers/net/wireless/at76c50x-usb.h @@ -447,7 +447,7 @@ struct at76_priv { int mac80211_registered; }; -#define AT76_SUPPORTED_FILTERS FIF_PROMISC_IN_BSS +#define AT76_SUPPORTED_FILTERS 0 #define SCAN_POLL_INTERVAL (HZ / 4) diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 5147ebe4cd05..14937cbeca56 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1319,8 +1319,7 @@ out_unlock: } -#define AR5523_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define AR5523_SUPPORTED_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_OTHER_BSS) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 973485bd4121..fcd08b2f8d26 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -773,7 +773,6 @@ static int ath10k_monitor_recalc(struct ath10k *ar) lockdep_assert_held(&ar->conf_mutex); should_start = ar->monitor || - ar->filter_flags & FIF_PROMISC_IN_BSS || test_bit(ATH10K_CAC_RUNNING, &ar->dev_flags); ath10k_dbg(ar, ATH10K_DBG_MAC, @@ -3493,8 +3492,7 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw, * FIXME: Has to be verified. */ #define SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ + (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ @@ -5500,7 +5498,8 @@ int ath10k_mac_register(struct ath10k *ar) IEEE80211_HW_HAS_RATE_CONTROL | IEEE80211_HW_AP_LINK_PS | IEEE80211_HW_SPECTRUM_MGMT | - IEEE80211_HW_SW_CRYPTO_CONTROL; + IEEE80211_HW_SW_CRYPTO_CONTROL | + IEEE80211_HW_SUPPORT_FAST_XMIT; ar->hw->wiphy->features |= NL80211_FEATURE_STATIC_SMPS; diff --git a/drivers/net/wireless/ath/ath5k/ath5k.h b/drivers/net/wireless/ath/ath5k/ath5k.h index 7ca0d6f930fd..e22b0e778927 100644 --- a/drivers/net/wireless/ath/ath5k/ath5k.h +++ b/drivers/net/wireless/ath/ath5k/ath5k.h @@ -1280,7 +1280,6 @@ struct ath5k_hw { DECLARE_BITMAP(status, 4); #define ATH_STAT_INVALID 0 /* disable hardware accesses */ -#define ATH_STAT_PROMISC 1 #define ATH_STAT_LEDSOFT 2 /* enable LED gpio status */ #define ATH_STAT_STARTED 3 /* opened & irqs enabled */ #define ATH_STAT_RESET 4 /* hw reset */ diff --git a/drivers/net/wireless/ath/ath5k/led.c b/drivers/net/wireless/ath/ath5k/led.c index ca4b7ccd697f..803030fd17d3 100644 --- a/drivers/net/wireless/ath/ath5k/led.c +++ b/drivers/net/wireless/ath/ath5k/led.c @@ -124,7 +124,7 @@ ath5k_led_brightness_set(struct led_classdev *led_dev, static int ath5k_register_led(struct ath5k_hw *ah, struct ath5k_led *led, - const char *name, char *trigger) + const char *name, const char *trigger) { int err; diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index 3b4a6463d87a..dc44cfef7517 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -369,7 +369,7 @@ ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, u64 multicast) { #define SUPPORTED_FIF_FLAGS \ - (FIF_PROMISC_IN_BSS | FIF_ALLMULTI | FIF_FCSFAIL | \ + (FIF_ALLMULTI | FIF_FCSFAIL | \ FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | \ FIF_BCN_PRBRESP_PROMISC) @@ -393,16 +393,6 @@ ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, (AR5K_RX_FILTER_UCAST | AR5K_RX_FILTER_BCAST | AR5K_RX_FILTER_MCAST); - if (changed_flags & (FIF_PROMISC_IN_BSS | FIF_OTHER_BSS)) { - if (*new_flags & FIF_PROMISC_IN_BSS) - __set_bit(ATH_STAT_PROMISC, ah->status); - else - __clear_bit(ATH_STAT_PROMISC, ah->status); - } - - if (test_bit(ATH_STAT_PROMISC, ah->status)) - rfilt |= AR5K_RX_FILTER_PROM; - /* Note, AR5K_RX_FILTER_MCAST is already enabled */ if (*new_flags & FIF_ALLMULTI) { mfilt[0] = ~0; @@ -418,8 +408,7 @@ ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, if ((*new_flags & FIF_BCN_PRBRESP_PROMISC) || (ah->nvifs > 1)) rfilt |= AR5K_RX_FILTER_BEACON; - /* FIF_CONTROL doc says that if FIF_PROMISC_IN_BSS is not - * set we should only pass on control frames for this + /* FIF_CONTROL doc says we should only pass on control frames for this * station. This needs testing. I believe right now this * enables *all* control frames, which is OK.. but * but we should see if we can improve on granularity */ @@ -809,7 +798,6 @@ const struct ieee80211_ops ath5k_hw_ops = { .sw_scan_start = ath5k_sw_scan_start, .sw_scan_complete = ath5k_sw_scan_complete, .get_stats = ath5k_get_stats, - /* .get_tkip_seq = not implemented */ /* .set_frag_threshold = not implemented */ /* .set_rts_threshold = not implemented */ /* .sta_add = not implemented */ diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 564923c0df87..b71f3072fd9a 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1238,8 +1238,7 @@ out: } #define SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ + (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index a0f58e2aa553..cc9648f844ae 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -872,14 +872,7 @@ u32 ath9k_htc_calcrxfilter(struct ath9k_htc_priv *priv) if (priv->rxfilter & FIF_PROBE_REQ) rfilt |= ATH9K_RX_FILTER_PROBEREQ; - /* - * Set promiscuous mode when FIF_PROMISC_IN_BSS is enabled for station - * mode interface or when in monitor mode. AP mode does not need this - * since it receives all in-BSS frames anyway. - */ - if (((ah->opmode != NL80211_IFTYPE_AP) && - (priv->rxfilter & FIF_PROMISC_IN_BSS)) || - ah->is_monitoring) + if (ah->is_monitoring) rfilt |= ATH9K_RX_FILTER_PROM; if (priv->rxfilter & FIF_CONTROL) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index b0badef71ce7..d285e3a89853 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1442,8 +1442,7 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) } #define SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ + (FIF_ALLMULTI | \ FIF_CONTROL | \ FIF_PSPOLL | \ FIF_OTHER_BSS | \ diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index 6fb40ef86fd6..6c75fb1ab77d 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -392,11 +392,6 @@ u32 ath_calcrxfilter(struct ath_softc *sc) if (sc->cur_chan->rxfilter & FIF_PROBE_REQ) rfilt |= ATH9K_RX_FILTER_PROBEREQ; - /* - * Set promiscuous mode when FIF_PROMISC_IN_BSS is enabled for station - * mode interface or when in monitor mode. AP mode does not need this - * since it receives all in-BSS frames anyway. - */ if (sc->sc_ah->is_monitoring) rfilt |= ATH9K_RX_FILTER_PROM; diff --git a/drivers/net/wireless/ath/carl9170/fw.c b/drivers/net/wireless/ath/carl9170/fw.c index 47d5c2e910ad..020cd46471f5 100644 --- a/drivers/net/wireless/ath/carl9170/fw.c +++ b/drivers/net/wireless/ath/carl9170/fw.c @@ -310,8 +310,7 @@ static int carl9170_fw(struct ar9170 *ar, const __u8 *data, size_t len) if (SUPP(CARL9170FW_RX_FILTER)) { ar->fw.rx_filter = true; ar->rx_filter_caps = FIF_FCSFAIL | FIF_PLCPFAIL | - FIF_CONTROL | FIF_PSPOLL | FIF_OTHER_BSS | - FIF_PROMISC_IN_BSS; + FIF_CONTROL | FIF_PSPOLL | FIF_OTHER_BSS; } if (SUPP(CARL9170FW_HW_COUNTERS)) diff --git a/drivers/net/wireless/ath/carl9170/led.c b/drivers/net/wireless/ath/carl9170/led.c index 78dadc797558..2c74425f5059 100644 --- a/drivers/net/wireless/ath/carl9170/led.c +++ b/drivers/net/wireless/ath/carl9170/led.c @@ -122,7 +122,7 @@ static void carl9170_led_set_brightness(struct led_classdev *led, } static int carl9170_led_register_led(struct ar9170 *ar, int i, char *name, - char *trigger) + const char *trigger) { int err; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index f1455a04cb62..59db6732d4e3 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1011,9 +1011,8 @@ static void carl9170_op_configure_filter(struct ieee80211_hw *hw, if (multicast != ar->cur_mc_hash) WARN_ON(carl9170_update_multicast(ar, multicast)); - if (changed_flags & (FIF_OTHER_BSS | FIF_PROMISC_IN_BSS)) { - ar->sniffer_enabled = !!(*new_flags & - (FIF_OTHER_BSS | FIF_PROMISC_IN_BSS)); + if (changed_flags & FIF_OTHER_BSS) { + ar->sniffer_enabled = !!(*new_flags & FIF_OTHER_BSS); WARN_ON(carl9170_set_operating_mode(ar)); } @@ -1033,7 +1032,7 @@ static void carl9170_op_configure_filter(struct ieee80211_hw *hw, if (!(*new_flags & FIF_PSPOLL)) rx_filter |= CARL9170_RX_FILTER_CTL_PSPOLL; - if (!(*new_flags & (FIF_OTHER_BSS | FIF_PROMISC_IN_BSS))) { + if (!(*new_flags & FIF_OTHER_BSS)) { rx_filter |= CARL9170_RX_FILTER_OTHER_RA; rx_filter |= CARL9170_RX_FILTER_DECRY_FAIL; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index b2f9521fe551..f40992969b4a 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3131,8 +3131,6 @@ static void b43_adjust_opmode(struct b43_wldev *dev) ctl |= B43_MACCTL_KEEP_BAD; if (wl->filter_flags & FIF_PLCPFAIL) ctl |= B43_MACCTL_KEEP_BADPLCP; - if (wl->filter_flags & FIF_PROMISC_IN_BSS) - ctl |= B43_MACCTL_PROMISC; if (wl->filter_flags & FIF_BCN_PRBRESP_PROMISC) ctl |= B43_MACCTL_BEACPROMISC; @@ -4310,16 +4308,14 @@ static void b43_op_configure_filter(struct ieee80211_hw *hw, goto out_unlock; } - *fflags &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + *fflags &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; - changed &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + changed &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index c77b7f59505c..39d49d6cd07f 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2055,8 +2055,6 @@ static void b43legacy_adjust_opmode(struct b43legacy_wldev *dev) ctl |= B43legacy_MACCTL_KEEP_BAD; if (wl->filter_flags & FIF_PLCPFAIL) ctl |= B43legacy_MACCTL_KEEP_BADPLCP; - if (wl->filter_flags & FIF_PROMISC_IN_BSS) - ctl |= B43legacy_MACCTL_PROMISC; if (wl->filter_flags & FIF_BCN_PRBRESP_PROMISC) ctl |= B43legacy_MACCTL_BEACPROMISC; @@ -2922,16 +2920,14 @@ static void b43legacy_op_configure_filter(struct ieee80211_hw *hw, } spin_lock_irqsave(&wl->irq_lock, flags); - *fflags &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + *fflags &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; - changed &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | + changed &= FIF_ALLMULTI | FIF_FCSFAIL | FIF_PLCPFAIL | FIF_CONTROL | diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c index 48135063347e..b46cab250615 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c @@ -41,8 +41,7 @@ #define BRCMS_FLUSH_TIMEOUT 500 /* msec */ /* Flags we support */ -#define MAC_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define MAC_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_CONTROL | \ FIF_OTHER_BSS | \ @@ -743,8 +742,6 @@ brcms_ops_configure_filter(struct ieee80211_hw *hw, changed_flags &= MAC_FILTERS; *total_flags &= MAC_FILTERS; - if (changed_flags & FIF_PROMISC_IN_BSS) - brcms_dbg_info(core, "FIF_PROMISC_IN_BSS\n"); if (changed_flags & FIF_ALLMULTI) brcms_dbg_info(core, "FIF_ALLMULTI\n"); if (changed_flags & FIF_FCSFAIL) diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c index 369527e27689..9728be0e704b 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/main.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c @@ -3571,7 +3571,7 @@ void brcms_c_mac_promisc(struct brcms_c_info *wlc, uint filter_flags) wlc->filter_flags = filter_flags; - if (filter_flags & (FIF_PROMISC_IN_BSS | FIF_OTHER_BSS)) + if (filter_flags & FIF_OTHER_BSS) promisc_bits |= MCTL_PROMISC; if (filter_flags & FIF_BCN_PRBRESP_PROMISC) diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c index b0f65fa09428..b86500b4418f 100644 --- a/drivers/net/wireless/cw1200/sta.c +++ b/drivers/net/wireless/cw1200/sta.c @@ -578,13 +578,11 @@ void cw1200_configure_filter(struct ieee80211_hw *dev, { struct cw1200_common *priv = dev->priv; bool listening = !!(*total_flags & - (FIF_PROMISC_IN_BSS | - FIF_OTHER_BSS | + (FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC | FIF_PROBE_REQ)); - *total_flags &= FIF_PROMISC_IN_BSS | - FIF_OTHER_BSS | + *total_flags &= FIF_OTHER_BSS | FIF_FCSFAIL | FIF_BCN_PRBRESP_PROMISC | FIF_PROBE_REQ; @@ -592,14 +590,12 @@ void cw1200_configure_filter(struct ieee80211_hw *dev, down(&priv->scan.lock); mutex_lock(&priv->conf_mutex); - priv->rx_filter.promiscuous = (*total_flags & FIF_PROMISC_IN_BSS) - ? 1 : 0; + priv->rx_filter.promiscuous = 0; priv->rx_filter.bssid = (*total_flags & (FIF_OTHER_BSS | FIF_PROBE_REQ)) ? 1 : 0; priv->rx_filter.fcs = (*total_flags & FIF_FCSFAIL) ? 1 : 0; priv->disable_beacon_filter = !(*total_flags & (FIF_BCN_PRBRESP_PROMISC | - FIF_PROMISC_IN_BSS | FIF_PROBE_REQ)); if (priv->listening != listening) { priv->listening = listening; diff --git a/drivers/net/wireless/iwlegacy/3945-mac.c b/drivers/net/wireless/iwlegacy/3945-mac.c index e5665804d986..189cdf58084b 100644 --- a/drivers/net/wireless/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/iwlegacy/3945-mac.c @@ -3048,7 +3048,7 @@ il3945_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, D_MAC80211("Enter: changed: 0x%x, total: 0x%x\n", changed_flags, *total_flags); - CHK(FIF_OTHER_BSS | FIF_PROMISC_IN_BSS, RXON_FILTER_PROMISC_MSK); + CHK(FIF_OTHER_BSS, RXON_FILTER_PROMISC_MSK); CHK(FIF_CONTROL, RXON_FILTER_CTL2HOST_MSK); CHK(FIF_BCN_PRBRESP_PROMISC, RXON_FILTER_BCON_AWARE_MSK); @@ -3074,7 +3074,7 @@ il3945_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, * filters into the device. */ *total_flags &= - FIF_OTHER_BSS | FIF_ALLMULTI | FIF_PROMISC_IN_BSS | + FIF_OTHER_BSS | FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c index 976f65fe9c38..e4b175cbeefd 100644 --- a/drivers/net/wireless/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/iwlegacy/4965-mac.c @@ -6166,7 +6166,7 @@ il4965_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, D_MAC80211("Enter: changed: 0x%x, total: 0x%x\n", changed_flags, *total_flags); - CHK(FIF_OTHER_BSS | FIF_PROMISC_IN_BSS, RXON_FILTER_PROMISC_MSK); + CHK(FIF_OTHER_BSS, RXON_FILTER_PROMISC_MSK); /* Setting _just_ RXON_FILTER_CTL2HOST_MSK causes FH errors */ CHK(FIF_CONTROL, RXON_FILTER_CTL2HOST_MSK | RXON_FILTER_PROMISC_MSK); CHK(FIF_BCN_PRBRESP_PROMISC, RXON_FILTER_BCON_AWARE_MSK); @@ -6192,7 +6192,7 @@ il4965_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, * filters into the device. */ *total_flags &= - FIF_OTHER_BSS | FIF_ALLMULTI | FIF_PROMISC_IN_BSS | + FIF_OTHER_BSS | FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index 5abd62ed8cb4..ba7fc42edf97 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1061,7 +1061,7 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw, IWL_DEBUG_MAC80211(priv, "Enter: changed: 0x%x, total: 0x%x\n", changed_flags, *total_flags); - CHK(FIF_OTHER_BSS | FIF_PROMISC_IN_BSS, RXON_FILTER_PROMISC_MSK); + CHK(FIF_OTHER_BSS, RXON_FILTER_PROMISC_MSK); /* Setting _just_ RXON_FILTER_CTL2HOST_MSK causes FH errors */ CHK(FIF_CONTROL, RXON_FILTER_CTL2HOST_MSK | RXON_FILTER_PROMISC_MSK); CHK(FIF_BCN_PRBRESP_PROMISC, RXON_FILTER_BCON_AWARE_MSK); @@ -1088,7 +1088,7 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw, * since we currently do not support programming multicast * filters into the device. */ - *total_flags &= FIF_OTHER_BSS | FIF_ALLMULTI | FIF_PROMISC_IN_BSS | + *total_flags &= FIF_OTHER_BSS | FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL; } @@ -1140,7 +1140,6 @@ static void iwlagn_mac_event_callback(struct ieee80211_hw *hw, return; IWL_DEBUG_MAC80211(priv, "enter\n"); - mutex_lock(&priv->mutex); if (priv->lib->bt_params && priv->lib->bt_params->advanced_bt_coexist) { @@ -1149,13 +1148,12 @@ static void iwlagn_mac_event_callback(struct ieee80211_hw *hw, else if (event->u.rssi.data == RSSI_EVENT_HIGH) priv->bt_enable_pspoll = false; - iwlagn_send_advance_bt_config(priv); + queue_work(priv->workqueue, &priv->bt_runtime_config); } else { IWL_DEBUG_MAC80211(priv, "Advanced BT coex disabled," "ignoring RSSI callback\n"); } - mutex_unlock(&priv->mutex); IWL_DEBUG_MAC80211(priv, "leave\n"); } diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index 83903a5025c2..0b5a81d52a3e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -248,7 +248,7 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz, */ if ((nvm_flags & NVM_CHANNEL_GO_CONCURRENT) && (flags & IEEE80211_CHAN_NO_IR)) - flags |= IEEE80211_CHAN_GO_CONCURRENT; + flags |= IEEE80211_CHAN_IR_CONCURRENT; return flags; } diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index ed02e4bf2c26..1bdf18674fb8 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -439,7 +439,7 @@ static u64 lbtf_op_prepare_multicast(struct ieee80211_hw *hw, return mc_count; } -#define SUPPORTED_FIF_FLAGS (FIF_PROMISC_IN_BSS | FIF_ALLMULTI) +#define SUPPORTED_FIF_FLAGS FIF_ALLMULTI static void lbtf_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, @@ -458,10 +458,7 @@ static void lbtf_op_configure_filter(struct ieee80211_hw *hw, return; } - if (*new_flags & (FIF_PROMISC_IN_BSS)) - priv->mac_control |= CMD_ACT_MAC_PROMISCUOUS_ENABLE; - else - priv->mac_control &= ~CMD_ACT_MAC_PROMISCUOUS_ENABLE; + priv->mac_control &= ~CMD_ACT_MAC_PROMISCUOUS_ENABLE; if (*new_flags & (FIF_ALLMULTI) || multicast > MRVDRV_MAX_MULTICAST_LIST_SIZE) { priv->mac_control |= CMD_ACT_MAC_ALL_MULTICAST_ENABLE; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index d5c0a1af08b9..8d2f6bbf9598 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1554,8 +1554,6 @@ static void mac80211_hwsim_configure_filter(struct ieee80211_hw *hw, wiphy_debug(hw->wiphy, "%s\n", __func__); data->rx_filter = 0; - if (*total_flags & FIF_PROMISC_IN_BSS) - data->rx_filter |= FIF_PROMISC_IN_BSS; if (*total_flags & FIF_ALLMULTI) data->rx_filter |= FIF_ALLMULTI; @@ -2399,7 +2397,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, IEEE80211_HW_WANT_MONITOR_VIF | IEEE80211_HW_QUEUE_CONTROL | IEEE80211_HW_SUPPORTS_HT_CCK_RATES | - IEEE80211_HW_CHANCTX_STA_CSA; + IEEE80211_HW_CHANCTX_STA_CSA | + IEEE80211_HW_SUPPORT_FAST_XMIT; if (rctbl) hw->flags |= IEEE80211_HW_SUPPORTS_RC_TABLE; @@ -2438,6 +2437,31 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, sband->n_channels = ARRAY_SIZE(hwsim_channels_5ghz); sband->bitrates = data->rates + 4; sband->n_bitrates = ARRAY_SIZE(hwsim_rates) - 4; + + sband->vht_cap.vht_supported = true; + sband->vht_cap.cap = + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ | + IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160 | + IEEE80211_VHT_CAP_TXSTBC | + IEEE80211_VHT_CAP_RXSTBC_1 | + IEEE80211_VHT_CAP_RXSTBC_2 | + IEEE80211_VHT_CAP_RXSTBC_3 | + IEEE80211_VHT_CAP_RXSTBC_4 | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; + sband->vht_cap.vht_mcs.rx_mcs_map = + cpu_to_le16(IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 2 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 6 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 8 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | + IEEE80211_VHT_MCS_SUPPORT_0_9 << 14); + sband->vht_cap.vht_mcs.tx_mcs_map = + sband->vht_cap.vht_mcs.rx_mcs_map; break; default: continue; @@ -2458,31 +2482,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, sband->ht_cap.mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED; hw->wiphy->bands[band] = sband; - - sband->vht_cap.vht_supported = true; - sband->vht_cap.cap = - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | - IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ | - IEEE80211_VHT_CAP_RXLDPC | - IEEE80211_VHT_CAP_SHORT_GI_80 | - IEEE80211_VHT_CAP_SHORT_GI_160 | - IEEE80211_VHT_CAP_TXSTBC | - IEEE80211_VHT_CAP_RXSTBC_1 | - IEEE80211_VHT_CAP_RXSTBC_2 | - IEEE80211_VHT_CAP_RXSTBC_3 | - IEEE80211_VHT_CAP_RXSTBC_4 | - IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; - sband->vht_cap.vht_mcs.rx_mcs_map = - cpu_to_le16(IEEE80211_VHT_MCS_SUPPORT_0_8 << 0 | - IEEE80211_VHT_MCS_SUPPORT_0_8 << 2 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 4 | - IEEE80211_VHT_MCS_SUPPORT_0_8 << 6 | - IEEE80211_VHT_MCS_SUPPORT_0_8 << 8 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 10 | - IEEE80211_VHT_MCS_SUPPORT_0_9 << 12 | - IEEE80211_VHT_MCS_SUPPORT_0_8 << 14); - sband->vht_cap.vht_mcs.tx_mcs_map = - sband->vht_cap.vht_mcs.rx_mcs_map; } /* By default all radios belong to the first group */ diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 95921167b53f..b71fc74d14ab 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -5192,7 +5192,7 @@ mwl8k_configure_filter_sniffer(struct ieee80211_hw *hw, priv->sniffer_enabled = true; } - *total_flags &= FIF_PROMISC_IN_BSS | FIF_ALLMULTI | + *total_flags &= FIF_ALLMULTI | FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL | FIF_OTHER_BSS; diff --git a/drivers/net/wireless/p54/fwio.c b/drivers/net/wireless/p54/fwio.c index 275408eaf95e..257a9eadd595 100644 --- a/drivers/net/wireless/p54/fwio.c +++ b/drivers/net/wireless/p54/fwio.c @@ -351,8 +351,7 @@ int p54_setup_mac(struct p54_common *priv) * "TRANSPARENT and PROMISCUOUS are mutually exclusive" * STSW45X0C LMAC API - page 12 */ - if (((priv->filter_flags & FIF_PROMISC_IN_BSS) || - (priv->filter_flags & FIF_OTHER_BSS)) && + if (priv->filter_flags & FIF_OTHER_BSS && (mode != P54_FILTER_TYPE_PROMISCUOUS)) mode |= P54_FILTER_TYPE_TRANSPARENT; } else { diff --git a/drivers/net/wireless/p54/led.c b/drivers/net/wireless/p54/led.c index 1f6fd5ff5531..9a8fedd3c0f5 100644 --- a/drivers/net/wireless/p54/led.c +++ b/drivers/net/wireless/p54/led.c @@ -83,7 +83,7 @@ static void p54_led_brightness_set(struct led_classdev *led_dev, static int p54_register_led(struct p54_common *priv, unsigned int led_index, - char *name, char *trigger) + char *name, const char *trigger) { struct p54_led_dev *led = &priv->leds[led_index]; int err; diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index e79674f73dc5..2947ad21053c 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -395,13 +395,11 @@ static void p54_configure_filter(struct ieee80211_hw *dev, { struct p54_common *priv = dev->priv; - *total_flags &= FIF_PROMISC_IN_BSS | - FIF_ALLMULTI | - FIF_OTHER_BSS; + *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS; priv->filter_flags = *total_flags; - if (changed_flags & (FIF_PROMISC_IN_BSS | FIF_OTHER_BSS)) + if (changed_flags & FIF_OTHER_BSS) p54_setup_mac(priv); if (changed_flags & FIF_ALLMULTI || multicast) diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c index bdf5590ba304..7da138892026 100644 --- a/drivers/net/wireless/rt2x00/rt2400pci.c +++ b/drivers/net/wireless/rt2x00/rt2400pci.c @@ -273,10 +273,8 @@ static void rt2400pci_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(®, RXCSR0_DROP_CONTROL, !(filter_flags & FIF_CONTROL)); - rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(®, RXCSR0_DROP_TODS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(®, RXCSR0_DROP_VERSION_ERROR, 1); rt2x00mmio_register_write(rt2x00dev, RXCSR0, reg); diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c index 79f4fe65a119..4ea53aa9ede3 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/rt2x00/rt2500pci.c @@ -274,10 +274,8 @@ static void rt2500pci_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(®, RXCSR0_DROP_CONTROL, !(filter_flags & FIF_CONTROL)); - rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(®, RXCSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(®, RXCSR0_DROP_TODS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(®, RXCSR0_DROP_VERSION_ERROR, 1); rt2x00_set_field32(®, RXCSR0_DROP_MCAST, diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 05c64597838d..237bbb54c7a8 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -434,10 +434,8 @@ static void rt2500usb_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field16(®, TXRX_CSR2_DROP_CONTROL, !(filter_flags & FIF_CONTROL)); - rt2x00_set_field16(®, TXRX_CSR2_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field16(®, TXRX_CSR2_DROP_NOT_TO_ME, 1); rt2x00_set_field16(®, TXRX_CSR2_DROP_TODS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field16(®, TXRX_CSR2_DROP_VERSION_ERROR, 1); rt2x00_set_field16(®, TXRX_CSR2_DROP_MULTICAST, diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index be2d54f257b1..dfeca8355b22 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -1513,8 +1513,7 @@ void rt2800_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_FCSFAIL)); rt2x00_set_field32(®, RX_FILTER_CFG_DROP_PHY_ERROR, !(filter_flags & FIF_PLCPFAIL)); - rt2x00_set_field32(®, RX_FILTER_CFG_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(®, RX_FILTER_CFG_DROP_NOT_TO_ME, 1); rt2x00_set_field32(®, RX_FILTER_CFG_DROP_NOT_MY_BSSD, 0); rt2x00_set_field32(®, RX_FILTER_CFG_DROP_VER_ERROR, 1); rt2x00_set_field32(®, RX_FILTER_CFG_DROP_MULTICAST, @@ -7818,21 +7817,25 @@ EXPORT_SYMBOL_GPL(rt2800_probe_hw); /* * IEEE80211 stack callback functions. */ -void rt2800_get_tkip_seq(struct ieee80211_hw *hw, u8 hw_key_idx, u32 *iv32, - u16 *iv16) +void rt2800_get_key_seq(struct ieee80211_hw *hw, + struct ieee80211_key_conf *key, + struct ieee80211_key_seq *seq) { struct rt2x00_dev *rt2x00dev = hw->priv; struct mac_iveiv_entry iveiv_entry; u32 offset; - offset = MAC_IVEIV_ENTRY(hw_key_idx); + if (key->cipher != WLAN_CIPHER_SUITE_TKIP) + return; + + offset = MAC_IVEIV_ENTRY(key->hw_key_idx); rt2800_register_multiread(rt2x00dev, offset, &iveiv_entry, sizeof(iveiv_entry)); - memcpy(iv16, &iveiv_entry.iv[0], sizeof(*iv16)); - memcpy(iv32, &iveiv_entry.iv[4], sizeof(*iv32)); + memcpy(&seq->tkip.iv16, &iveiv_entry.iv[0], 2); + memcpy(&seq->tkip.iv32, &iveiv_entry.iv[4], 4); } -EXPORT_SYMBOL_GPL(rt2800_get_tkip_seq); +EXPORT_SYMBOL_GPL(rt2800_get_key_seq); int rt2800_set_rts_threshold(struct ieee80211_hw *hw, u32 value) { diff --git a/drivers/net/wireless/rt2x00/rt2800lib.h b/drivers/net/wireless/rt2x00/rt2800lib.h index 3019db637a4b..1609b8a7f7eb 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.h +++ b/drivers/net/wireless/rt2x00/rt2800lib.h @@ -209,8 +209,9 @@ int rt2800_read_eeprom_efuse(struct rt2x00_dev *rt2x00dev); int rt2800_probe_hw(struct rt2x00_dev *rt2x00dev); -void rt2800_get_tkip_seq(struct ieee80211_hw *hw, u8 hw_key_idx, u32 *iv32, - u16 *iv16); +void rt2800_get_key_seq(struct ieee80211_hw *hw, + struct ieee80211_key_conf *key, + struct ieee80211_key_seq *seq); int rt2800_set_rts_threshold(struct ieee80211_hw *hw, u32 value); int rt2800_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 queue_idx, diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c index cc1b3cc73c5a..0af22573a2eb 100644 --- a/drivers/net/wireless/rt2x00/rt2800pci.c +++ b/drivers/net/wireless/rt2x00/rt2800pci.c @@ -309,7 +309,7 @@ static const struct ieee80211_ops rt2800pci_mac80211_ops = { .sw_scan_start = rt2x00mac_sw_scan_start, .sw_scan_complete = rt2x00mac_sw_scan_complete, .get_stats = rt2x00mac_get_stats, - .get_tkip_seq = rt2800_get_tkip_seq, + .get_key_seq = rt2800_get_key_seq, .set_rts_threshold = rt2800_set_rts_threshold, .sta_add = rt2x00mac_sta_add, .sta_remove = rt2x00mac_sta_remove, diff --git a/drivers/net/wireless/rt2x00/rt2800soc.c b/drivers/net/wireless/rt2x00/rt2800soc.c index aaa7aa4cad9d..a985a5a7945e 100644 --- a/drivers/net/wireless/rt2x00/rt2800soc.c +++ b/drivers/net/wireless/rt2x00/rt2800soc.c @@ -148,7 +148,7 @@ static const struct ieee80211_ops rt2800soc_mac80211_ops = { .sw_scan_start = rt2x00mac_sw_scan_start, .sw_scan_complete = rt2x00mac_sw_scan_complete, .get_stats = rt2x00mac_get_stats, - .get_tkip_seq = rt2800_get_tkip_seq, + .get_key_seq = rt2800_get_key_seq, .set_rts_threshold = rt2800_set_rts_threshold, .sta_add = rt2x00mac_sta_add, .sta_remove = rt2x00mac_sta_remove, diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 6ec2466b52b6..5932306084fd 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -835,7 +835,7 @@ static const struct ieee80211_ops rt2800usb_mac80211_ops = { .sw_scan_start = rt2x00mac_sw_scan_start, .sw_scan_complete = rt2x00mac_sw_scan_complete, .get_stats = rt2x00mac_get_stats, - .get_tkip_seq = rt2800_get_tkip_seq, + .get_key_seq = rt2800_get_key_seq, .set_rts_threshold = rt2800_set_rts_threshold, .sta_add = rt2x00mac_sta_add, .sta_remove = rt2x00mac_sta_remove, diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 300876df056f..1b8a459a412b 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -359,8 +359,7 @@ void rt2x00mac_configure_filter(struct ieee80211_hw *hw, FIF_PLCPFAIL | FIF_CONTROL | FIF_PSPOLL | - FIF_OTHER_BSS | - FIF_PROMISC_IN_BSS; + FIF_OTHER_BSS; /* * Apply some rules to the filters: @@ -369,9 +368,6 @@ void rt2x00mac_configure_filter(struct ieee80211_hw *hw, * - Multicast filter seems to kill broadcast traffic so never use it. */ *total_flags |= FIF_ALLMULTI; - if (*total_flags & FIF_OTHER_BSS || - *total_flags & FIF_PROMISC_IN_BSS) - *total_flags |= FIF_PROMISC_IN_BSS | FIF_OTHER_BSS; /* * If the device has a single filter for all control frames, diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index 819455009fe4..c8a967247a9a 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -530,10 +530,8 @@ static void rt61pci_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(®, TXRX_CSR0_DROP_CONTROL, !(filter_flags & (FIF_CONTROL | FIF_PSPOLL))); - rt2x00_set_field32(®, TXRX_CSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(®, TXRX_CSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(®, TXRX_CSR0_DROP_TO_DS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(®, TXRX_CSR0_DROP_VERSION_ERROR, 1); rt2x00_set_field32(®, TXRX_CSR0_DROP_MULTICAST, diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index a5458cf01fb2..65ce3afb888a 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -480,10 +480,8 @@ static void rt73usb_config_filter(struct rt2x00_dev *rt2x00dev, !(filter_flags & FIF_PLCPFAIL)); rt2x00_set_field32(®, TXRX_CSR0_DROP_CONTROL, !(filter_flags & (FIF_CONTROL | FIF_PSPOLL))); - rt2x00_set_field32(®, TXRX_CSR0_DROP_NOT_TO_ME, - !(filter_flags & FIF_PROMISC_IN_BSS)); + rt2x00_set_field32(®, TXRX_CSR0_DROP_NOT_TO_ME, 1); rt2x00_set_field32(®, TXRX_CSR0_DROP_TO_DS, - !(filter_flags & FIF_PROMISC_IN_BSS) && !rt2x00dev->intf_ap_count); rt2x00_set_field32(®, TXRX_CSR0_DROP_VERSION_ERROR, 1); rt2x00_set_field32(®, TXRX_CSR0_DROP_MULTICAST, diff --git a/drivers/net/wireless/rtlwifi/core.h b/drivers/net/wireless/rtlwifi/core.h index 82733c6b8c46..782ac2fc4b28 100644 --- a/drivers/net/wireless/rtlwifi/core.h +++ b/drivers/net/wireless/rtlwifi/core.h @@ -27,8 +27,7 @@ #define __RTL_CORE_H__ #define RTL_SUPPORTED_FILTERS \ - (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | FIF_CONTROL | \ + (FIF_ALLMULTI | FIF_CONTROL | \ FIF_OTHER_BSS | \ FIF_FCSFAIL | \ FIF_BCN_PRBRESP_PROMISC) diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 5d54d16a59e7..f238ee54226c 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -763,8 +763,7 @@ static u64 wl1251_op_prepare_multicast(struct ieee80211_hw *hw, return (u64)(unsigned long)fp; } -#define WL1251_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define WL1251_SUPPORTED_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_BCN_PRBRESP_PROMISC | \ FIF_CONTROL | \ @@ -795,10 +794,6 @@ static void wl1251_op_configure_filter(struct ieee80211_hw *hw, wl->rx_config = WL1251_DEFAULT_RX_CONFIG; wl->rx_filter = WL1251_DEFAULT_RX_FILTER; - if (*total & FIF_PROMISC_IN_BSS) { - wl->rx_config |= CFG_BSSID_FILTER_EN; - wl->rx_config |= CFG_RX_ALL_GOOD; - } if (*total & FIF_ALLMULTI) /* * CFG_MC_FILTER_EN in rx_config needs to be 0 to receive @@ -825,7 +820,7 @@ static void wl1251_op_configure_filter(struct ieee80211_hw *hw, if (ret < 0) goto out; - if (*total & FIF_ALLMULTI || *total & FIF_PROMISC_IN_BSS) + if (*total & FIF_ALLMULTI) ret = wl1251_acx_group_address_tbl(wl, false, NULL, 0); else if (fp) ret = wl1251_acx_group_address_tbl(wl, fp->enabled, diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 0be807951afe..7fe50f8182f3 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -3175,8 +3175,7 @@ static u64 wl1271_op_prepare_multicast(struct ieee80211_hw *hw, return (u64)(unsigned long)fp; } -#define WL1271_SUPPORTED_FILTERS (FIF_PROMISC_IN_BSS | \ - FIF_ALLMULTI | \ +#define WL1271_SUPPORTED_FILTERS (FIF_ALLMULTI | \ FIF_FCSFAIL | \ FIF_BCN_PRBRESP_PROMISC | \ FIF_CONTROL | \ @@ -6077,7 +6076,8 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) IEEE80211_HW_AMPDU_AGGREGATION | IEEE80211_HW_TX_AMPDU_SETUP_IN_HW | IEEE80211_HW_QUEUE_CONTROL | - IEEE80211_HW_CHANCTX_STA_CSA; + IEEE80211_HW_CHANCTX_STA_CSA | + IEEE80211_HW_SUPPORT_FAST_XMIT; wl->hw->wiphy->cipher_suites = cipher_suites; wl->hw->wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index e7af261e9198..89b6f69f09c8 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -1230,7 +1230,7 @@ static u64 zd_op_prepare_multicast(struct ieee80211_hw *hw, } #define SUPPORTED_FIF_FLAGS \ - (FIF_PROMISC_IN_BSS | FIF_ALLMULTI | FIF_FCSFAIL | FIF_CONTROL | \ + (FIF_ALLMULTI | FIF_FCSFAIL | FIF_CONTROL | \ FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC) static void zd_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, @@ -1256,7 +1256,7 @@ static void zd_op_configure_filter(struct ieee80211_hw *hw, * we will have some issue with IPv6 which uses multicast for link * layer address resolution. */ - if (*new_flags & (FIF_PROMISC_IN_BSS | FIF_ALLMULTI)) + if (*new_flags & FIF_ALLMULTI) zd_mc_add_all(&hash); spin_lock_irqsave(&mac->lock, flags); diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 4bb4f8ee4132..6b2f813afb52 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1516,21 +1516,12 @@ static void vnt_configure(struct ieee80211_hw *hw, struct vnt_private *priv = hw->priv; u8 rx_mode = 0; - *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_PROMISC_IN_BSS | - FIF_BCN_PRBRESP_PROMISC; + *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; VNSvInPortB(priv->PortOffset + MAC_REG_RCR, &rx_mode); dev_dbg(&priv->pcid->dev, "rx mode in = %x\n", rx_mode); - if (changed_flags & FIF_PROMISC_IN_BSS) { - /* unconditionally log net taps */ - if (*total_flags & FIF_PROMISC_IN_BSS) - rx_mode |= RCR_UNICAST; - else - rx_mode &= ~RCR_UNICAST; - } - if (changed_flags & FIF_ALLMULTI) { if (*total_flags & FIF_ALLMULTI) { unsigned long flags; diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index ab3ab84cb0a7..0d97b6457ead 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -785,8 +785,7 @@ static void vnt_configure(struct ieee80211_hw *hw, u8 rx_mode = 0; int rc; - *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_PROMISC_IN_BSS | - FIF_BCN_PRBRESP_PROMISC; + *total_flags &= FIF_ALLMULTI | FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC; rc = vnt_control_in(priv, MESSAGE_TYPE_READ, MAC_REG_RCR, MESSAGE_REQUEST_MACREG, sizeof(u8), &rx_mode); @@ -796,14 +795,6 @@ static void vnt_configure(struct ieee80211_hw *hw, dev_dbg(&priv->usb->dev, "rx mode in = %x\n", rx_mode); - if (changed_flags & FIF_PROMISC_IN_BSS) { - /* unconditionally log net taps */ - if (*total_flags & FIF_PROMISC_IN_BSS) - rx_mode |= RCR_UNICAST; - else - rx_mode &= ~RCR_UNICAST; - } - if (changed_flags & FIF_ALLMULTI) { if (*total_flags & FIF_ALLMULTI) { if (priv->mc_list_count > 2) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 3a57a1b0fb51..b50642870a43 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -85,7 +85,7 @@ int afs_open_socket(void) return -ENOMEM; } - ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); + ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket); if (ret < 0) { destroy_workqueue(afs_async_calls); _leave(" = %d [socket]", ret); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index d08e079ea5d3..754fd6c0b747 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -921,8 +921,8 @@ static int tcp_accept_from_sock(struct connection *con) mutex_unlock(&connections_lock); memset(&peeraddr, 0, sizeof(peeraddr)); - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &newsock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &newsock); if (result < 0) return -ENOMEM; @@ -1173,8 +1173,8 @@ static void tcp_connect_to_sock(struct connection *con) goto out; /* Create a socket to communicate with */ - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (result < 0) goto out_err; @@ -1258,8 +1258,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con, addr_len = sizeof(struct sockaddr_in6); /* Create a socket to communicate with */ - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (result < 0) { log_print("Can't create listening comms socket"); goto create_out; @@ -1365,8 +1365,8 @@ static int sctp_listen_for_all(void) log_print("Using SCTP for communications"); - result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET, - IPPROTO_SCTP, &sock); + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, + SOCK_SEQPACKET, IPPROTO_SCTP, &sock); if (result < 0) { log_print("Can't create comms socket, check SCTP is loaded"); goto out; diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 606563ef8a72..9012f8775208 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -110,7 +110,29 @@ static inline bool is_zero_ether_addr(const u8 *addr) */ static inline bool is_multicast_ether_addr(const u8 *addr) { - return 0x01 & addr[0]; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + u32 a = *(const u32 *)addr; +#else + u16 a = *(const u16 *)addr; +#endif +#ifdef __BIG_ENDIAN + return 0x01 & (a >> ((sizeof(a) * 8) - 8)); +#else + return 0x01 & a; +#endif +} + +static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 +#ifdef __BIG_ENDIAN + return 0x01 & ((*(const u64 *)addr) >> 56); +#else + return 0x01 & (*(const u64 *)addr); +#endif +#else + return is_multicast_ether_addr(addr); +#endif } /** @@ -169,6 +191,24 @@ static inline bool is_valid_ether_addr(const u8 *addr) } /** + * eth_proto_is_802_3 - Determine if a given Ethertype/length is a protocol + * @proto: Ethertype/length value to be tested + * + * Check that the value from the Ethertype/length field is a valid Ethertype. + * + * Return true if the valid is an 802.3 supported Ethertype. + */ +static inline bool eth_proto_is_802_3(__be16 proto) +{ +#ifndef __BIG_ENDIAN + /* if CPU is little endian mask off bits representing LSB */ + proto &= htons(0xFF00); +#endif + /* cast both to u16 and compare since LSB can be ignored */ + return (__force u16)proto >= (__force u16)htons(ETH_P_802_3_MIN); +} + +/** * eth_random_addr - Generate software assigned random Ethernet address * @addr: Pointer to a six-byte array containing the Ethernet address * diff --git a/include/linux/filter.h b/include/linux/filter.h index fa11b3a367be..ce1d72d34382 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -207,6 +207,16 @@ struct bpf_prog_aux; .off = OFF, \ .imm = 0 }) +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ @@ -363,9 +373,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb); void bpf_prog_select_runtime(struct bpf_prog *fp); void bpf_prog_free(struct bpf_prog *fp); -int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len); - struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); @@ -377,14 +384,17 @@ static inline void bpf_prog_unlock_free(struct bpf_prog *fp) __bpf_prog_free(fp); } +typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, + unsigned int flen); + int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); +int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, + bpf_aux_classic_check_t trans); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); - -int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 97a9373e61e8..70a7fee1efb3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -366,6 +366,11 @@ extern void free_pages(unsigned long addr, unsigned int order); extern void free_hot_cold_page(struct page *page, bool cold); extern void free_hot_cold_page_list(struct list_head *list, bool cold); +struct page_frag_cache; +extern void *__alloc_page_frag(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask); +extern void __free_page_frag(void *addr); + extern void __free_kmem_pages(struct page *page, unsigned int order); extern void free_kmem_pages(unsigned long addr, unsigned int order); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 66a7d7600f43..b49cf923becc 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -74,7 +74,7 @@ static inline struct sock *sk_pppox(struct pppox_sock *po) struct module; struct pppox_proto { - int (*create)(struct net *net, struct socket *sock); + int (*create)(struct net *net, struct socket *sock, int kern); int (*ioctl)(struct socket *sock, unsigned int cmd, unsigned long arg); struct module *owner; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 920e4457ce6e..b9ab677c0c0a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -539,7 +539,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ proto = vhdr->h_vlan_encapsulated_proto; - if (ntohs(proto) >= ETH_P_802_3_MIN) { + if (eth_proto_is_802_3(proto)) { skb->protocol = proto; return; } diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 2c677afeea47..193ad488d3e2 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -130,5 +130,6 @@ extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); +int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed); #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8d37e26a1007..0038ac7466fd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -226,6 +226,24 @@ struct page_frag { #endif }; +#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) + +struct page_frag_cache { + void * va; +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + __u16 offset; + __u16 size; +#else + __u32 offset; +#endif + /* we maintain a pagecount bias, so that we dont dirty cache line + * containing page->_count every time we allocate a fragment. + */ + unsigned int pagecnt_bias; + bool pfmemalloc; +}; + typedef unsigned long __nocast vm_flags_t; /* diff --git a/include/linux/net.h b/include/linux/net.h index 738ea48be889..04aa06852771 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -38,7 +38,6 @@ struct net; #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 -#define SOCK_EXTERNALLY_ALLOCATED 5 #ifndef ARCH_HAS_SOCKET_TYPES /** @@ -208,7 +207,7 @@ void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, struct socket **res, int kern); int sock_create(int family, int type, int proto, struct socket **res); -int sock_create_kern(int family, int type, int proto, struct socket **res); +int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res); int sock_create_lite(int family, int type, int proto, struct socket **res); void sock_release(struct socket *sock); int sock_sendmsg(struct socket *sock, struct msghdr *msg); diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7d59dc6ab789..9672781c593d 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -66,7 +66,6 @@ enum { NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_BUSY_POLL_BIT, /* Busy poll */ - NETIF_F_HW_SWITCH_OFFLOAD_BIT, /* HW switch offload */ /* * Add your fresh new feature above and remember to update @@ -125,7 +124,6 @@ enum { #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) -#define NETIF_F_HW_SWITCH_OFFLOAD __NETIF_F(HW_SWITCH_OFFLOAD) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ @@ -161,8 +159,7 @@ enum { */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ NETIF_F_SG | NETIF_F_HIGHDMA | \ - NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_HW_SWITCH_OFFLOAD) + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) /* * If one device doesn't support one of these features, then disable it diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 05b9a694e213..cd0951c1893d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1564,7 +1564,7 @@ struct net_device { const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_SWITCHDEV - const struct swdev_ops *swdev_ops; + const struct switchdev_ops *switchdev_ops; #endif const struct header_ops *header_ops; @@ -1652,7 +1652,11 @@ struct net_device { rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; +#ifdef CONFIG_NET_CLS_ACT + struct tcf_proto __rcu *ingress_cl_list; +#endif struct netdev_queue __rcu *ingress_queue; + unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rx_cpu_rmap; @@ -2552,10 +2556,6 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { - if (WARN_ON(!dev_queue)) { - pr_info("netif_stop_queue() cannot be called before register_netdev()\n"); - return; - } set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -2571,15 +2571,7 @@ static inline void netif_stop_queue(struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); } -static inline void netif_tx_stop_all_queues(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - netif_tx_stop_queue(txq); - } -} +void netif_tx_stop_all_queues(struct net_device *dev); static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6835c1279df7..9120edb650a0 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -28,6 +28,8 @@ struct netlink_skb_parms { __u32 dst_group; __u32 flags; struct sock *sk; + bool nsid_is_set; + int nsid; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 66e374d62f64..c0b574a414e7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -927,6 +927,8 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } +__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); + static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { return skb->hash; @@ -2126,10 +2128,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE - void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, @@ -2184,6 +2182,11 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } +static inline void skb_free_frag(void *addr) +{ + __free_page_frag(addr); +} + void *napi_alloc_frag(unsigned int fragsz); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int length, gfp_t gfp_mask); @@ -3417,6 +3420,9 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); +struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, + unsigned int transport_len, + __sum16(*skb_chkf)(struct sk_buff *skb)); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3b2911502a8c..e6fb5df22db1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -199,6 +199,7 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ @@ -326,6 +327,7 @@ struct tcp_sock { * socket. Used to retransmit SYNACKs etc. */ struct request_sock *fastopen_rsk; + u32 *saved_syn; }; enum tsq_flags { @@ -393,4 +395,10 @@ static inline int fastopen_init_queue(struct sock *sk, int backlog) return 0; } +static inline void tcp_saved_syn_free(struct tcp_sock *tp) +{ + kfree(tp->saved_syn); + tp->saved_syn = NULL; +} + #endif /* _LINUX_TCP_H */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 80456f72d70a..def59d3a34d5 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -142,6 +142,7 @@ void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); void addrconf_dad_failure(struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 172632dd9930..db639a4c5ab8 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -74,7 +74,7 @@ void vsock_pending_work(struct work_struct *work); struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, - gfp_t priority, unsigned short type); + gfp_t priority, unsigned short type, int kern); /**** TRANSPORT ****/ diff --git a/include/net/bond_options.h b/include/net/bond_options.h index ea6546d2c946..c28aca25320e 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -63,6 +63,9 @@ enum { BOND_OPT_LP_INTERVAL, BOND_OPT_SLAVES, BOND_OPT_TLB_DYNAMIC_LB, + BOND_OPT_AD_ACTOR_SYS_PRIO, + BOND_OPT_AD_ACTOR_SYSTEM, + BOND_OPT_AD_USER_PORT_KEY, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 78ed135e9dea..20defc0353d1 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -136,6 +136,9 @@ struct bond_params { int packets_per_slave; int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; + u16 ad_actor_sys_prio; + u16 ad_user_port_key; + u8 ad_actor_system[ETH_ALEN]; }; struct bond_parm_tbl { diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f8d6813cd5b2..d63ecec73090 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -111,7 +111,7 @@ enum ieee80211_band { * This may be due to the driver or due to regulatory bandwidth * restrictions. * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY - * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @IEEE80211_CHAN_IR_CONCURRENT: see %NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted * on this channel. * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted @@ -129,7 +129,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_80MHZ = 1<<7, IEEE80211_CHAN_NO_160MHZ = 1<<8, IEEE80211_CHAN_INDOOR_ONLY = 1<<9, - IEEE80211_CHAN_GO_CONCURRENT = 1<<10, + IEEE80211_CHAN_IR_CONCURRENT = 1<<10, IEEE80211_CHAN_NO_20MHZ = 1<<11, IEEE80211_CHAN_NO_10MHZ = 1<<12, }; diff --git a/include/net/codel.h b/include/net/codel.h index 1e18005f7f65..267e70210061 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -7,7 +7,7 @@ * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> - * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -119,12 +119,14 @@ static inline u32 codel_time_to_us(codel_time_t val) /** * struct codel_params - contains codel parameters * @target: target queue size (in time units) + * @ce_threshold: threshold for marking packets with ECN CE * @interval: width of moving time window * @mtu: device mtu, or minimal queue backlog in bytes. * @ecn: is Explicit Congestion Notification enabled */ struct codel_params { codel_time_t target; + codel_time_t ce_threshold; codel_time_t interval; u32 mtu; bool ecn; @@ -161,19 +163,24 @@ struct codel_vars { * @maxpacket: largest packet we've seen so far * @drop_count: temp count of dropped packets in dequeue() * ecn_mark: number of packets we ECN marked instead of dropping + * ce_mark: number of packets CE marked because sojourn time was above ce_threshold */ struct codel_stats { u32 maxpacket; u32 drop_count; u32 ecn_mark; + u32 ce_mark; }; +#define CODEL_DISABLED_THRESHOLD INT_MAX + static void codel_params_init(struct codel_params *params, const struct Qdisc *sch) { params->interval = MS2TIME(100); params->target = MS2TIME(5); params->mtu = psched_mtu(qdisc_dev(sch)); + params->ce_threshold = CODEL_DISABLED_THRESHOLD; params->ecn = false; } @@ -354,6 +361,9 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, vars->rec_inv_sqrt); } end: + if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && + INET_ECN_set_ce(skb)) + stats->ce_mark++; return skb; } #endif diff --git a/include/net/dst.h b/include/net/dst.h index 0fb99a26e973..2bc73f8a00a9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -109,7 +109,6 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL -#define DST_METRICS_FORCE_OVERWRITE 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) @@ -120,11 +119,6 @@ static inline bool dst_metrics_read_only(const struct dst_entry *dst) return dst->_metrics & DST_METRICS_READ_ONLY; } -static inline void dst_metrics_set_force_overwrite(struct dst_entry *dst) -{ - dst->_metrics |= DST_METRICS_FORCE_OVERWRITE; -} - void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old); static inline void dst_destroy_metrics_generic(struct dst_entry *dst) @@ -355,18 +349,6 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, __skb_tunnel_rx(skb, dev, net); } -/* Children define the path of the packet through the - * Linux networking. Thus, destinations are stackable. - */ - -static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) -{ - struct dst_entry *child = dst_clone(skb_dst(skb)->child); - - skb_dst_drop(skb); - return child; -} - int dst_discard_sk(struct sock *sk, struct sk_buff *skb); static inline int dst_discard(struct sk_buff *skb) { diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index dc8fd81412bf..6d6ef626811a 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -42,4 +42,20 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 u32 flow_hash_from_keys(struct flow_keys *keys); unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, __be16 protocol); + +/* struct flow_keys_digest: + * + * This structure is used to hold a digest of the full flow keys. This is a + * larger "hash" of a flow to allow definitively matching specific flows where + * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so + * that it can by used in CB of skb (see sch_choke for an example). + */ +#define FLOW_KEYS_DIGEST_LEN 16 +struct flow_keys_digest { + u8 data[FLOW_KEYS_DIGEST_LEN]; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow); + #endif diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 4a92423eefa5..279f83591971 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -41,7 +41,7 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, static inline void inet_ctl_sock_destroy(struct sock *sk) { - sk_release_kernel(sk); + sock_release(sk->sk_socket); } #endif diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 20e80fa7bbdd..e00018011028 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -121,44 +121,14 @@ struct rt6_info { struct rt6key rt6i_prefsrc; struct inet6_dev *rt6i_idev; - unsigned long _rt6i_peer; u32 rt6i_metric; + u32 rt6i_pmtu; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; u8 rt6i_protocol; }; -static inline struct inet_peer *rt6_peer_ptr(struct rt6_info *rt) -{ - return inetpeer_ptr(rt->_rt6i_peer); -} - -static inline bool rt6_has_peer(struct rt6_info *rt) -{ - return inetpeer_ptr_is_peer(rt->_rt6i_peer); -} - -static inline void __rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) -{ - __inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); -} - -static inline bool rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) -{ - return inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); -} - -static inline void rt6_init_peer(struct rt6_info *rt, struct inet_peer_base *base) -{ - inetpeer_init_ptr(&rt->_rt6i_peer, base); -} - -static inline void rt6_transfer_peer(struct rt6_info *rt, struct rt6_info *ort) -{ - inetpeer_transfer_peer(&rt->_rt6i_peer, &ort->_rt6i_peer); -} - static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; @@ -189,15 +159,6 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } -static inline void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) -{ - struct dst_entry *new = (struct dst_entry *) from; - - rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(new); - rt->dst.from = new; -} - static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eec8ad3c9843..53d25ef1699a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -239,8 +239,10 @@ struct ip6_flowlabel { struct net *fl_net; }; -#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) -#define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) +#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) +#define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) +#define IPV6_FLOWLABEL_STATELESS_FLAG cpu_to_be32(0x00080000) + #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 @@ -719,6 +721,9 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, hash ^= hash >> 12; flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; + + if (net->ipv6.sysctl.flowlabel_state_ranges) + flowlabel |= IPV6_FLOWLABEL_STATELESS_FLAG; } return flowlabel; diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 0134681acc4c..fe994d2e5286 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -96,7 +96,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) } struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot); + struct proto *prot, int kern); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8e3668b44c29..2f8b7decace0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -337,10 +337,16 @@ enum ieee80211_bss_change { * enum ieee80211_event_type - event to be notified to the low level driver * @RSSI_EVENT: AP's rssi crossed the a threshold set by the driver. * @MLME_EVENT: event related to MLME + * @BAR_RX_EVENT: a BAR was received + * @BA_FRAME_TIMEOUT: Frames were released from the reordering buffer because + * they timed out. This won't be called for each frame released, but only + * once each time the timeout triggers. */ enum ieee80211_event_type { RSSI_EVENT, MLME_EVENT, + BAR_RX_EVENT, + BA_FRAME_TIMEOUT, }; /** @@ -354,7 +360,7 @@ enum ieee80211_rssi_event_data { }; /** - * enum ieee80211_rssi_event - data attached to an %RSSI_EVENT + * struct ieee80211_rssi_event - data attached to an %RSSI_EVENT * @data: See &enum ieee80211_rssi_event_data */ struct ieee80211_rssi_event { @@ -388,7 +394,7 @@ enum ieee80211_mlme_event_status { }; /** - * enum ieee80211_mlme_event - data attached to an %MLME_EVENT + * struct ieee80211_mlme_event - data attached to an %MLME_EVENT * @data: See &enum ieee80211_mlme_event_data * @status: See &enum ieee80211_mlme_event_status * @reason: the reason code if applicable @@ -400,16 +406,31 @@ struct ieee80211_mlme_event { }; /** + * struct ieee80211_ba_event - data attached for BlockAck related events + * @sta: pointer to the &ieee80211_sta to which this event relates + * @tid: the tid + * @ssn: the starting sequence number (for %BAR_RX_EVENT) + */ +struct ieee80211_ba_event { + struct ieee80211_sta *sta; + u16 tid; + u16 ssn; +}; + +/** * struct ieee80211_event - event to be sent to the driver - * @type The event itself. See &enum ieee80211_event_type. + * @type: The event itself. See &enum ieee80211_event_type. * @rssi: relevant if &type is %RSSI_EVENT * @mlme: relevant if &type is %AUTH_EVENT + * @ba: relevant if &type is %BAR_RX_EVENT or %BA_FRAME_TIMEOUT + * @u:union holding the fields above */ struct ieee80211_event { enum ieee80211_event_type type; union { struct ieee80211_rssi_event rssi; struct ieee80211_mlme_event mlme; + struct ieee80211_ba_event ba; } u; }; @@ -1480,6 +1501,47 @@ struct ieee80211_key_conf { u8 key[0]; }; +#define IEEE80211_MAX_PN_LEN 16 + +/** + * struct ieee80211_key_seq - key sequence counter + * + * @tkip: TKIP data, containing IV32 and IV16 in host byte order + * @ccmp: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @aes_cmac: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @aes_gmac: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @gcmp: PN data, most significant byte first (big endian, + * reverse order than in packet) + * @hw: data for HW-only (e.g. cipher scheme) keys + */ +struct ieee80211_key_seq { + union { + struct { + u32 iv32; + u16 iv16; + } tkip; + struct { + u8 pn[6]; + } ccmp; + struct { + u8 pn[6]; + } aes_cmac; + struct { + u8 pn[6]; + } aes_gmac; + struct { + u8 pn[6]; + } gcmp; + struct { + u8 seq[IEEE80211_MAX_PN_LEN]; + u8 seq_len; + } hw; + }; +}; + /** * struct ieee80211_cipher_scheme - cipher scheme * @@ -1797,6 +1859,10 @@ struct ieee80211_txq { * the driver returns 1. This also forces the driver to advertise its * supported cipher suites. * + * @IEEE80211_HW_SUPPORT_FAST_XMIT: The driver/hardware supports fast-xmit, + * this currently requires only the ability to calculate the duration + * for frames. + * * @IEEE80211_HW_QUEUE_CONTROL: The driver wants to control per-interface * queue mapping in order to use different queues (not just one per AC) * for different virtual interfaces. See the doc section on HW queue @@ -1845,7 +1911,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_WANT_MONITOR_VIF = 1<<14, IEEE80211_HW_NO_AUTO_VIF = 1<<15, IEEE80211_HW_SW_CRYPTO_CONTROL = 1<<16, - /* free slots */ + IEEE80211_HW_SUPPORT_FAST_XMIT = 1<<17, IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, IEEE80211_HW_QUEUE_CONTROL = 1<<20, @@ -1939,8 +2005,8 @@ enum ieee80211_hw_flags { * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. * * @netdev_features: netdev features to be set in each netdev created - * from this HW. Note only HW checksum features are currently - * compatible with mac80211. Other feature bits will be rejected. + * from this HW. Note that not all features are usable with mac80211, + * other features will be rejected during HW registration. * * @uapsd_queues: This bitmap is included in (re)association frame to indicate * for each access category if it is uAPSD trigger-enabled and delivery- @@ -2504,10 +2570,6 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * stack. It is always safe to pass more frames than requested, * but this has negative impact on power consumption. * - * @FIF_PROMISC_IN_BSS: promiscuous mode within your BSS, - * think of the BSS as your network segment and then this corresponds - * to the regular ethernet device promiscuous mode. - * * @FIF_ALLMULTI: pass all multicast frames, this is used if requested * by the user or if the hardware is not capable of filtering by * multicast address. @@ -2524,8 +2586,8 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * mac80211 needs to do and the amount of CPU wakeups, so you should * honour this flag if possible. * - * @FIF_CONTROL: pass control frames (except for PS Poll), if PROMISC_IN_BSS - * is not set then only those addressed to this station. + * @FIF_CONTROL: pass control frames (except for PS Poll) addressed to this + * station * * @FIF_OTHER_BSS: pass frames destined to other BSSes * @@ -2535,7 +2597,6 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * @FIF_PROBE_REQ: pass probe request frames */ enum ieee80211_filter_flags { - FIF_PROMISC_IN_BSS = 1<<0, FIF_ALLMULTI = 1<<1, FIF_FCSFAIL = 1<<2, FIF_PLCPFAIL = 1<<3, @@ -2818,9 +2879,9 @@ enum ieee80211_reconfig_type { * Returns zero if statistics are available. * The callback can sleep. * - * @get_tkip_seq: If your device implements TKIP encryption in hardware this - * callback should be provided to read the TKIP transmit IVs (both IV32 - * and IV16) for the given key from hardware. + * @get_key_seq: If your device implements encryption in hardware and does + * IV/PN assignment then this callback should be provided to read the + * IV/PN for the given key from hardware. * The callback must be atomic. * * @set_frag_threshold: Configuration of fragmentation threshold. Assign this @@ -3003,7 +3064,7 @@ enum ieee80211_reconfig_type { * The callback can sleep. * @event_callback: Notify driver about any event in mac80211. See * &enum ieee80211_event_type for the different types. - * The callback can sleep. + * The callback must be atomic. * * @release_buffered_frames: Release buffered frames according to the given * parameters. In the case where the driver buffers some frames for @@ -3219,8 +3280,9 @@ struct ieee80211_ops { struct ieee80211_vif *vif); int (*get_stats)(struct ieee80211_hw *hw, struct ieee80211_low_level_stats *stats); - void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx, - u32 *iv32, u16 *iv16); + void (*get_key_seq)(struct ieee80211_hw *hw, + struct ieee80211_key_conf *key, + struct ieee80211_key_seq *seq); int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); int (*sta_add)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -3468,14 +3530,15 @@ enum ieee80211_tpt_led_trigger_flags { }; #ifdef CONFIG_MAC80211_LEDS -char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw); -char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw); -char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, - unsigned int flags, - const struct ieee80211_tpt_blink *blink_table, - unsigned int blink_table_len); +const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw); +const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw); +const char * +__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, + unsigned int flags, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len); #endif /** * ieee80211_get_tx_led_name - get name of TX LED @@ -3489,7 +3552,7 @@ char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_tx_led_name(hw); @@ -3510,7 +3573,7 @@ static inline char *ieee80211_get_tx_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_rx_led_name(hw); @@ -3531,7 +3594,7 @@ static inline char *ieee80211_get_rx_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_assoc_led_name(hw); @@ -3552,7 +3615,7 @@ static inline char *ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) * * Return: The name of the LED trigger. %NULL if not configured for LEDs. */ -static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) +static inline const char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) { #ifdef CONFIG_MAC80211_LEDS return __ieee80211_get_radio_led_name(hw); @@ -3573,7 +3636,7 @@ static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) * * Note: This function must be called before ieee80211_register_hw(). */ -static inline char * +static inline const char * ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, unsigned int flags, const struct ieee80211_tpt_blink *blink_table, unsigned int blink_table_len) @@ -4254,40 +4317,6 @@ void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, u8 *k1, u8 *k2); /** - * struct ieee80211_key_seq - key sequence counter - * - * @tkip: TKIP data, containing IV32 and IV16 in host byte order - * @ccmp: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @aes_cmac: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @aes_gmac: PN data, most significant byte first (big endian, - * reverse order than in packet) - * @gcmp: PN data, most significant byte first (big endian, - * reverse order than in packet) - */ -struct ieee80211_key_seq { - union { - struct { - u32 iv32; - u16 iv16; - } tkip; - struct { - u8 pn[6]; - } ccmp; - struct { - u8 pn[6]; - } aes_cmac; - struct { - u8 pn[6]; - } aes_gmac; - struct { - u8 pn[6]; - } gcmp; - }; -}; - -/** * ieee80211_get_key_tx_seq - get key TX sequence counter * * @keyconf: the parameter passed with the set key diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f733656404de..3f850acc844e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -271,7 +271,9 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #define __net_initconst __initconst #endif +int peernet2id_alloc(struct net *net, struct net *peer); int peernet2id(struct net *net, struct net *peer); +bool peernet_has_id(struct net *net, struct net *peer); struct net *get_net_ns_by_id(struct net *net, int id); struct pernet_operations { diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index d2527bf81142..8d93544a2d2b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -34,6 +34,7 @@ struct netns_sysctl_ipv6 { int fwmark_reflect; int idgen_retries; int idgen_delay; + int flowlabel_state_ranges; }; struct netns_ipv6 { diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9f4265ce8892..87935cad2f7b 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -64,6 +64,7 @@ struct request_sock { struct timer_list rsk_timer; const struct request_sock_ops *rsk_ops; struct sock *sk; + u32 *saved_syn; u32 secid; u32 peer_secid; }; @@ -77,7 +78,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) req->rsk_ops = ops; sock_hold(sk_listener); req->rsk_listener = sk_listener; - + req->saved_syn = NULL; /* Following is temporary. It is coupled with debugging * helpers in reqsk_put() & reqsk_free() */ @@ -104,6 +105,7 @@ static inline void reqsk_free(struct request_sock *req) req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); + kfree(req->saved_syn); kmem_cache_free(req->rsk_ops->slab, req); } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6d778efcfdfd..1b0a2e88ed2b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -501,12 +501,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) return sch->enqueue(skb, sch); } -static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) -{ - qdisc_skb_cb(skb)->pkt_len = skb->len; - return qdisc_enqueue(skb, sch) & NET_XMIT_MASK; -} - static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) { return q->flags & TCQ_F_CPUSTATS; @@ -755,8 +749,6 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, if (n) { n->tc_verd = SET_TC_VERD(n->tc_verd, 0); - n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); - n->tc_verd = CLR_TC_MUNGED(n->tc_verd); } return n; } diff --git a/include/net/sock.h b/include/net/sock.h index 3a4898ec8c67..d882f4c8e438 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -184,6 +184,7 @@ struct sock_common { unsigned char skc_reuse:4; unsigned char skc_reuseport:1; unsigned char skc_ipv6only:1; + unsigned char skc_net_refcnt:1; int skc_bound_dev_if; union { struct hlist_node skc_bind_node; @@ -323,6 +324,7 @@ struct sock { #define sk_reuse __sk_common.skc_reuse #define sk_reuseport __sk_common.skc_reuseport #define sk_ipv6only __sk_common.skc_ipv6only +#define sk_net_refcnt __sk_common.skc_net_refcnt #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot @@ -1514,9 +1516,8 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot); + struct proto *prot, int kern); void sk_free(struct sock *sk); -void sk_release_kernel(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, @@ -2192,22 +2193,6 @@ void sock_net_set(struct sock *sk, struct net *net) write_pnet(&sk->sk_net, net); } -/* - * Kernel sockets, f.e. rtnl or icmp_socket, are a part of a namespace. - * They should not hold a reference to a namespace in order to allow - * to stop it. - * Sockets after sk_change_net should be released using sk_release_kernel - */ -static inline void sk_change_net(struct sock *sk, struct net *net) -{ - struct net *current_net = sock_net(sk); - - if (!net_eq(current_net, net)) { - put_net(current_net); - sock_net_set(sk, net); - } -} - static inline struct sock *skb_steal_sock(struct sk_buff *skb) { if (skb->sk) { diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d2e69ee3019a..ea5b1c230d3d 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -14,153 +14,210 @@ #include <linux/netdevice.h> #include <linux/notifier.h> +#define SWITCHDEV_F_NO_RECURSE BIT(0) + +enum switchdev_trans { + SWITCHDEV_TRANS_NONE, + SWITCHDEV_TRANS_PREPARE, + SWITCHDEV_TRANS_ABORT, + SWITCHDEV_TRANS_COMMIT, +}; + +enum switchdev_attr_id { + SWITCHDEV_ATTR_UNDEFINED, + SWITCHDEV_ATTR_PORT_PARENT_ID, + SWITCHDEV_ATTR_PORT_STP_STATE, + SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, +}; + +struct switchdev_attr { + enum switchdev_attr_id id; + enum switchdev_trans trans; + u32 flags; + union { + struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ + u8 stp_state; /* PORT_STP_STATE */ + unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ + } u; +}; + struct fib_info; +enum switchdev_obj_id { + SWITCHDEV_OBJ_UNDEFINED, + SWITCHDEV_OBJ_PORT_VLAN, + SWITCHDEV_OBJ_IPV4_FIB, +}; + +struct switchdev_obj { + enum switchdev_obj_id id; + enum switchdev_trans trans; + union { + struct switchdev_obj_vlan { /* PORT_VLAN */ + u16 flags; + u16 vid_start; + u16 vid_end; + } vlan; + struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ + u32 dst; + int dst_len; + struct fib_info *fi; + u8 tos; + u8 type; + u32 nlflags; + u32 tb_id; + } ipv4_fib; + } u; +}; + /** * struct switchdev_ops - switchdev operations * - * @swdev_parent_id_get: Called to get an ID of the switch chip this port - * is part of. If driver implements this, it indicates that it - * represents a port of a switch chip. + * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr). * - * @swdev_port_stp_update: Called to notify switch device port of bridge - * port STP state change. + * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr). * - * @swdev_fib_ipv4_add: Called to add/modify IPv4 route to switch device. + * @switchdev_port_obj_add: Add an object to port (see switchdev_obj). * - * @swdev_fib_ipv4_del: Called to delete IPv4 route from switch device. + * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj). */ -struct swdev_ops { - int (*swdev_parent_id_get)(struct net_device *dev, - struct netdev_phys_item_id *psid); - int (*swdev_port_stp_update)(struct net_device *dev, u8 state); - int (*swdev_fib_ipv4_add)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, - u32 tb_id); - int (*swdev_fib_ipv4_del)(struct net_device *dev, __be32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); +struct switchdev_ops { + int (*switchdev_port_attr_get)(struct net_device *dev, + struct switchdev_attr *attr); + int (*switchdev_port_attr_set)(struct net_device *dev, + struct switchdev_attr *attr); + int (*switchdev_port_obj_add)(struct net_device *dev, + struct switchdev_obj *obj); + int (*switchdev_port_obj_del)(struct net_device *dev, + struct switchdev_obj *obj); }; -enum netdev_switch_notifier_type { - NETDEV_SWITCH_FDB_ADD = 1, - NETDEV_SWITCH_FDB_DEL, +enum switchdev_notifier_type { + SWITCHDEV_FDB_ADD = 1, + SWITCHDEV_FDB_DEL, }; -struct netdev_switch_notifier_info { +struct switchdev_notifier_info { struct net_device *dev; }; -struct netdev_switch_notifier_fdb_info { - struct netdev_switch_notifier_info info; /* must be first */ +struct switchdev_notifier_fdb_info { + struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; u16 vid; }; static inline struct net_device * -netdev_switch_notifier_info_to_dev(const struct netdev_switch_notifier_info *info) +switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) { return info->dev; } #ifdef CONFIG_NET_SWITCHDEV -int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid); -int netdev_switch_port_stp_update(struct net_device *dev, u8 state); -int register_netdev_switch_notifier(struct notifier_block *nb); -int unregister_netdev_switch_notifier(struct notifier_block *nb); -int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info); -int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id); -int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); -void netdev_switch_fib_ipv4_abort(struct fib_info *fi); +int switchdev_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr); +int switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr); +int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj); +int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj); +int register_switchdev_notifier(struct notifier_block *nb); +int unregister_switchdev_notifier(struct notifier_block *nb); +int call_switchdev_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info); +int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags); +int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, u32 tb_id); +int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id); +void switchdev_fib_ipv4_abort(struct fib_info *fi); #else -static inline int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static inline int switchdev_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) { return -EOPNOTSUPP; } -static inline int netdev_switch_port_stp_update(struct net_device *dev, - u8 state) +static inline int switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) { return -EOPNOTSUPP; } -static inline int register_netdev_switch_notifier(struct notifier_block *nb) +static inline int switchdev_port_obj_add(struct net_device *dev, + struct switchdev_obj *obj) { - return 0; + return -EOPNOTSUPP; } -static inline int unregister_netdev_switch_notifier(struct notifier_block *nb) +static inline int switchdev_port_obj_del(struct net_device *dev, + struct switchdev_obj *obj) +{ + return -EOPNOTSUPP; +} + +static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; } -static inline int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info) +static inline int unregister_switchdev_notifier(struct notifier_block *nb) { - return NOTIFY_DONE; + return 0; } -static inline int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int call_switchdev_notifiers(unsigned long val, + struct net_device *dev, + struct switchdev_notifier_info *info) { - return -EOPNOTSUPP; + return NOTIFY_DONE; } -static inline int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, + u32 seq, struct net_device *dev, + u32 filter_mask, int nlflags) { return -EOPNOTSUPP; } -static inline int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { - return 0; + return -EOPNOTSUPP; } -static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) +static inline int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) { - return 0; + return -EOPNOTSUPP; } -static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) +static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, + u32 nlflags, u32 tb_id) { return 0; } -static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len, + struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) { return 0; } -static inline void netdev_switch_fib_ipv4_abort(struct fib_info *fi) +static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) { } diff --git a/include/net/tcp.h b/include/net/tcp.h index 6d204f3f9df8..b8ea12880fd9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -527,7 +527,7 @@ int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); void tcp_send_probe0(struct sock *); void tcp_send_partial(struct sock *); -int tcp_write_wakeup(struct sock *); +int tcp_write_wakeup(struct sock *, int mib); void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); @@ -1043,14 +1043,31 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) return tp->is_cwnd_limited; } -static inline void tcp_check_probe_timer(struct sock *sk) +/* Something is really bad, we could not queue an additional packet, + * because qdisc is full or receiver sent a 0 window. + * We do not want to add fuel to the fire, or abort too early, + * so make sure the timer we arm now is at least 200ms in the future, + * regardless of current icsk_rto value (as it could be ~2ms) + */ +static inline unsigned long tcp_probe0_base(const struct sock *sk) { - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); + return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN); +} - if (!tp->packets_out && !icsk->icsk_pending) +/* Variant of inet_csk_rto_backoff() used for zero window probes */ +static inline unsigned long tcp_probe0_when(const struct sock *sk, + unsigned long max_when) +{ + u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff; + + return (unsigned long)min_t(u64, when, max_when); +} + +static inline void tcp_check_probe_timer(struct sock *sk) +{ + if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - icsk->icsk_rto, TCP_RTO_MAX); + tcp_probe0_base(sk), TCP_RTO_MAX); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 41892f720057..9692cda5f8fc 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -95,11 +95,17 @@ typedef __u32 can_err_mask_t; * @can_dlc: frame payload length in byte (0 .. 8) aka data length code * N.B. the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1 * mapping of the 'data length code' to the real payload length + * @__pad: padding + * @__res0: reserved / padding + * @__res1: reserved / padding * @data: CAN frame payload (up to 8 byte) */ struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ __u8 can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ __u8 data[CAN_MAX_DLEN] __attribute__((aligned(8))); }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d9cd19214b98..6d6e502e1051 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -417,6 +417,9 @@ enum { IFLA_BOND_AD_LACP_RATE, IFLA_BOND_AD_SELECT, IFLA_BOND_AD_INFO, + IFLA_BOND_AD_ACTOR_SYS_PRIO, + IFLA_BOND_AD_USER_PORT_KEY, + IFLA_BOND_AD_ACTOR_SYSTEM, __IFLA_BOND_MAX, }; diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 1a85940f8ab7..3e34b7d702f8 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -108,6 +108,7 @@ struct nlmsgerr { #define NETLINK_NO_ENOBUFS 5 #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#define NETLINK_LISTEN_ALL_NSID 8 struct nl_pktinfo { __u32 group; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 241220c43e86..c0ab6b0a3919 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2620,16 +2620,17 @@ enum nl80211_band_attr { * an indoor surroundings, i.e., it is connected to AC power (and not * through portable DC inverters) or is under the control of a master * that is acting as an AP and is connected to AC power. - * @NL80211_FREQUENCY_ATTR_GO_CONCURRENT: GO operation is allowed on this + * @NL80211_FREQUENCY_ATTR_IR_CONCURRENT: IR operation is allowed on this * channel if it's connected concurrently to a BSS on the same channel on * the 2 GHz band or to a channel in the same UNII band (on the 5 GHz - * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO on a - * channel that has the GO_CONCURRENT attribute set can be done when there - * is a clear assessment that the device is operating under the guidance of - * an authorized master, i.e., setting up a GO while the device is also - * connected to an AP with DFS and radar detection on the UNII band (it is - * up to user-space, i.e., wpa_supplicant to perform the required - * verifications) + * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO or TDLS + * off-channel on a channel that has the IR_CONCURRENT attribute set can be + * done when there is a clear assessment that the device is operating under + * the guidance of an authorized master, i.e., setting up a GO or TDLS + * off-channel while the device is also connected to an AP with DFS and + * radar detection on the UNII band (it is up to user-space, i.e., + * wpa_supplicant to perform the required verifications). Using this + * attribute for IR is disallowed for master interfaces (IBSS, AP). * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed @@ -2641,7 +2642,7 @@ enum nl80211_band_attr { * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122 * for more information on the FCC description of the relaxations allowed * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and - * NL80211_FREQUENCY_ATTR_GO_CONCURRENT. + * NL80211_FREQUENCY_ATTR_IR_CONCURRENT. */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -2659,7 +2660,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_160MHZ, NL80211_FREQUENCY_ATTR_DFS_CAC_TIME, NL80211_FREQUENCY_ATTR_INDOOR_ONLY, - NL80211_FREQUENCY_ATTR_GO_CONCURRENT, + NL80211_FREQUENCY_ATTR_IR_CONCURRENT, NL80211_FREQUENCY_ATTR_NO_20MHZ, NL80211_FREQUENCY_ATTR_NO_10MHZ, @@ -2672,6 +2673,8 @@ enum nl80211_frequency_attr { #define NL80211_FREQUENCY_ATTR_PASSIVE_SCAN NL80211_FREQUENCY_ATTR_NO_IR #define NL80211_FREQUENCY_ATTR_NO_IBSS NL80211_FREQUENCY_ATTR_NO_IR #define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \ + NL80211_FREQUENCY_ATTR_IR_CONCURRENT /** * enum nl80211_bitrate_attr - bitrate attributes @@ -2830,7 +2833,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated * base on contiguous rules and wider channels will be allowed to cross * multiple contiguous/overlapping frequency ranges. - * @NL80211_RRF_GO_CONCURRENT: See &NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @NL80211_RRF_IR_CONCURRENT: See &NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed @@ -2847,7 +2850,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_IR = 1<<7, __NL80211_RRF_NO_IBSS = 1<<8, NL80211_RRF_AUTO_BW = 1<<11, - NL80211_RRF_GO_CONCURRENT = 1<<12, + NL80211_RRF_IR_CONCURRENT = 1<<12, NL80211_RRF_NO_HT40MINUS = 1<<13, NL80211_RRF_NO_HT40PLUS = 1<<14, NL80211_RRF_NO_80MHZ = 1<<15, @@ -2859,6 +2862,7 @@ enum nl80211_reg_rule_flags { #define NL80211_RRF_NO_IR NL80211_RRF_NO_IR #define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\ NL80211_RRF_NO_HT40PLUS) +#define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT /* For backport compatibility with older userspace */ #define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index bf08e76bf505..596ffa0c7084 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -35,6 +35,8 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance * * */ +#ifndef __KERNEL__ +/* backwards compat for userspace only */ #define TC_MUNGED _TC_MAKEMASK1(0) #define SET_TC_MUNGED(v) ( TC_MUNGED | (v & ~TC_MUNGED)) #define CLR_TC_MUNGED(v) ( v & ~TC_MUNGED) @@ -42,6 +44,7 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance #define TC_OK2MUNGE _TC_MAKEMASK1(1) #define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE)) #define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE) +#endif #define S_TC_VERD _TC_MAKE32(2) #define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD) @@ -62,11 +65,13 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance #define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS)) #define CLR_TC_NCLS(v) ( v & ~TC_NCLS) +#ifndef __KERNEL__ #define S_TC_RTTL _TC_MAKE32(9) #define M_TC_RTTL _TC_MAKEMASK(3,S_TC_RTTL) #define G_TC_RTTL(x) _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL) #define V_TC_RTTL(x) _TC_MAKEVALUE(x,S_TC_RTTL) #define SET_TC_RTTL(v,n) ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL)) +#endif #define S_TC_AT _TC_MAKE32(12) #define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 534b84710745..8d2530daca9f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -268,7 +268,8 @@ enum { TCA_GRED_STAB, TCA_GRED_DPS, TCA_GRED_MAX_P, - __TCA_GRED_MAX, + TCA_GRED_LIMIT, + __TCA_GRED_MAX, }; #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) @@ -679,6 +680,7 @@ enum { TCA_CODEL_LIMIT, TCA_CODEL_INTERVAL, TCA_CODEL_ECN, + TCA_CODEL_CE_THRESHOLD, __TCA_CODEL_MAX }; @@ -695,6 +697,7 @@ struct tc_codel_xstats { __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */ __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ __u32 dropping; /* are we in dropping state ? */ + __u32 ce_mark; /* number of CE marked packets because of ce_threshold */ }; /* FQ_CODEL */ @@ -707,6 +710,7 @@ enum { TCA_FQ_CODEL_ECN, TCA_FQ_CODEL_FLOWS, TCA_FQ_CODEL_QUANTUM, + TCA_FQ_CODEL_CE_THRESHOLD, __TCA_FQ_CODEL_MAX }; @@ -730,6 +734,7 @@ struct tc_fq_codel_qd_stats { */ __u32 new_flows_len; /* count of flows in new list */ __u32 old_flows_len; /* count of flows in old list */ + __u32 ce_mark; /* packets above ce_threshold */ }; struct tc_fq_codel_cl_stats { diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 6a6fb747c78d..eee8968407f0 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -276,6 +276,8 @@ enum LINUX_MIB_TCPACKSKIPPEDFINWAIT2, /* TCPACKSkippedFinWait2 */ LINUX_MIB_TCPACKSKIPPEDTIMEWAIT, /* TCPACKSkippedTimeWait */ LINUX_MIB_TCPACKSKIPPEDCHALLENGE, /* TCPACKSkippedChallenge */ + LINUX_MIB_TCPWINPROBE, /* TCPWinProbe */ + LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index faa72f4fa547..51ebedba577f 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -113,6 +113,8 @@ enum { #define TCP_TIMESTAMP 24 #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ +#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ +#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ struct tcp_repair_opt { __u32 opt_code; diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 4f44028943e6..245df6b32b81 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -346,16 +346,13 @@ static inline void seccomp_sync_threads(void) */ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { - struct seccomp_filter *filter; - unsigned long fp_size; - struct sock_filter *fp; - int new_len; - long ret; + struct seccomp_filter *sfilter; + int ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) return ERR_PTR(-EINVAL); + BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); - fp_size = fprog->len * sizeof(struct sock_filter); /* * Installing a seccomp filter requires that the task has @@ -368,60 +365,21 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) CAP_SYS_ADMIN) != 0) return ERR_PTR(-EACCES); - fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); - if (!fp) - return ERR_PTR(-ENOMEM); - - /* Copy the instructions from fprog. */ - ret = -EFAULT; - if (copy_from_user(fp, fprog->filter, fp_size)) - goto free_prog; - - /* Check and rewrite the fprog via the skb checker */ - ret = bpf_check_classic(fp, fprog->len); - if (ret) - goto free_prog; - - /* Check and rewrite the fprog for seccomp use */ - ret = seccomp_check_filter(fp, fprog->len); - if (ret) - goto free_prog; - - /* Convert 'sock_filter' insns to 'bpf_insn' insns */ - ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len); - if (ret) - goto free_prog; - /* Allocate a new seccomp_filter */ - ret = -ENOMEM; - filter = kzalloc(sizeof(struct seccomp_filter), - GFP_KERNEL|__GFP_NOWARN); - if (!filter) - goto free_prog; - - filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN); - if (!filter->prog) - goto free_filter; - - ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); - if (ret) - goto free_filter_prog; - - kfree(fp); - atomic_set(&filter->usage, 1); - filter->prog->len = new_len; + sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN); + if (!sfilter) + return ERR_PTR(-ENOMEM); - bpf_prog_select_runtime(filter->prog); + ret = bpf_prog_create_from_user(&sfilter->prog, fprog, + seccomp_check_filter); + if (ret < 0) { + kfree(sfilter); + return ERR_PTR(ret); + } - return filter; + atomic_set(&sfilter->usage, 1); -free_filter_prog: - __bpf_prog_free(filter->prog); -free_filter: - kfree(filter); -free_prog: - kfree(fp); - return ERR_PTR(ret); + return sfilter; } /** diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b28df4019ade..4936fc4d7f2c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -579,7 +579,6 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) struct bucket_table *tbl = iter->walker->tbl; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; - void *obj = NULL; if (p) { p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); @@ -599,8 +598,7 @@ next: if (!rht_is_a_nulls(p)) { iter->skip++; iter->p = p; - obj = rht_obj(ht, p); - goto out; + return rht_obj(ht, p); } iter->skip = 0; @@ -618,9 +616,7 @@ next: iter->p = NULL; -out: - - return obj; + return NULL; } EXPORT_SYMBOL_GPL(rhashtable_walk_next); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 80d78c51f65f..8bca780e3613 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1755,7 +1755,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1), BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R0, 0x1ffffffffLL), + BPF_ALU64_IMM(BPF_RSH, R0, 32), /* R0 = 1 */ BPF_EXIT_INSN(), }, INTERNAL, @@ -1805,6 +1806,2198 @@ static struct bpf_test tests[] = { 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6}, { { 38, 256 } } }, + /* BPF_ALU | BPF_MOV | BPF_X */ + { + "ALU_MOV_X: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_MOV_X: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295), + BPF_ALU32_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU64_MOV_X: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOV_X: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + /* BPF_ALU | BPF_MOV | BPF_K */ + { + "ALU_MOV_K: dst = 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_MOV_K: dst = 4294967295", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 4294967295), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU_MOV_K: 0x0000ffffffff0000 = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_MOV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOV_K: dst = 2", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOV_K: dst = 2147483647", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_OR_K: dst = 0x0", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0), + BPF_ALU64_IMM(BPF_MOV, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MOV_K: dst = -1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_MOV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_ADD | BPF_X */ + { + "ALU_ADD_X: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_X: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU64_ADD_X: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_X: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + /* BPF_ALU | BPF_ADD | BPF_K */ + { + "ALU_ADD_K: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_ADD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_K: 3 + 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_ADD, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_ADD_K: 1 + 4294967294 = 4294967295", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_ADD, R0, 4294967294), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4294967295 } }, + }, + { + "ALU_ADD_K: 0 + (-1) = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_ADD, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_ADD_K: 1 + 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_ADD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_K: 3 + 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_ADD, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_ADD_K: 1 + 2147483646 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_ADD, R0, 2147483646), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_ADD_K: 2147483646 + -2147483647 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483646), + BPF_ALU64_IMM(BPF_ADD, R0, -2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + { + "ALU64_ADD_K: 1 + 0 = 1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0x1), + BPF_ALU64_IMM(BPF_ADD, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_ADD_K: 0 + (-1) = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_ADD, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_SUB | BPF_X */ + { + "ALU_SUB_X: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_SUB_X: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU32_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_SUB_X: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_SUB_X: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967294), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_SUB | BPF_K */ + { + "ALU_SUB_K: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_SUB, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_SUB_K: 3 - 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_SUB, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_SUB_K: 4294967295 - 4294967294 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_SUB, R0, 4294967294), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_SUB_K: 3 - 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_SUB, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_SUB_K: 3 - 0 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_SUB, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_SUB_K: 4294967294 - 4294967295 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967294), + BPF_ALU64_IMM(BPF_SUB, R0, 4294967295), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + { + "ALU64_ADD_K: 2147483646 - 2147483647 = -1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483646), + BPF_ALU64_IMM(BPF_SUB, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } }, + }, + /* BPF_ALU | BPF_MUL | BPF_X */ + { + "ALU_MUL_X: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 3), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU_MUL_X: 2 * 0x7FFFFFF8 = 0xFFFFFFF0", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0x7FFFFFF8), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xFFFFFFF0 } }, + }, + { + "ALU_MUL_X: -1 * -1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, -1), + BPF_ALU32_IMM(BPF_MOV, R1, -1), + BPF_ALU32_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MUL_X: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 3), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU64_MUL_X: 1 * 2147483647 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483647), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + /* BPF_ALU | BPF_MUL | BPF_K */ + { + "ALU_MUL_K: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MUL, R0, 3), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU_MUL_K: 3 * 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MUL, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_MUL_K: 2 * 0x7FFFFFF8 = 0xFFFFFFF0", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MUL, R0, 0x7FFFFFF8), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xFFFFFFF0 } }, + }, + { + "ALU_MUL_K: 1 * (-1) = 0x00000000ffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0x00000000ffffffff), + BPF_ALU32_IMM(BPF_MUL, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_MUL_K: 2 * 3 = 6", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU64_IMM(BPF_MUL, R0, 3), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 6 } }, + }, + { + "ALU64_MUL_K: 3 * 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MUL, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_MUL_K: 1 * 2147483647 = 2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_MUL, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2147483647 } }, + }, + { + "ALU64_MUL_K: 1 * -2147483647 = -2147483647", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_MUL, R0, -2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -2147483647 } }, + }, + { + "ALU64_MUL_K: 1 * (-1) = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x1), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_MUL, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_DIV | BPF_X */ + { + "ALU_DIV_X: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_X: 4294967295 / 4294967295 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967295), + BPF_ALU32_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_X: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_X: 2147483647 / 2147483647 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483647), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_X: 0xffffffffffffffff / (-1) = 0x0000000000000001", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffUL), + BPF_LD_IMM64(R4, 0xffffffffffffffffUL), + BPF_LD_IMM64(R3, 0x0000000000000001UL), + BPF_ALU64_REG(BPF_DIV, R2, R4), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_DIV | BPF_K */ + { + "ALU_DIV_K: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU32_IMM(BPF_DIV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_K: 3 / 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_DIV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_DIV_K: 4294967295 / 4294967295 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_DIV, R0, 4294967295), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_DIV_K: 0xffffffffffffffff / (-1) = 0x1", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffUL), + BPF_LD_IMM64(R3, 0x1UL), + BPF_ALU32_IMM(BPF_DIV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_DIV_K: 6 / 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 6), + BPF_ALU64_IMM(BPF_DIV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_K: 3 / 1 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_DIV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_DIV_K: 2147483647 / 2147483647 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU64_IMM(BPF_DIV, R0, 2147483647), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_DIV_K: 0xffffffffffffffff / (-1) = 0x0000000000000001", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffffUL), + BPF_LD_IMM64(R3, 0x0000000000000001UL), + BPF_ALU64_IMM(BPF_DIV, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_X */ + { + "ALU_MOD_X: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_MOD_X: 4294967295 % 4294967293 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOV, R1, 4294967293), + BPF_ALU32_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOD_X: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MOD_X: 2147483647 % 2147483645 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU32_IMM(BPF_MOV, R1, 2147483645), + BPF_ALU64_REG(BPF_MOD, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + /* BPF_ALU | BPF_MOD | BPF_K */ + { + "ALU_MOD_K: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_MOD_K: 3 % 1 = 0", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOD, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + }, + { + "ALU_MOD_K: 4294967295 % 4294967293 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967295), + BPF_ALU32_IMM(BPF_MOD, R0, 4294967293), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_MOD_K: 3 % 2 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MOD, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_MOD_K: 3 % 1 = 0", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_MOD, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + }, + { + "ALU64_MOD_K: 2147483647 % 2147483645 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 2147483647), + BPF_ALU64_IMM(BPF_MOD, R0, 2147483645), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + /* BPF_ALU | BPF_AND | BPF_X */ + { + "ALU_AND_X: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_AND_X: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_X: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_AND_X: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_AND, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_ALU | BPF_AND | BPF_K */ + { + "ALU_AND_K: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU32_IMM(BPF_AND, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_AND_K: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU32_IMM(BPF_AND, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_K: 3 & 2 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_AND, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_AND_K: 0xffffffff & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xffffffff), + BPF_ALU64_IMM(BPF_AND, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000000000000000), + BPF_ALU64_IMM(BPF_AND, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000ffffffff0000), + BPF_ALU64_IMM(BPF_AND, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_AND_K: 0xffffffffffffffff & -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffffffffffffffff), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_AND, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_OR | BPF_X */ + { + "ALU_OR_X: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU32_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_OR_X: 0x0 | 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_X: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 2), + BPF_ALU64_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_OR_X: 0 | 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_OR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_ALU | BPF_OR | BPF_K */ + { + "ALU_OR_K: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_OR, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_OR_K: 0 & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU32_IMM(BPF_OR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_K: 1 | 2 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_OR, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_OR_K: 0 & 0xffffffff = 0xffffffff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_ALU64_IMM(BPF_OR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000ffffffff0000), + BPF_ALU64_IMM(BPF_OR, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_OR_K: 0x0000ffffffff0000 | -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_OR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_OR_K: 0x000000000000000 | -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000000000000000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_OR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_XOR | BPF_X */ + { + "ALU_XOR_X: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_MOV, R1, 6), + BPF_ALU32_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_XOR_X: 0x1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU32_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_X: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_MOV, R1, 6), + BPF_ALU64_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_XOR_X: 1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_ALU64_REG(BPF_XOR, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + /* BPF_ALU | BPF_XOR | BPF_K */ + { + "ALU_XOR_K: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU32_IMM(BPF_XOR, R0, 6), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU_XOR_K: 1 ^ 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_XOR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_K: 5 ^ 6 = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, 5), + BPF_ALU64_IMM(BPF_XOR, R0, 6), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } }, + }, + { + "ALU64_XOR_K: 0x0000ffffffff0000 ^ 0x0 = 0x0000ffffffff0000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0x0000ffffffff0000), + BPF_ALU64_IMM(BPF_XOR, R2, 0x0), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_XOR_K: 0x0000ffffffff0000 ^ -1 = 0xffff00000000ffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000ffffffff0000), + BPF_LD_IMM64(R3, 0xffff00000000ffff), + BPF_ALU64_IMM(BPF_XOR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_XOR_K: 0x000000000000000 ^ -1 = 0xffffffffffffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0000000000000000), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ALU64_IMM(BPF_XOR, R2, 0xffffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + /* BPF_ALU | BPF_LSH | BPF_X */ + { + "ALU_LSH_X: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_LSH_X: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + { + "ALU64_LSH_X: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_LSH_X: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + /* BPF_ALU | BPF_LSH | BPF_K */ + { + "ALU_LSH_K: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_LSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU_LSH_K: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU32_IMM(BPF_LSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + { + "ALU64_LSH_K: 1 << 1 = 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_LSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "ALU64_LSH_K: 1 << 31 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R0, 1), + BPF_ALU64_IMM(BPF_LSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x80000000 } }, + }, + /* BPF_ALU | BPF_RSH | BPF_X */ + { + "ALU_RSH_X: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_RSH_X: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_X: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_X: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_MOV, R1, 31), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_RSH | BPF_K */ + { + "ALU_RSH_K: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU32_IMM(BPF_RSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU_RSH_K: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU32_IMM(BPF_RSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_K: 2 >> 1 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_ALU64_IMM(BPF_RSH, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "ALU64_RSH_K: 0x80000000 >> 31 = 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x80000000), + BPF_ALU64_IMM(BPF_RSH, R0, 31), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_ALU | BPF_ARSH | BPF_X */ + { + "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xff00ff0000000000LL), + BPF_ALU32_IMM(BPF_MOV, R1, 40), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff00ff } }, + }, + /* BPF_ALU | BPF_ARSH | BPF_K */ + { + "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xff00ff0000000000LL), + BPF_ALU64_IMM(BPF_ARSH, R0, 40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff00ff } }, + }, + /* BPF_ALU | BPF_NEG */ + { + "ALU_NEG: -(3) = -3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 3), + BPF_ALU32_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + { + "ALU_NEG: -(-3) = 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -3), + BPF_ALU32_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + { + "ALU64_NEG: -(3) = -3", + .u.insns_int = { + BPF_LD_IMM64(R0, 3), + BPF_ALU64_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -3 } }, + }, + { + "ALU64_NEG: -(-3) = 3", + .u.insns_int = { + BPF_LD_IMM64(R0, -3), + BPF_ALU64_IMM(BPF_NEG, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 3 } }, + }, + /* BPF_ALU | BPF_END | BPF_FROM_BE */ + { + "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_be16(0xcdef) } }, + }, + { + "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_be32(0x89abcdef) } }, + }, + { + "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_BE, R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } }, + }, + /* BPF_ALU | BPF_END | BPF_FROM_LE */ + { + "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 16), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_le16(0xcdef) } }, + }, + { + "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, cpu_to_le32(0x89abcdef) } }, + }, + { + "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ENDIAN(BPF_FROM_LE, R0, 64), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } }, + }, + /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */ + { + "ST_MEM_B: Store/Load byte: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_B, R10, -40, 0xff), + BPF_LDX_MEM(BPF_B, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xff } }, + }, + { + "ST_MEM_B: Store/Load byte: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0x7f), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7f } }, + }, + { + "STX_MEM_B: Store/Load byte: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffLL), + BPF_STX_MEM(BPF_B, R10, R1, -40), + BPF_LDX_MEM(BPF_B, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xff } }, + }, + { + "ST_MEM_H: Store/Load half word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0xffff), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff } }, + }, + { + "ST_MEM_H: Store/Load half word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_H, R10, -40, 0x7fff), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fff } }, + }, + { + "STX_MEM_H: Store/Load half word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffLL), + BPF_STX_MEM(BPF_H, R10, R1, -40), + BPF_LDX_MEM(BPF_H, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffff } }, + }, + { + "ST_MEM_W: Store/Load word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_W: Store/Load word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fffffff } }, + }, + { + "STX_MEM_W: Store/Load word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffffffLL), + BPF_STX_MEM(BPF_W, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_DW: Store/Load double word: max negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + { + "ST_MEM_DW: Store/Load double word: max negative 2", + .u.insns_int = { + BPF_LD_IMM64(R2, 0xffff00000000ffff), + BPF_LD_IMM64(R3, 0xffffffffffffffff), + BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff), + BPF_LDX_MEM(BPF_DW, R2, R10, -40), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ST_MEM_DW: Store/Load double word: max positive", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x7fffffff } }, + }, + { + "STX_MEM_DW: Store/Load double word: max negative", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_STX_MEM(BPF_W, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } }, + }, + /* BPF_STX | BPF_XADD | BPF_W/DW */ + { + "STX_XADD_W: Test: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_W, R10, -40, 0x10), + BPF_STX_XADD(BPF_W, R10, R0, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x22 } }, + }, + { + "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_DW, R10, -40, 0x10), + BPF_STX_XADD(BPF_DW, R10, R0, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x22 } }, + }, + /* BPF_JMP | BPF_EXIT */ + { + "JMP_EXIT", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x4711), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0x4712), + }, + INTERNAL, + { }, + { { 0, 0x4711 } }, + }, + /* BPF_JMP | BPF_JA */ + { + "JMP_JA: Unconditional jump: if (true) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGT | BPF_K */ + { + "JMP_JSGT_K: Signed jump: if (-1 > -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGT, R1, -2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGT_K: Signed jump: if (-1 > -1) return 0", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGT, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGE | BPF_K */ + { + "JMP_JSGE_K: Signed jump: if (-1 >= -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGE, R1, -2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_K: Signed jump: if (-1 >= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 0xffffffffffffffffLL), + BPF_JMP_IMM(BPF_JSGE, R1, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGT | BPF_K */ + { + "JMP_JGT_K: if (3 > 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGT, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGE | BPF_K */ + { + "JMP_JGE_K: if (3 >= 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JGE_K: if (3 >= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JGE, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JNE | BPF_K */ + { + "JMP_JNE_K: if (3 != 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JNE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JEQ | BPF_K */ + { + "JMP_JEQ_K: if (3 == 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JEQ, R1, 3, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSET | BPF_K */ + { + "JMP_JSET_K: if (0x3 & 0x2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JNE, R1, 2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSET_K: if (0x3 & 0xffffffff) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_JMP_IMM(BPF_JNE, R1, 0xffffffff, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGT | BPF_X */ + { + "JMP_JSGT_X: Signed jump: if (-1 > -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -2), + BPF_JMP_REG(BPF_JSGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGT_X: Signed jump: if (-1 > -1) return 0", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -1), + BPF_JMP_REG(BPF_JSGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSGE | BPF_X */ + { + "JMP_JSGE_X: Signed jump: if (-1 >= -2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -2), + BPF_JMP_REG(BPF_JSGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_X: Signed jump: if (-1 >= -1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, -1), + BPF_JMP_REG(BPF_JSGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGT | BPF_X */ + { + "JMP_JGT_X: if (3 > 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JGE | BPF_X */ + { + "JMP_JGE_X: if (3 >= 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JGE_X: if (3 >= 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 3), + BPF_JMP_REG(BPF_JGE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JNE | BPF_X */ + { + "JMP_JNE_X: if (3 != 2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JEQ | BPF_X */ + { + "JMP_JEQ_X: if (3 == 3) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 3), + BPF_JMP_REG(BPF_JEQ, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + /* BPF_JMP | BPF_JSET | BPF_X */ + { + "JMP_JSET_X: if (0x3 & 0x2) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSET_X: if (0x3 & 0xffffffff) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 0xffffffff), + BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, }; static struct net_device dev; @@ -1990,6 +4183,7 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test) static __init int test_bpf(void) { int i, err_cnt = 0, pass_cnt = 0; + int jit_cnt = 0, run_cnt = 0; for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_prog *fp; @@ -2006,6 +4200,13 @@ static __init int test_bpf(void) return err; } + + pr_cont("jited:%u ", fp->jited); + + run_cnt++; + if (fp->jited) + jit_cnt++; + err = run_one(fp, &tests[i]); release_filter(fp, i); @@ -2018,7 +4219,9 @@ static __init int test_bpf(void) } } - pr_info("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt); + pr_info("Summary: %d PASSED, %d FAILED, [%d/%d JIT'ed]\n", + pass_cnt, err_cnt, jit_cnt, run_cnt); + return err_cnt ? -EINVAL : 0; } diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index b2957540d3c7..c90777eae1f8 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -1,14 +1,9 @@ /* * Resizable, Scalable, Concurrent Hash Table * - * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch> + * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * - * Based on the following paper: - * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf - * - * Code partially derived from nft_hash - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -26,20 +21,37 @@ #include <linux/rhashtable.h> #include <linux/slab.h> +#define MAX_ENTRIES 1000000 +#define TEST_INSERT_FAIL INT_MAX + +static int entries = 50000; +module_param(entries, int, 0); +MODULE_PARM_DESC(entries, "Number of entries to add (default: 50000)"); + +static int runs = 4; +module_param(runs, int, 0); +MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)"); + +static int max_size = 65536; +module_param(max_size, int, 0); +MODULE_PARM_DESC(runs, "Maximum table size (default: 65536)"); -#define TEST_HT_SIZE 8 -#define TEST_ENTRIES 2048 -#define TEST_PTR ((void *) 0xdeadbeef) -#define TEST_NEXPANDS 4 +static bool shrinking = false; +module_param(shrinking, bool, 0); +MODULE_PARM_DESC(shrinking, "Enable automatic shrinking (default: off)"); + +static int size = 8; +module_param(size, int, 0); +MODULE_PARM_DESC(size, "Initial size hint of table (default: 8)"); struct test_obj { - void *ptr; int value; struct rhash_head node; }; -static const struct rhashtable_params test_rht_params = { - .nelem_hint = TEST_HT_SIZE, +static struct test_obj array[MAX_ENTRIES]; + +static struct rhashtable_params test_rht_params = { .head_offset = offsetof(struct test_obj, node), .key_offset = offsetof(struct test_obj, value), .key_len = sizeof(int), @@ -51,11 +63,14 @@ static int __init test_rht_lookup(struct rhashtable *ht) { unsigned int i; - for (i = 0; i < TEST_ENTRIES * 2; i++) { + for (i = 0; i < entries * 2; i++) { struct test_obj *obj; bool expected = !(i % 2); u32 key = i; + if (array[i / 2].value == TEST_INSERT_FAIL) + expected = false; + obj = rhashtable_lookup_fast(ht, &key, test_rht_params); if (expected && !obj) { @@ -66,9 +81,9 @@ static int __init test_rht_lookup(struct rhashtable *ht) key); return -EEXIST; } else if (expected && obj) { - if (obj->ptr != TEST_PTR || obj->value != i) { - pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", - obj->ptr, TEST_PTR, obj->value, i); + if (obj->value != i) { + pr_warn("Test failed: Lookup value mismatch %u!=%u\n", + obj->value, i); return -EINVAL; } } @@ -77,129 +92,147 @@ static int __init test_rht_lookup(struct rhashtable *ht) return 0; } -static void test_bucket_stats(struct rhashtable *ht, bool quiet) +static void test_bucket_stats(struct rhashtable *ht) { - unsigned int cnt, rcu_cnt, i, total = 0; + unsigned int err, total = 0, chain_len = 0; + struct rhashtable_iter hti; struct rhash_head *pos; - struct test_obj *obj; - struct bucket_table *tbl; - tbl = rht_dereference_rcu(ht->tbl, ht); - for (i = 0; i < tbl->size; i++) { - rcu_cnt = cnt = 0; + err = rhashtable_walk_init(ht, &hti); + if (err) { + pr_warn("Test failed: allocation error"); + return; + } - if (!quiet) - pr_info(" [%#4x/%u]", i, tbl->size); + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) { + pr_warn("Test failed: iterator failed: %d\n", err); + return; + } - rht_for_each_entry_rcu(obj, pos, tbl, i, node) { - cnt++; - total++; - if (!quiet) - pr_cont(" [%p],", obj); + while ((pos = rhashtable_walk_next(&hti))) { + if (PTR_ERR(pos) == -EAGAIN) { + pr_info("Info: encountered resize\n"); + chain_len++; + continue; + } else if (IS_ERR(pos)) { + pr_warn("Test failed: rhashtable_walk_next() error: %ld\n", + PTR_ERR(pos)); + break; } - rht_for_each_entry_rcu(obj, pos, tbl, i, node) - rcu_cnt++; - - if (rcu_cnt != cnt) - pr_warn("Test failed: Chain count mismach %d != %d", - cnt, rcu_cnt); - - if (!quiet) - pr_cont("\n [%#x] first element: %p, chain length: %u\n", - i, tbl->buckets[i], cnt); + total++; } - pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d\n", - total, atomic_read(&ht->nelems), TEST_ENTRIES); + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + + pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d, table-jumps=%u\n", + total, atomic_read(&ht->nelems), entries, chain_len); - if (total != atomic_read(&ht->nelems) || total != TEST_ENTRIES) + if (total != atomic_read(&ht->nelems) || total != entries) pr_warn("Test failed: Total count mismatch ^^^"); } -static int __init test_rhashtable(struct rhashtable *ht) +static s64 __init test_rhashtable(struct rhashtable *ht) { - struct bucket_table *tbl; struct test_obj *obj; - struct rhash_head *pos, *next; int err; - unsigned int i; + unsigned int i, insert_fails = 0; + s64 start, end; /* * Insertion Test: - * Insert TEST_ENTRIES into table with all keys even numbers + * Insert entries into table with all keys even numbers */ - pr_info(" Adding %d keys\n", TEST_ENTRIES); - for (i = 0; i < TEST_ENTRIES; i++) { - struct test_obj *obj; - - obj = kzalloc(sizeof(*obj), GFP_KERNEL); - if (!obj) { - err = -ENOMEM; - goto error; - } + pr_info(" Adding %d keys\n", entries); + start = ktime_get_ns(); + for (i = 0; i < entries; i++) { + struct test_obj *obj = &array[i]; - obj->ptr = TEST_PTR; obj->value = i * 2; err = rhashtable_insert_fast(ht, &obj->node, test_rht_params); - if (err) { - kfree(obj); - goto error; + if (err == -ENOMEM || err == -EBUSY) { + /* Mark failed inserts but continue */ + obj->value = TEST_INSERT_FAIL; + insert_fails++; + } else if (err) { + return err; } } + if (insert_fails) + pr_info(" %u insertions failed due to memory pressure\n", + insert_fails); + + test_bucket_stats(ht); rcu_read_lock(); - test_bucket_stats(ht, true); test_rht_lookup(ht); rcu_read_unlock(); - rcu_read_lock(); - test_bucket_stats(ht, true); - rcu_read_unlock(); + test_bucket_stats(ht); - pr_info(" Deleting %d keys\n", TEST_ENTRIES); - for (i = 0; i < TEST_ENTRIES; i++) { + pr_info(" Deleting %d keys\n", entries); + for (i = 0; i < entries; i++) { u32 key = i * 2; - obj = rhashtable_lookup_fast(ht, &key, test_rht_params); - BUG_ON(!obj); + if (array[i].value != TEST_INSERT_FAIL) { + obj = rhashtable_lookup_fast(ht, &key, test_rht_params); + BUG_ON(!obj); - rhashtable_remove_fast(ht, &obj->node, test_rht_params); - kfree(obj); + rhashtable_remove_fast(ht, &obj->node, test_rht_params); + } } - return 0; - -error: - tbl = rht_dereference_rcu(ht->tbl, ht); - for (i = 0; i < tbl->size; i++) - rht_for_each_entry_safe(obj, pos, next, tbl, i, node) - kfree(obj); + end = ktime_get_ns(); + pr_info(" Duration of test: %lld ns\n", end - start); - return err; + return end - start; } static struct rhashtable ht; static int __init test_rht_init(void) { - int err; + int i, err; + u64 total_time = 0; - pr_info("Running resizable hashtable tests...\n"); + entries = min(entries, MAX_ENTRIES); - err = rhashtable_init(&ht, &test_rht_params); - if (err < 0) { - pr_warn("Test failed: Unable to initialize hashtable: %d\n", - err); - return err; - } + test_rht_params.automatic_shrinking = shrinking; + test_rht_params.max_size = max_size; + test_rht_params.nelem_hint = size; - err = test_rhashtable(&ht); + pr_info("Running rhashtable test nelem=%d, max_size=%d, shrinking=%d\n", + size, max_size, shrinking); - rhashtable_destroy(&ht); + for (i = 0; i < runs; i++) { + s64 time; - return err; + pr_info("Test %02d:\n", i); + memset(&array, 0, sizeof(array)); + err = rhashtable_init(&ht, &test_rht_params); + if (err < 0) { + pr_warn("Test failed: Unable to initialize hashtable: %d\n", + err); + continue; + } + + time = test_rhashtable(&ht); + rhashtable_destroy(&ht); + if (time < 0) { + pr_warn("Test failed: return code %lld\n", time); + return -EINVAL; + } + + total_time += time; + } + + do_div(total_time, runs); + pr_info("Average test time: %llu\n", total_time); + + return 0; } static void __exit test_rht_exit(void) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ebffa0e4a9c0..2fd31aebef30 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2967,6 +2967,104 @@ void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); /* + * Page Fragment: + * An arbitrary-length arbitrary-offset area of memory which resides + * within a 0 or higher order page. Multiple fragments within that page + * are individually refcounted, in the page's reference counter. + * + * The page_frag functions below provide a simple allocation framework for + * page fragments. This is used by the network stack and network device + * drivers to provide a backing region of memory for use as either an + * sk_buff->head, or to be used in the "frags" portion of skb_shared_info. + */ +static struct page *__page_frag_refill(struct page_frag_cache *nc, + gfp_t gfp_mask) +{ + struct page *page = NULL; + gfp_t gfp = gfp_mask; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC; + page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + PAGE_FRAG_CACHE_MAX_ORDER); + nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE; +#endif + if (unlikely(!page)) + page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); + + nc->va = page ? page_address(page) : NULL; + + return page; +} + +void *__alloc_page_frag(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask) +{ + unsigned int size = PAGE_SIZE; + struct page *page; + int offset; + + if (unlikely(!nc->va)) { +refill: + page = __page_frag_refill(nc, gfp_mask); + if (!page) + return NULL; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif + /* Even if we own the page, we do not use atomic_set(). + * This would break get_page_unless_zero() users. + */ + atomic_add(size - 1, &page->_count); + + /* reset page count bias and offset to start of new frag */ + nc->pfmemalloc = page->pfmemalloc; + nc->pagecnt_bias = size; + nc->offset = size; + } + + offset = nc->offset - fragsz; + if (unlikely(offset < 0)) { + page = virt_to_page(nc->va); + + if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) + goto refill; + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) + /* if size can vary use size else just use PAGE_SIZE */ + size = nc->size; +#endif + /* OK, page count is 0, we can safely set it */ + atomic_set(&page->_count, size); + + /* reset page count bias and offset to start of new frag */ + nc->pagecnt_bias = size; + offset = size - fragsz; + } + + nc->pagecnt_bias--; + nc->offset = offset; + + return nc->va + offset; +} +EXPORT_SYMBOL(__alloc_page_frag); + +/* + * Frees a page fragment allocated out of either a compound or order 0 page. + */ +void __free_page_frag(void *addr) +{ + struct page *page = virt_to_head_page(addr); + + if (unlikely(put_page_testzero(page))) + __free_pages_ok(page, compound_order(page)); +} +EXPORT_SYMBOL(__free_page_frag); + +/* * alloc_kmem_pages charges newly allocated pages to the kmem resource counter * of the current memory cgroup. * diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 3b7ad43c7dad..d5871ac493eb 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1030,7 +1030,7 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; rc = -ENOMEM; - sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto); + sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern); if (!sk) goto out; rc = 0; diff --git a/net/atm/common.c b/net/atm/common.c index ed0466637e13..49a872db7e42 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -141,7 +141,7 @@ static struct proto vcc_proto = { .release_cb = vcc_release_cb, }; -int vcc_create(struct net *net, struct socket *sock, int protocol, int family) +int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern) { struct sock *sk; struct atm_vcc *vcc; @@ -149,7 +149,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family) sock->sk = NULL; if (sock->type == SOCK_STREAM) return -EINVAL; - sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto); + sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, kern); if (!sk) return -ENOMEM; sock_init_data(sock, sk); diff --git a/net/atm/common.h b/net/atm/common.h index 4d6f5b2068ac..959436b87182 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -10,7 +10,7 @@ #include <linux/poll.h> /* for poll_table */ -int vcc_create(struct net *net, struct socket *sock, int protocol, int family); +int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index ae0324021407..040207ec399f 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -136,7 +136,7 @@ static int pvc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; sock->ops = &pvc_proto_ops; - return vcc_create(net, sock, protocol, PF_ATMPVC); + return vcc_create(net, sock, protocol, PF_ATMPVC, kern); } static const struct net_proto_family pvc_family_ops = { diff --git a/net/atm/svc.c b/net/atm/svc.c index 1ba23f5018e7..3fa0a9ee98d1 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -660,7 +660,7 @@ static int svc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; sock->ops = &svc_proto_ops; - error = vcc_create(net, sock, protocol, AF_ATMSVC); + error = vcc_create(net, sock, protocol, AF_ATMSVC, kern); if (error) return error; ATM_SD(sock)->local.sas_family = AF_ATMSVC; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 330c1f4a5a0b..4273533d22b1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -855,7 +855,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto); + sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, kern); if (sk == NULL) return -ENOMEM; @@ -881,7 +881,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) struct sock *sk; ax25_cb *ax25, *oax25; - sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot); + sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot, 0); if (sk == NULL) return NULL; diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index bde2bdd9e929..b5116fa9835e 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -202,7 +202,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index d82787d417bd..ce86a7bae844 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -205,7 +205,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 56f9edbf3d05..5b14dcafcd08 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1377,7 +1377,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &hci_sock_ops; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index cb3fdde1968a..008ba439bd62 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -235,7 +235,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a7278f05eafb..244287706f91 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -43,7 +43,7 @@ static struct bt_sock_list l2cap_sk_list = { static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, - int proto, gfp_t prio); + int proto, gfp_t prio, int kern); bool l2cap_is_socket(struct socket *sock) { @@ -1193,7 +1193,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) } sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, - GFP_ATOMIC); + GFP_ATOMIC, 0); if (!sk) { release_sock(parent); return NULL; @@ -1523,12 +1523,12 @@ static struct proto l2cap_proto = { }; static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, - int proto, gfp_t prio) + int proto, gfp_t prio, int kern) { struct sock *sk; struct l2cap_chan *chan; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, kern); if (!sk) return NULL; @@ -1574,7 +1574,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &l2cap_sock_ops; - sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4fea24275b17..29709fbfd1f5 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -200,7 +200,7 @@ static int rfcomm_l2sock_create(struct socket **sock) BT_DBG(""); - err = sock_create_kern(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock); + err = sock_create_kern(&init_net, PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock); if (!err) { struct sock *sk = (*sock)->sk; sk->sk_data_ready = rfcomm_l2data_ready; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 825e8fb5114b..b2338e971b33 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -269,12 +269,12 @@ static struct proto rfcomm_proto = { .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) { struct rfcomm_dlc *d; struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, kern); if (!sk) return NULL; @@ -324,7 +324,7 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock, sock->ops = &rfcomm_sock_ops; - sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; @@ -969,7 +969,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * goto done; } - sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC, 0); if (!sk) goto done; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4322c833e748..6b6e59dc54cf 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -460,11 +460,11 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) { struct sock *sk; - sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, kern); if (!sk) return NULL; @@ -501,7 +501,7 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol, sock->ops = &sco_sock_ops; - sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC); + sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; @@ -1026,7 +1026,7 @@ static void sco_conn_ready(struct sco_conn *conn) bh_lock_sock(parent); sk = sco_sock_alloc(sock_net(parent), NULL, - BTPROTO_SCO, GFP_ATOMIC); + BTPROTO_SCO, GFP_ATOMIC, 0); if (!sk) { bh_unlock_sock(parent); sco_conn_unlock(conn); diff --git a/net/bridge/br.c b/net/bridge/br.c index 02c24cf63c34..a1abe4936fe1 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -121,13 +121,13 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; -static int br_netdev_switch_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int br_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) { - struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; - struct netdev_switch_notifier_fdb_info *fdb_info; + struct switchdev_notifier_fdb_info *fdb_info; int err = NOTIFY_DONE; rtnl_lock(); @@ -138,14 +138,14 @@ static int br_netdev_switch_event(struct notifier_block *unused, br = p->br; switch (event) { - case NETDEV_SWITCH_FDB_ADD: + case SWITCHDEV_FDB_ADD: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, fdb_info->vid); if (err) err = notifier_from_errno(err); break; - case NETDEV_SWITCH_FDB_DEL: + case SWITCHDEV_FDB_DEL: fdb_info = ptr; err = br_fdb_external_learn_del(br, p, fdb_info->addr, fdb_info->vid); @@ -159,8 +159,8 @@ out: return err; } -static struct notifier_block br_netdev_switch_notifier = { - .notifier_call = br_netdev_switch_event, +static struct notifier_block br_switchdev_notifier = { + .notifier_call = br_switchdev_event, }; static void __net_exit br_net_exit(struct net *net) @@ -214,7 +214,7 @@ static int __init br_init(void) if (err) goto err_out3; - err = register_netdev_switch_notifier(&br_netdev_switch_notifier); + err = register_switchdev_notifier(&br_switchdev_notifier); if (err) goto err_out4; @@ -235,7 +235,7 @@ static int __init br_init(void) return 0; err_out5: - unregister_netdev_switch_notifier(&br_netdev_switch_notifier); + unregister_switchdev_notifier(&br_switchdev_notifier); err_out4: unregister_netdevice_notifier(&br_device_notifier); err_out3: @@ -253,7 +253,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); - unregister_netdev_switch_notifier(&br_netdev_switch_notifier); + unregister_switchdev_notifier(&br_switchdev_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); unregister_pernet_subsys(&br_net_ops); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 4b6722f8f179..2d69d5cab52f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -975,9 +975,6 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int err = 0; __be32 group; - if (!pskb_may_pull(skb, sizeof(*ih))) - return -EINVAL; - ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -1248,25 +1245,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, max_delay = 10 * HZ; group = 0; } - } else { - if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { - err = -EINVAL; - goto out; - } - + } else if (skb->len >= sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; max_delay = ih3->code ? IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; - } - - /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer - * all-systems destination addresses (224.0.0.1) for general queries - */ - if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) { - err = -EINVAL; + } else { goto out; } @@ -1329,12 +1315,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ - if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) { - err = -EINVAL; - goto out; - } - if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; @@ -1358,14 +1338,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, is_general_query = group && ipv6_addr_any(group); - /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer - * all-nodes destination address (ff02::1) for general queries - */ - if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) { - err = -EINVAL; - goto out; - } - if (is_general_query) { saddr.proto = htons(ETH_P_IPV6); saddr.u.ip6 = ip6h->saddr; @@ -1557,74 +1529,22 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb2 = skb; - const struct iphdr *iph; + struct sk_buff *skb_trimmed = NULL; struct igmphdr *ih; - unsigned int len; - unsigned int offset; int err; - /* We treat OOM as packet loss for now. */ - if (!pskb_may_pull(skb, sizeof(*iph))) - return -EINVAL; - - iph = ip_hdr(skb); - - if (iph->ihl < 5 || iph->version != 4) - return -EINVAL; - - if (!pskb_may_pull(skb, ip_hdrlen(skb))) - return -EINVAL; - - iph = ip_hdr(skb); - - if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) - return -EINVAL; + err = ip_mc_check_igmp(skb, &skb_trimmed); - if (iph->protocol != IPPROTO_IGMP) { - if (!ipv4_is_local_multicast(iph->daddr)) + if (err == -ENOMSG) { + if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; + } else if (err < 0) { + return err; } - len = ntohs(iph->tot_len); - if (skb->len < len || len < ip_hdrlen(skb)) - return -EINVAL; - - if (skb->len > len) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - return -ENOMEM; - - err = pskb_trim_rcsum(skb2, len); - if (err) - goto err_out; - } - - len -= ip_hdrlen(skb2); - offset = skb_network_offset(skb2) + ip_hdrlen(skb2); - __skb_pull(skb2, offset); - skb_reset_transport_header(skb2); - - err = -EINVAL; - if (!pskb_may_pull(skb2, sizeof(*ih))) - goto out; - - switch (skb2->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_fold(skb2->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb2->csum = 0; - if (skb_checksum_complete(skb2)) - goto out; - } - - err = 0; - BR_INPUT_SKB_CB(skb)->igmp = 1; - ih = igmp_hdr(skb2); + ih = igmp_hdr(skb); switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: @@ -1633,21 +1553,19 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb2, vid); + err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_ip4_multicast_query(br, port, skb2, vid); + err = br_ip4_multicast_query(br, port, skb_trimmed, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid); break; } -out: - __skb_push(skb2, offset); -err_out: - if (skb2 != skb) - kfree_skb(skb2); + if (skb_trimmed) + kfree_skb(skb_trimmed); + return err; } @@ -1657,138 +1575,42 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb2; - const struct ipv6hdr *ip6h; - u8 icmp6_type; - u8 nexthdr; - __be16 frag_off; - unsigned int len; - int offset; + struct sk_buff *skb_trimmed = NULL; + struct mld_msg *mld; int err; - if (!pskb_may_pull(skb, sizeof(*ip6h))) - return -EINVAL; - - ip6h = ipv6_hdr(skb); - - /* - * We're interested in MLD messages only. - * - Version is 6 - * - MLD has always Router Alert hop-by-hop option - * - But we do not support jumbrograms. - */ - if (ip6h->version != 6) - return 0; - - /* Prevent flooding this packet if there is no listener present */ - if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) - BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + err = ipv6_mc_check_mld(skb, &skb_trimmed); - if (ip6h->nexthdr != IPPROTO_HOPOPTS || - ip6h->payload_len == 0) - return 0; - - len = ntohs(ip6h->payload_len) + sizeof(*ip6h); - if (skb->len < len) - return -EINVAL; - - nexthdr = ip6h->nexthdr; - offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off); - - if (offset < 0 || nexthdr != IPPROTO_ICMPV6) + if (err == -ENOMSG) { + if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; - - /* Okay, we found ICMPv6 header */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - return -ENOMEM; - - err = -EINVAL; - if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr))) - goto out; - - len -= offset - skb_network_offset(skb2); - - __skb_pull(skb2, offset); - skb_reset_transport_header(skb2); - skb_postpull_rcsum(skb2, skb_network_header(skb2), - skb_network_header_len(skb2)); - - icmp6_type = icmp6_hdr(skb2)->icmp6_type; - - switch (icmp6_type) { - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - case ICMPV6_MLD2_REPORT: - break; - default: - err = 0; - goto out; - } - - /* Okay, we found MLD message. Check further. */ - if (skb2->len > len) { - err = pskb_trim_rcsum(skb2, len); - if (err) - goto out; - err = -EINVAL; - } - - ip6h = ipv6_hdr(skb2); - - switch (skb2->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len, - IPPROTO_ICMPV6, skb2->csum)) - break; - /*FALLTHROUGH*/ - case CHECKSUM_NONE: - skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, - &ip6h->daddr, - skb2->len, - IPPROTO_ICMPV6, 0)); - if (__skb_checksum_complete(skb2)) - goto out; + } else if (err < 0) { + return err; } - err = 0; - BR_INPUT_SKB_CB(skb)->igmp = 1; + mld = (struct mld_msg *)skb_transport_header(skb); - switch (icmp6_type) { + switch (mld->mld_type) { case ICMPV6_MGM_REPORT: - { - struct mld_msg *mld; - if (!pskb_may_pull(skb2, sizeof(*mld))) { - err = -EINVAL; - goto out; - } - mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); break; - } case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb2, vid); + err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb2, vid); + err = br_ip6_multicast_query(br, port, skb_trimmed, vid); break; case ICMPV6_MGM_REDUCTION: - { - struct mld_msg *mld; - if (!pskb_may_pull(skb2, sizeof(*mld))) { - err = -EINVAL; - goto out; - } - mld = (struct mld_msg *)skb_transport_header(skb2); br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); - } + break; } -out: - kfree_skb(skb2); + if (skb_trimmed) + kfree_skb(skb_trimmed); + return err; } #endif diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 4b5c236998ff..6b67ed3831de 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -586,7 +586,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; - int err = 0, ret_offload = 0; + int err = 0; protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); @@ -628,16 +628,6 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) afspec, RTM_SETLINK); } - if (p && !(flags & BRIDGE_FLAGS_SELF)) { - /* set bridge attributes in hardware if supported - */ - ret_offload = netdev_switch_port_bridge_setlink(dev, nlh, - flags); - if (ret_offload && ret_offload != -EOPNOTSUPP) - br_warn(p->br, "error setting attrs on port %u(%s)\n", - (unsigned int)p->port_no, p->dev->name); - } - if (err == 0) br_ifinfo_notify(RTM_NEWLINK, p); out: @@ -649,7 +639,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *afspec; struct net_bridge_port *p; - int err = 0, ret_offload = 0; + int err = 0; afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!afspec) @@ -668,16 +658,6 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) */ br_ifinfo_notify(RTM_NEWLINK, p); - if (p && !(flags & BRIDGE_FLAGS_SELF)) { - /* del bridge attributes in hardware - */ - ret_offload = netdev_switch_port_bridge_dellink(dev, nlh, - flags); - if (ret_offload && ret_offload != -EOPNOTSUPP) - br_warn(p->br, "error deleting attrs on port %u (%s)\n", - (unsigned int)p->port_no, p->dev->name); - } - return err; } static int br_validate(struct nlattr *tb[], struct nlattr *data[]) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3362c29400f1..1f36fa70639b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -33,8 +33,8 @@ /* Control of forwarding link local multicast */ #define BR_GROUPFWD_DEFAULT 0 -/* Don't allow forwarding control protocols like STP and LLDP */ -#define BR_GROUPFWD_RESTRICTED 0x4007u +/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */ +#define BR_GROUPFWD_RESTRICTED 0x0007u /* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ #define BR_GROUPFWD_8021AD 0xB801u diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index fb3ebe615513..45f1ff113af9 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -39,10 +39,14 @@ void br_log_state(const struct net_bridge_port *p) void br_set_state(struct net_bridge_port *p, unsigned int state) { + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_STP_STATE, + .u.stp_state = state, + }; int err; p->state = state; - err = netdev_switch_port_stp_update(p->dev, state); + err = switchdev_port_attr_set(p->dev, &attr); if (err && err != -EOPNOTSUPP) br_warn(p->br, "error setting offload STP state on port %u(%s)\n", (unsigned int) p->port_no, p->dev->name); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 91180a7fc943..5149d9e71114 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -139,7 +139,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { - if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO)) + if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO)) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && FWINV2(e->ethproto != ethproto, EBT_IPROTO)) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 4ec0c803aef1..78a04ebb113c 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1047,7 +1047,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, * is really not used at all in the net/core or socket.c but the * initialization makes sure that sock->state is not uninitialized. */ - sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot); + sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot, kern); if (!sk) return -ENOMEM; diff --git a/net/can/af_can.c b/net/can/af_can.c index 32d710eaf1fc..d4d404bdfc9a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -179,7 +179,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol, sock->ops = cp->ops; - sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot); + sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot, kern); if (!sk) { err = -ENOMEM; goto errout; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 967080a9f043..073262fea6dd 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -480,8 +480,8 @@ static int ceph_tcp_connect(struct ceph_connection *con) int ret; BUG_ON(con->sock); - ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, - IPPROTO_TCP, &sock); + ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; sock->sk->sk_allocation = GFP_NOFS; diff --git a/net/core/dev.c b/net/core/dev.c index 2c1c67fad64d..90a568a150b4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3521,60 +3521,41 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif #ifdef CONFIG_NET_CLS_ACT -/* TODO: Maybe we should just force sch_ingress to be compiled in - * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions - * a compare and 2 stores extra right now if we dont have it on - * but have CONFIG_NET_CLS_ACT - * NOTE: This doesn't stop any functionality; if you dont have - * the ingress scheduler, you just can't add policies on ingress. - * - */ -static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) -{ - struct net_device *dev = skb->dev; - u32 ttl = G_TC_RTTL(skb->tc_verd); - int result = TC_ACT_OK; - struct Qdisc *q; - - if (unlikely(MAX_RED_LOOP < ttl++)) { - net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", - skb->skb_iif, dev->ifindex); - return TC_ACT_SHOT; - } - - skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - - q = rcu_dereference(rxq->qdisc); - if (q != &noop_qdisc) { - spin_lock(qdisc_lock(q)); - if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) - result = qdisc_enqueue_root(skb, q); - spin_unlock(qdisc_lock(q)); - } - - return result; -} - static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); + struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); + struct tcf_result cl_res; - if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc) + /* If there's at least one ingress present somewhere (so + * we get here via enabled static key), remaining devices + * that are not configured with an ingress qdisc will bail + * out here. + */ + if (!cl) return skb; - if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } - switch (ing_filter(skb, rxq)) { + qdisc_bstats_update_cpu(cl->q, skb); + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); + + switch (tc_classify(skb, cl, &cl_res)) { + case TC_ACT_OK: + case TC_ACT_RECLASSIFY: + skb->tc_index = TC_H_MIN(cl_res.classid); + break; case TC_ACT_SHOT: + qdisc_qstats_drop_cpu(cl->q); case TC_ACT_STOLEN: + case TC_ACT_QUEUED: kfree_skb(skb); return NULL; + default: + break; } return skb; @@ -6320,6 +6301,17 @@ static int netif_alloc_netdev_queues(struct net_device *dev) return 0; } +void netif_tx_stop_all_queues(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + netif_tx_stop_queue(txq); + } +} +EXPORT_SYMBOL(netif_tx_stop_all_queues); + /** * register_netdevice - register a network device * @dev: device to register diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1d00b8922902..eb0c3ace7458 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -98,7 +98,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_BUSY_POLL_BIT] = "busy-poll", - [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload", }; static const char diff --git a/net/core/filter.c b/net/core/filter.c index bf831a85c315..a831f193e2c7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -355,8 +355,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * for socket filters: ctx == 'struct sk_buff *', for seccomp: * ctx == 'struct seccomp_data *'. */ -int bpf_convert_filter(struct sock_filter *prog, int len, - struct bpf_insn *new_prog, int *new_len) +static int bpf_convert_filter(struct sock_filter *prog, int len, + struct bpf_insn *new_prog, int *new_len) { int new_flen = 0, pass = 0, target, i; struct bpf_insn *new_insn; @@ -371,7 +371,8 @@ int bpf_convert_filter(struct sock_filter *prog, int len, return -EINVAL; if (new_prog) { - addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL); + addrs = kcalloc(len, sizeof(*addrs), + GFP_KERNEL | __GFP_NOWARN); if (!addrs) return -ENOMEM; } @@ -751,7 +752,8 @@ static bool chk_code_allowed(u16 code_to_probe) * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) +static int bpf_check_classic(const struct sock_filter *filter, + unsigned int flen) { bool anc_found; int pc; @@ -825,7 +827,6 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen) return -EINVAL; } -EXPORT_SYMBOL(bpf_check_classic); static int bpf_prog_store_orig_filter(struct bpf_prog *fp, const struct sock_fprog *fprog) @@ -839,7 +840,9 @@ static int bpf_prog_store_orig_filter(struct bpf_prog *fp, fkprog = fp->orig_prog; fkprog->len = fprog->len; - fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL); + + fkprog->filter = kmemdup(fp->insns, fsize, + GFP_KERNEL | __GFP_NOWARN); if (!fkprog->filter) { kfree(fp->orig_prog); return -ENOMEM; @@ -941,7 +944,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) * pass. At this time, the user BPF is stored in fp->insns. */ old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!old_prog) { err = -ENOMEM; goto out_err; @@ -988,7 +991,8 @@ out_err: return ERR_PTR(err); } -static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) +static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, + bpf_aux_classic_check_t trans) { int err; @@ -1001,6 +1005,17 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) return ERR_PTR(err); } + /* There might be additional checks and transformations + * needed on classic filters, f.e. in case of seccomp. + */ + if (trans) { + err = trans(fp->insns, fp->len); + if (err) { + __bpf_prog_release(fp); + return ERR_PTR(err); + } + } + /* Probe if we can JIT compile the filter and if so, do * the compilation of the filter. */ @@ -1050,7 +1065,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - fp = bpf_prepare_filter(fp); + fp = bpf_prepare_filter(fp, NULL); if (IS_ERR(fp)) return PTR_ERR(fp); @@ -1059,6 +1074,53 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) } EXPORT_SYMBOL_GPL(bpf_prog_create); +/** + * bpf_prog_create_from_user - create an unattached filter from user buffer + * @pfp: the unattached filter that is created + * @fprog: the filter program + * @trans: post-classic verifier transformation handler + * + * This function effectively does the same as bpf_prog_create(), only + * that it builds up its insns buffer from user space provided buffer. + * It also allows for passing a bpf_aux_classic_check_t handler. + */ +int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, + bpf_aux_classic_check_t trans) +{ + unsigned int fsize = bpf_classic_proglen(fprog); + struct bpf_prog *fp; + + /* Make sure new filter is there and in the right amounts. */ + if (fprog->filter == NULL) + return -EINVAL; + + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); + if (!fp) + return -ENOMEM; + + if (copy_from_user(fp->insns, fprog->filter, fsize)) { + __bpf_prog_free(fp); + return -EFAULT; + } + + fp->len = fprog->len; + /* Since unattached filters are not copied back to user + * space through sk_get_filter(), we do not need to hold + * a copy here, and can spare us the work. + */ + fp->orig_prog = NULL; + + /* bpf_prepare_filter() already takes care of freeing + * memory in case something goes wrong. + */ + fp = bpf_prepare_filter(fp, trans); + if (IS_ERR(fp)) + return PTR_ERR(fp); + + *pfp = fp; + return 0; +} + void bpf_prog_destroy(struct bpf_prog *fp) { __bpf_prog_release(fp); @@ -1135,7 +1197,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - prog = bpf_prepare_filter(prog); + prog = bpf_prepare_filter(prog, NULL); if (IS_ERR(prog)) return PTR_ERR(prog); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2c35c02a931e..d3acc4dff4ae 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -267,13 +267,12 @@ static __always_inline void __flow_hash_secret_init(void) net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c) +static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval) { - __flow_hash_secret_init(); - return jhash_3words(a, b, c, hashrnd); + return jhash_3words(a, b, c, keyval); } -static inline u32 __flow_hash_from_keys(struct flow_keys *keys) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) { u32 hash; @@ -287,7 +286,8 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys) hash = __flow_hash_3words((__force u32)keys->dst, (__force u32)keys->src, - (__force u32)keys->ports); + (__force u32)keys->ports, + keyval); if (!hash) hash = 1; @@ -296,10 +296,47 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys) u32 flow_hash_from_keys(struct flow_keys *keys) { - return __flow_hash_from_keys(keys); + __flow_hash_secret_init(); + return __flow_hash_from_keys(keys, hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); +static inline u32 ___skb_get_hash(const struct sk_buff *skb, + struct flow_keys *keys, u32 keyval) +{ + if (!skb_flow_dissect(skb, keys)) + return 0; + + return __flow_hash_from_keys(keys, keyval); +} + +struct _flow_keys_digest_data { + __be16 n_proto; + u8 ip_proto; + u8 padding; + __be32 ports; + __be32 src; + __be32 dst; +}; + +void make_flow_keys_digest(struct flow_keys_digest *digest, + const struct flow_keys *flow) +{ + struct _flow_keys_digest_data *data = + (struct _flow_keys_digest_data *)digest; + + BUILD_BUG_ON(sizeof(*data) > sizeof(*digest)); + + memset(digest, 0, sizeof(*digest)); + + data->n_proto = flow->n_proto; + data->ip_proto = flow->ip_proto; + data->ports = flow->ports; + data->src = flow->src; + data->dst = flow->dst; +} +EXPORT_SYMBOL(make_flow_keys_digest); + /* * __skb_get_hash: calculate a flow hash based on src/dst addresses * and src/dst port numbers. Sets hash in skb to non-zero hash value @@ -309,8 +346,12 @@ EXPORT_SYMBOL(flow_hash_from_keys); void __skb_get_hash(struct sk_buff *skb) { struct flow_keys keys; + u32 hash; - if (!skb_flow_dissect(skb, &keys)) + __flow_hash_secret_init(); + + hash = ___skb_get_hash(skb, &keys, hashrnd); + if (!hash) return; if (keys.ports) @@ -318,10 +359,18 @@ void __skb_get_hash(struct sk_buff *skb) skb->sw_hash = 1; - skb->hash = __flow_hash_from_keys(&keys); + skb->hash = hash; } EXPORT_SYMBOL(__skb_get_hash); +__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) +{ + struct flow_keys keys; + + return ___skb_get_hash(skb, &keys, perturb); +} +EXPORT_SYMBOL(skb_get_hash_perturb); + /* * Returns a Tx hash based on the given packet descriptor a Tx queues' number * to be used as a distribution range. diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4238d6da5c60..18b34d771ed4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -458,11 +458,15 @@ static ssize_t phys_switch_id_show(struct device *dev, return restart_syscall(); if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; - ret = netdev_switch_parent_id_get(netdev, &ppid); + ret = switchdev_port_attr_get(netdev, &attr); if (!ret) - ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id); + ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len, + attr.u.ppid.id); } rtnl_unlock(); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 572af0011997..cbee75f2fc28 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -28,6 +28,7 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; DEFINE_MUTEX(net_mutex); +static DEFINE_SPINLOCK(nsid_lock); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); @@ -147,24 +148,17 @@ static void ops_free_list(const struct pernet_operations *ops, } } -static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, - int id); +/* should be called with nsid_lock held */ static int alloc_netid(struct net *net, struct net *peer, int reqid) { - int min = 0, max = 0, id; - - ASSERT_RTNL(); + int min = 0, max = 0; if (reqid >= 0) { min = reqid; max = reqid + 1; } - id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); - if (id >= 0) - rtnl_net_notifyid(net, peer, RTM_NEWNSID, id); - - return id; + return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC); } /* This function is used by idr_for_each(). If net is equal to peer, the @@ -180,11 +174,16 @@ static int net_eq_idr(int id, void *net, void *peer) return 0; } -static int __peernet2id(struct net *net, struct net *peer, bool alloc) +/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc + * is set to true, thus the caller knows that the new id must be notified via + * rtnl. + */ +static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) { int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); + bool alloc_it = *alloc; - ASSERT_RTNL(); + *alloc = false; /* Magic value for id 0. */ if (id == NET_ID_ZERO) @@ -192,36 +191,77 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc) if (id > 0) return id; - if (alloc) - return alloc_netid(net, peer, -1); + if (alloc_it) { + id = alloc_netid(net, peer, -1); + *alloc = true; + return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; + } + + return NETNSA_NSID_NOT_ASSIGNED; +} + +/* should be called with nsid_lock held */ +static int __peernet2id(struct net *net, struct net *peer) +{ + bool no = false; - return -ENOENT; + return __peernet2id_alloc(net, peer, &no); } +static void rtnl_net_notifyid(struct net *net, int cmd, int id); /* This function returns the id of a peer netns. If no id is assigned, one will * be allocated and returned. */ +int peernet2id_alloc(struct net *net, struct net *peer) +{ + unsigned long flags; + bool alloc; + int id; + + spin_lock_irqsave(&nsid_lock, flags); + alloc = atomic_read(&peer->count) == 0 ? false : true; + id = __peernet2id_alloc(net, peer, &alloc); + spin_unlock_irqrestore(&nsid_lock, flags); + if (alloc && id >= 0) + rtnl_net_notifyid(net, RTM_NEWNSID, id); + return id; +} +EXPORT_SYMBOL(peernet2id_alloc); + +/* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) { - bool alloc = atomic_read(&peer->count) == 0 ? false : true; + unsigned long flags; int id; - id = __peernet2id(net, peer, alloc); - return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; + spin_lock_irqsave(&nsid_lock, flags); + id = __peernet2id(net, peer); + spin_unlock_irqrestore(&nsid_lock, flags); + return id; +} + +/* This function returns true is the peer netns has an id assigned into the + * current netns. + */ +bool peernet_has_id(struct net *net, struct net *peer) +{ + return peernet2id(net, peer) >= 0; } -EXPORT_SYMBOL(peernet2id); struct net *get_net_ns_by_id(struct net *net, int id) { + unsigned long flags; struct net *peer; if (id < 0) return NULL; rcu_read_lock(); + spin_lock_irqsave(&nsid_lock, flags); peer = idr_find(&net->netns_ids, id); if (peer) get_net(peer); + spin_unlock_irqrestore(&nsid_lock, flags); rcu_read_unlock(); return peer; @@ -362,14 +402,19 @@ static void cleanup_net(struct work_struct *work) list_del_rcu(&net->list); list_add_tail(&net->exit_list, &net_exit_list); for_each_net(tmp) { - int id = __peernet2id(tmp, net, false); + int id; - if (id >= 0) { - rtnl_net_notifyid(tmp, net, RTM_DELNSID, id); + spin_lock_irq(&nsid_lock); + id = __peernet2id(tmp, net); + if (id >= 0) idr_remove(&tmp->netns_ids, id); - } + spin_unlock_irq(&nsid_lock); + if (id >= 0) + rtnl_net_notifyid(tmp, RTM_DELNSID, id); } + spin_lock_irq(&nsid_lock); idr_destroy(&net->netns_ids); + spin_unlock_irq(&nsid_lock); } rtnl_unlock(); @@ -497,6 +542,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; + unsigned long flags; struct net *peer; int nsid, err; @@ -517,14 +563,18 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) if (IS_ERR(peer)) return PTR_ERR(peer); - if (__peernet2id(net, peer, false) >= 0) { + spin_lock_irqsave(&nsid_lock, flags); + if (__peernet2id(net, peer) >= 0) { err = -EEXIST; goto out; } err = alloc_netid(net, peer, nsid); - if (err > 0) + spin_unlock_irqrestore(&nsid_lock, flags); + if (err >= 0) { + rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; + } out: put_net(peer); return err; @@ -538,14 +588,10 @@ static int rtnl_net_get_size(void) } static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, - int cmd, struct net *net, struct net *peer, - int nsid) + int cmd, struct net *net, int nsid) { struct nlmsghdr *nlh; struct rtgenmsg *rth; - int id; - - ASSERT_RTNL(); nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags); if (!nlh) @@ -554,14 +600,7 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, rth = nlmsg_data(nlh); rth->rtgen_family = AF_UNSPEC; - if (nsid >= 0) { - id = nsid; - } else { - id = __peernet2id(net, peer, false); - if (id < 0) - id = NETNSA_NSID_NOT_ASSIGNED; - } - if (nla_put_s32(skb, NETNSA_NSID, id)) + if (nla_put_s32(skb, NETNSA_NSID, nsid)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -578,7 +617,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *tb[NETNSA_MAX + 1]; struct sk_buff *msg; struct net *peer; - int err; + int err, id; err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy); @@ -600,8 +639,9 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } + id = peernet2id(net, peer); err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, - RTM_NEWNSID, net, peer, -1); + RTM_NEWNSID, net, id); if (err < 0) goto err_out; @@ -633,7 +673,7 @@ static int rtnl_net_dumpid_one(int id, void *peer, void *data) ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid, net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWNSID, net_cb->net, peer, id); + RTM_NEWNSID, net_cb->net, id); if (ret < 0) return ret; @@ -652,17 +692,17 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) .idx = 0, .s_idx = cb->args[0], }; + unsigned long flags; - ASSERT_RTNL(); - + spin_lock_irqsave(&nsid_lock, flags); idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); + spin_unlock_irqrestore(&nsid_lock, flags); cb->args[0] = net_cb.idx; return skb->len; } -static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, - int id) +static void rtnl_net_notifyid(struct net *net, int cmd, int id) { struct sk_buff *msg; int err = -ENOMEM; @@ -671,7 +711,7 @@ static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, if (!msg) goto out; - err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id); + err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id); if (err < 0) goto err_out; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 508155b283dd..62f979984a23 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -210,6 +210,10 @@ #define T_REMDEVALL (1<<2) /* Remove all devs */ #define T_REMDEV (1<<3) /* Remove one dev */ +/* Xmit modes */ +#define M_START_XMIT 0 /* Default normal TX */ +#define M_NETIF_RECEIVE 1 /* Inject packets into stack */ + /* If lock -- protects updating of if_list */ #define if_lock(t) spin_lock(&(t->if_lock)); #define if_unlock(t) spin_unlock(&(t->if_lock)); @@ -251,13 +255,14 @@ struct pktgen_dev { * we will do a random selection from within the range. */ __u32 flags; - int removal_mark; /* non-zero => the device is marked for - * removal by worker thread */ - + int xmit_mode; int min_pkt_size; int max_pkt_size; int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; + int removal_mark; /* non-zero => the device is marked for + * removal by worker thread */ + struct page *page; u64 delay; /* nano-seconds */ @@ -620,6 +625,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->node >= 0) seq_printf(seq, " node: %d\n", pkt_dev->node); + if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) + seq_puts(seq, " xmit_mode: netif_receive\n"); + seq_puts(seq, " Flags: "); if (pkt_dev->flags & F_IPV6) @@ -1081,7 +1089,8 @@ static ssize_t pktgen_if_write(struct file *file, if (len < 0) return len; if ((value > 0) && - (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) || + !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) return -ENOTSUPP; i += len; pkt_dev->clone_skb = value; @@ -1134,7 +1143,7 @@ static ssize_t pktgen_if_write(struct file *file, return len; i += len; - if ((value > 1) && + if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) && (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) return -ENOTSUPP; pkt_dev->burst = value < 1 ? 1 : value; @@ -1160,6 +1169,45 @@ static ssize_t pktgen_if_write(struct file *file, sprintf(pg_result, "ERROR: node not possible"); return count; } + if (!strcmp(name, "xmit_mode")) { + char f[32]; + + memset(f, 0, 32); + len = strn_len(&user_buffer[i], sizeof(f) - 1); + if (len < 0) + return len; + + if (copy_from_user(f, &user_buffer[i], len)) + return -EFAULT; + i += len; + + if (strcmp(f, "start_xmit") == 0) { + pkt_dev->xmit_mode = M_START_XMIT; + } else if (strcmp(f, "netif_receive") == 0) { + /* clone_skb set earlier, not supported in this mode */ + if (pkt_dev->clone_skb > 0) + return -ENOTSUPP; + + pkt_dev->xmit_mode = M_NETIF_RECEIVE; + + /* make sure new packet is allocated every time + * pktgen_xmit() is called + */ + pkt_dev->last_ok = 1; + + /* override clone_skb if user passed default value + * at module loading time + */ + pkt_dev->clone_skb = 0; + } else { + sprintf(pg_result, + "xmit_mode -:%s:- unknown\nAvailable modes: %s", + f, "start_xmit, netif_receive\n"); + return count; + } + sprintf(pg_result, "OK: xmit_mode=%s", f); + return count; + } if (!strcmp(name, "flag")) { char f[32]; memset(f, 0, 32); @@ -1267,6 +1315,9 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "NO_TIMESTAMP") == 0) pkt_dev->flags |= F_NO_TIMESTAMP; + else if (strcmp(f, "!NO_TIMESTAMP") == 0) + pkt_dev->flags &= ~F_NO_TIMESTAMP; + else { sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", @@ -3317,6 +3368,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) unsigned int burst = ACCESS_ONCE(pkt_dev->burst); struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; + struct sk_buff *skb; int ret; /* If device is offline, then don't send */ @@ -3354,6 +3406,37 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) if (pkt_dev->delay && pkt_dev->last_ok) spin(pkt_dev, pkt_dev->next_tx); + if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) { + skb = pkt_dev->skb; + skb->protocol = eth_type_trans(skb, skb->dev); + atomic_add(burst, &skb->users); + local_bh_disable(); + do { + ret = netif_receive_skb(skb); + if (ret == NET_RX_DROP) + pkt_dev->errors++; + pkt_dev->sofar++; + pkt_dev->seq_num++; + if (atomic_read(&skb->users) != burst) { + /* skb was queued by rps/rfs or taps, + * so cannot reuse this skb + */ + atomic_sub(burst - 1, &skb->users); + /* get out of the loop and wait + * until skb is consumed + */ + break; + } + /* skb was 'freed' by stack, so clean few + * bits and reuse it + */ +#ifdef CONFIG_NET_CLS_ACT + skb->tc_verd = 0; /* reset reclass/redir ttl */ +#endif + } while (--burst > 0); + goto out; /* Skips xmit_mode M_START_XMIT */ + } + txq = skb_get_tx_queue(odev, pkt_dev->skb); local_bh_disable(); @@ -3401,6 +3484,7 @@ xmit_more: unlock: HARD_TX_UNLOCK(odev, txq); +out: local_bh_enable(); /* If pkt_dev->count is zero, then run forever */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 666e0928ba40..141ccc357e2e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1004,16 +1004,20 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; - struct netdev_phys_item_id psid; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .flags = SWITCHDEV_F_NO_RECURSE, + }; - err = netdev_switch_parent_id_get(dev, &psid); + err = switchdev_port_attr_get(dev, &attr); if (err) { if (err == -EOPNOTSUPP) return 0; return err; } - if (nla_put(skb, IFLA_PHYS_SWITCH_ID, psid.id_len, psid.id)) + if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len, + attr.u.ppid.id)) return -EMSGSIZE; return 0; @@ -1204,7 +1208,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id(dev_net(dev), link_net); + int id = peernet2id_alloc(dev_net(dev), link_net); if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) goto nla_put_failure; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3cfff2a3d651..d67e612bf0ef 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -347,94 +347,18 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) } EXPORT_SYMBOL(build_skb); -struct netdev_alloc_cache { - struct page_frag frag; - /* we maintain a pagecount bias, so that we dont dirty cache line - * containing page->_count every time we allocate a fragment. - */ - unsigned int pagecnt_bias; -}; -static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); -static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache); - -static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, - gfp_t gfp_mask) -{ - const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER; - struct page *page = NULL; - gfp_t gfp = gfp_mask; - - if (order) { - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | - __GFP_NOMEMALLOC; - page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); - nc->frag.size = PAGE_SIZE << (page ? order : 0); - } - - if (unlikely(!page)) - page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); - - nc->frag.page = page; - - return page; -} - -static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache, - unsigned int fragsz, gfp_t gfp_mask) -{ - struct netdev_alloc_cache *nc = this_cpu_ptr(cache); - struct page *page = nc->frag.page; - unsigned int size; - int offset; - - if (unlikely(!page)) { -refill: - page = __page_frag_refill(nc, gfp_mask); - if (!page) - return NULL; - - /* if size can vary use frag.size else just use PAGE_SIZE */ - size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE; - - /* Even if we own the page, we do not use atomic_set(). - * This would break get_page_unless_zero() users. - */ - atomic_add(size - 1, &page->_count); - - /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size; - nc->frag.offset = size; - } - - offset = nc->frag.offset - fragsz; - if (unlikely(offset < 0)) { - if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) - goto refill; - - /* if size can vary use frag.size else just use PAGE_SIZE */ - size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE; - - /* OK, page count is 0, we can safely set it */ - atomic_set(&page->_count, size); - - /* reset page count bias and offset to start of new frag */ - nc->pagecnt_bias = size; - offset = size - fragsz; - } - - nc->pagecnt_bias--; - nc->frag.offset = offset; - - return page_address(page) + offset; -} +static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); +static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { + struct page_frag_cache *nc; unsigned long flags; void *data; local_irq_save(flags); - data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask); + nc = this_cpu_ptr(&netdev_alloc_cache); + data = __alloc_page_frag(nc, fragsz, gfp_mask); local_irq_restore(flags); return data; } @@ -454,7 +378,9 @@ EXPORT_SYMBOL(netdev_alloc_frag); static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { - return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask); + struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); + + return __alloc_page_frag(nc, fragsz, gfp_mask); } void *napi_alloc_frag(unsigned int fragsz) @@ -464,76 +390,64 @@ void *napi_alloc_frag(unsigned int fragsz) EXPORT_SYMBOL(napi_alloc_frag); /** - * __alloc_rx_skb - allocate an skbuff for rx + * __netdev_alloc_skb - allocate an skbuff for rx on a specific device + * @dev: network device to receive on * @length: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb - * @flags: If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for - * allocations in case we have to fallback to __alloc_skb() - * If SKB_ALLOC_NAPI is set, page fragment will be allocated - * from napi_cache instead of netdev_cache. * * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate + * buffer has NET_SKB_PAD headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * * %NULL is returned if there is no free memory. */ -static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask, - int flags) +struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, + gfp_t gfp_mask) { - struct sk_buff *skb = NULL; - unsigned int fragsz = SKB_DATA_ALIGN(length) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + struct page_frag_cache *nc; + unsigned long flags; + struct sk_buff *skb; + bool pfmemalloc; + void *data; - if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { - void *data; + len += NET_SKB_PAD; - if (sk_memalloc_socks()) - gfp_mask |= __GFP_MEMALLOC; + if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || + (gfp_mask & (__GFP_WAIT | GFP_DMA))) + return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); - data = (flags & SKB_ALLOC_NAPI) ? - __napi_alloc_frag(fragsz, gfp_mask) : - __netdev_alloc_frag(fragsz, gfp_mask); + len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + len = SKB_DATA_ALIGN(len); - if (likely(data)) { - skb = build_skb(data, fragsz); - if (unlikely(!skb)) - put_page(virt_to_head_page(data)); - } - } else { - skb = __alloc_skb(length, gfp_mask, - SKB_ALLOC_RX, NUMA_NO_NODE); - } - return skb; -} + if (sk_memalloc_socks()) + gfp_mask |= __GFP_MEMALLOC; -/** - * __netdev_alloc_skb - allocate an skbuff for rx on a specific device - * @dev: network device to receive on - * @length: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has NET_SKB_PAD headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned if there is no free memory. - */ -struct sk_buff *__netdev_alloc_skb(struct net_device *dev, - unsigned int length, gfp_t gfp_mask) -{ - struct sk_buff *skb; + local_irq_save(flags); + + nc = this_cpu_ptr(&netdev_alloc_cache); + data = __alloc_page_frag(nc, len, gfp_mask); + pfmemalloc = nc->pfmemalloc; - length += NET_SKB_PAD; - skb = __alloc_rx_skb(length, gfp_mask, 0); + local_irq_restore(flags); + + if (unlikely(!data)) + return NULL; - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD); - skb->dev = dev; + skb = __build_skb(data, len); + if (unlikely(!skb)) { + skb_free_frag(data); + return NULL; } + /* use OR instead of assignment to avoid clearing of bits in mask */ + if (pfmemalloc) + skb->pfmemalloc = 1; + skb->head_frag = 1; + + skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + return skb; } EXPORT_SYMBOL(__netdev_alloc_skb); @@ -551,19 +465,43 @@ EXPORT_SYMBOL(__netdev_alloc_skb); * * %NULL is returned if there is no free memory. */ -struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, - unsigned int length, gfp_t gfp_mask) +struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, + gfp_t gfp_mask) { + struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; + void *data; - length += NET_SKB_PAD + NET_IP_ALIGN; - skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI); + len += NET_SKB_PAD + NET_IP_ALIGN; - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - skb->dev = napi->dev; + if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || + (gfp_mask & (__GFP_WAIT | GFP_DMA))) + return __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); + + len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + len = SKB_DATA_ALIGN(len); + + if (sk_memalloc_socks()) + gfp_mask |= __GFP_MEMALLOC; + + data = __alloc_page_frag(nc, len, gfp_mask); + if (unlikely(!data)) + return NULL; + + skb = __build_skb(data, len); + if (unlikely(!skb)) { + skb_free_frag(data); + return NULL; } + /* use OR instead of assignment to avoid clearing of bits in mask */ + if (nc->pfmemalloc) + skb->pfmemalloc = 1; + skb->head_frag = 1; + + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); + skb->dev = napi->dev; + return skb; } EXPORT_SYMBOL(__napi_alloc_skb); @@ -611,10 +549,12 @@ static void skb_clone_fraglist(struct sk_buff *skb) static void skb_free_head(struct sk_buff *skb) { + unsigned char *head = skb->head; + if (skb->head_frag) - put_page(virt_to_head_page(skb->head)); + skb_free_frag(head); else - kfree(skb->head); + kfree(head); } static void skb_release_data(struct sk_buff *skb) @@ -4030,6 +3970,93 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate) } EXPORT_SYMBOL(skb_checksum_setup); +/** + * skb_checksum_maybe_trim - maybe trims the given skb + * @skb: the skb to check + * @transport_len: the data length beyond the network header + * + * Checks whether the given skb has data beyond the given transport length. + * If so, returns a cloned skb trimmed to this transport length. + * Otherwise returns the provided skb. Returns NULL in error cases + * (e.g. transport_len exceeds skb length or out-of-memory). + * + * Caller needs to set the skb transport header and release the returned skb. + * Provided skb is consumed. + */ +static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb, + unsigned int transport_len) +{ + struct sk_buff *skb_chk; + unsigned int len = skb_transport_offset(skb) + transport_len; + int ret; + + if (skb->len < len) { + kfree_skb(skb); + return NULL; + } else if (skb->len == len) { + return skb; + } + + skb_chk = skb_clone(skb, GFP_ATOMIC); + kfree_skb(skb); + + if (!skb_chk) + return NULL; + + ret = pskb_trim_rcsum(skb_chk, len); + if (ret) { + kfree_skb(skb_chk); + return NULL; + } + + return skb_chk; +} + +/** + * skb_checksum_trimmed - validate checksum of an skb + * @skb: the skb to check + * @transport_len: the data length beyond the network header + * @skb_chkf: checksum function to use + * + * Applies the given checksum function skb_chkf to the provided skb. + * Returns a checked and maybe trimmed skb. Returns NULL on error. + * + * If the skb has data beyond the given transport length, then a + * trimmed & cloned skb is checked and returned. + * + * Caller needs to set the skb transport header and release the returned skb. + * Provided skb is consumed. + */ +struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, + unsigned int transport_len, + __sum16(*skb_chkf)(struct sk_buff *skb)) +{ + struct sk_buff *skb_chk; + unsigned int offset = skb_transport_offset(skb); + __sum16 ret; + + skb_chk = skb_checksum_maybe_trim(skb, transport_len); + if (!skb_chk) + return NULL; + + if (!pskb_may_pull(skb_chk, offset)) { + kfree_skb(skb_chk); + return NULL; + } + + __skb_pull(skb_chk, offset); + ret = skb_chkf(skb_chk); + __skb_push(skb_chk, offset); + + if (ret) { + kfree_skb(skb_chk); + return NULL; + } + + return skb_chk; +} +EXPORT_SYMBOL(skb_checksum_trimmed); + void __skb_warn_lro_forwarding(const struct sk_buff *skb) { net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n", diff --git a/net/core/sock.c b/net/core/sock.c index 292f42228bfb..c18738a795b0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1396,9 +1396,10 @@ EXPORT_SYMBOL_GPL(sock_update_netprioidx); * @family: protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * @prot: struct proto associated with this new sock instance + * @kern: is this to be a kernel socket? */ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot) + struct proto *prot, int kern) { struct sock *sk; @@ -1411,7 +1412,10 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); - sock_net_set(sk, get_net(net)); + sk->sk_net_refcnt = kern ? 0 : 1; + if (likely(sk->sk_net_refcnt)) + get_net(net); + sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); @@ -1445,7 +1449,8 @@ static void __sk_free(struct sock *sk) if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); - put_net(sock_net(sk)); + if (likely(sk->sk_net_refcnt)) + put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } @@ -1461,25 +1466,6 @@ void sk_free(struct sock *sk) } EXPORT_SYMBOL(sk_free); -/* - * Last sock_put should drop reference to sk->sk_net. It has already - * been dropped in sk_change_net. Taking reference to stopping namespace - * is not an option. - * Take reference to a socket to remove it from hash _alive_ and after that - * destroy it in the context of init_net. - */ -void sk_release_kernel(struct sock *sk) -{ - if (sk == NULL || sk->sk_socket == NULL) - return; - - sock_hold(sk); - sock_release(sk->sk_socket); - sock_net_set(sk, get_net(&init_net)); - sock_put(sk); -} -EXPORT_SYMBOL(sk_release_kernel); - static void sk_update_clone(const struct sock *sk, struct sock *newsk) { if (mem_cgroup_sockets_enabled && sk->sk_cgrp) diff --git a/net/core/stream.c b/net/core/stream.c index 301c05f26060..d70f77a0c889 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -119,6 +119,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) int err = 0; long vm_wait = 0; long current_timeo = *timeo_p; + bool noblock = (*timeo_p ? false : true); DEFINE_WAIT(wait); if (sk_stream_memory_free(sk)) @@ -131,8 +132,11 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; - if (!*timeo_p) + if (!*timeo_p) { + if (noblock) + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); goto do_nonblock; + } if (signal_pending(current)) goto do_interrupted; clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 754484b3cd0e..675cf94e04f8 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -468,10 +468,10 @@ static struct proto dn_proto = { .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) +static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern) { struct dn_scp *scp; - struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto); + struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern); if (!sk) goto out; @@ -693,7 +693,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol, } - if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL) + if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL) return -ENOBUFS; sk->sk_protocol = protocol; @@ -1096,7 +1096,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation); + newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 827cda560a55..04ffad311704 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -345,6 +345,24 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state) return ret; } +static int dsa_slave_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) +{ + int ret = 0; + + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_STP_STATE: + if (attr->trans == SWITCHDEV_TRANS_COMMIT) + ret = dsa_slave_stp_update(dev, attr->u.stp_state); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + static int dsa_slave_bridge_port_join(struct net_device *dev, struct net_device *br) { @@ -382,14 +400,20 @@ static int dsa_slave_bridge_port_leave(struct net_device *dev) return ret; } -static int dsa_slave_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +static int dsa_slave_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - psid->id_len = sizeof(ds->index); - memcpy(&psid->id, &ds->index, psid->id_len); + switch (attr->id) { + case SWITCHDEV_ATTR_PORT_PARENT_ID: + attr->u.ppid.id_len = sizeof(ds->index); + memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len); + break; + default: + return -EOPNOTSUPP; + } return 0; } @@ -675,9 +699,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_get_iflink = dsa_slave_get_iflink, }; -static const struct swdev_ops dsa_slave_swdev_ops = { - .swdev_parent_id_get = dsa_slave_parent_id_get, - .swdev_port_stp_update = dsa_slave_stp_update, +static const struct switchdev_ops dsa_slave_switchdev_ops = { + .switchdev_port_attr_get = dsa_slave_port_attr_get, + .switchdev_port_attr_set = dsa_slave_port_attr_set, }; static void dsa_slave_adjust_link(struct net_device *dev) @@ -810,12 +834,19 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, return 0; } +static struct lock_class_key dsa_slave_netdev_xmit_lock_key; +static void dsa_slave_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, + &dsa_slave_netdev_xmit_lock_key); +} + int dsa_slave_suspend(struct net_device *slave_dev) { struct dsa_slave_priv *p = netdev_priv(slave_dev); - netif_device_detach(slave_dev); - if (p->phy) { phy_stop(p->phy); p->old_pause = -1; @@ -859,7 +890,10 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; slave_dev->netdev_ops = &dsa_slave_netdev_ops; - slave_dev->swdev_ops = &dsa_slave_swdev_ops; + slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; + + netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, + NULL); SET_NETDEV_DEV(slave_dev, parent); slave_dev->dev.of_node = ds->pd->port_dn[port]; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f3bad41d725f..9045e2a1108f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -156,10 +156,11 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb_reset_mac_header(skb); + + eth = (struct ethhdr *)skb->data; skb_pull_inline(skb, ETH_HLEN); - eth = eth_hdr(skb); - if (unlikely(is_multicast_ether_addr(eth->h_dest))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else @@ -178,7 +179,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (unlikely(netdev_uses_dsa(dev))) return htons(ETH_P_XDSA); - if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN)) + if (likely(eth_proto_is_802_3(eth->h_proto))) return eth->h_proto; /* diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index b60c65f70346..7aaaf967df58 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1014,7 +1014,7 @@ static int ieee802154_create(struct net *net, struct socket *sock, } rc = -ENOMEM; - sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto); + sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto, kern); if (!sk) goto out; rc = 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8b47a4d79d04..235d36afece3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -317,7 +317,7 @@ lookup_protocol: WARN_ON(!answer_prot->slab); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; @@ -1430,7 +1430,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, struct net *net) { struct socket *sock; - int rc = sock_create_kern(family, type, protocol, &sock); + int rc = sock_create_kern(net, family, type, protocol, &sock); if (rc == 0) { *sk = sock->sk; @@ -1440,8 +1440,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, * we do not wish this socket to see incoming packets. */ (*sk)->sk_prot->unhash(*sk); - - sk_change_net(*sk, net); } return rc; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8d695b6659c7..28ec3c1823bf 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -713,8 +713,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, struct hlist_head *dest; unsigned int new_hash; - hlist_del(&fi->fib_hash); - new_hash = fib_info_hashfn(fi); dest = &new_info_hash[new_hash]; hlist_add_head(&fi->fib_hash, dest); @@ -731,8 +729,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, struct hlist_head *ldest; unsigned int new_hash; - hlist_del(&fi->fib_lhash); - new_hash = fib_laddr_hashfn(fi->fib_prefsrc); ldest = &new_laddrhash[new_hash]; hlist_add_head(&fi->fib_lhash, ldest); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e13fcc602da2..03444c6ae342 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1165,13 +1165,13 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; - err = netdev_switch_fib_ipv4_add(key, plen, fi, - new_fa->fa_tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); + err = switchdev_fib_ipv4_add(key, plen, fi, + new_fa->fa_tos, + cfg->fc_type, + cfg->fc_nlflags, + tb->tb_id); if (err) { - netdev_switch_fib_ipv4_abort(fi); + switchdev_fib_ipv4_abort(fi); kmem_cache_free(fn_alias_kmem, new_fa); goto out; } @@ -1215,12 +1215,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->tb_id = tb->tb_id; /* (Optionally) offload fib entry to switch hardware. */ - err = netdev_switch_fib_ipv4_add(key, plen, fi, tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); + err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, + cfg->fc_nlflags, tb->tb_id); if (err) { - netdev_switch_fib_ipv4_abort(fi); + switchdev_fib_ipv4_abort(fi); goto out_free_new_fa; } @@ -1239,7 +1237,7 @@ succeeded: return 0; out_sw_fib_del: - netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); + switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1517,8 +1515,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if (!fa_to_delete) return -ESRCH; - netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, - cfg->fc_type, tb->tb_id); + switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, + cfg->fc_type, tb->tb_id); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1767,10 +1765,9 @@ void fib_table_flush_external(struct fib_table *tb) if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL)) continue; - netdev_switch_fib_ipv4_del(n->key, - KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, - fa->fa_type, tb->tb_id); + switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, fa->fa_type, + tb->tb_id); } /* update leaf slen */ @@ -1835,10 +1832,9 @@ int fib_table_flush(struct fib_table *tb) continue; } - netdev_switch_fib_ipv4_del(n->key, - KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, - fa->fa_type, tb->tb_id); + switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, fa->fa_type, + tb->tb_id); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a3a697f5ffba..651cdf648ec4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1339,6 +1339,168 @@ out: } EXPORT_SYMBOL(ip_mc_inc_group); +static int ip_mc_check_iphdr(struct sk_buff *skb) +{ + const struct iphdr *iph; + unsigned int len; + unsigned int offset = skb_network_offset(skb) + sizeof(*iph); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + iph = ip_hdr(skb); + + if (iph->version != 4 || ip_hdrlen(skb) < sizeof(*iph)) + return -EINVAL; + + offset += ip_hdrlen(skb) - sizeof(*iph); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + iph = ip_hdr(skb); + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + return -EINVAL; + + len = skb_network_offset(skb) + ntohs(iph->tot_len); + if (skb->len < len || len < offset) + return -EINVAL; + + skb_set_transport_header(skb, offset); + + return 0; +} + +static int ip_mc_check_igmp_reportv3(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct igmpv3_report); + + return pskb_may_pull(skb, len) ? 0 : -EINVAL; +} + +static int ip_mc_check_igmp_query(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct igmphdr); + if (skb->len < len) + return -EINVAL; + + /* IGMPv{1,2}? */ + if (skb->len != len) { + /* or IGMPv3? */ + len += sizeof(struct igmpv3_query) - sizeof(struct igmphdr); + if (skb->len < len || !pskb_may_pull(skb, len)) + return -EINVAL; + } + + /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer + * all-systems destination addresses (224.0.0.1) for general queries + */ + if (!igmp_hdr(skb)->group && + ip_hdr(skb)->daddr != htonl(INADDR_ALLHOSTS_GROUP)) + return -EINVAL; + + return 0; +} + +static int ip_mc_check_igmp_msg(struct sk_buff *skb) +{ + switch (igmp_hdr(skb)->type) { + case IGMP_HOST_LEAVE_MESSAGE: + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMPV2_HOST_MEMBERSHIP_REPORT: + /* fall through */ + return 0; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + return ip_mc_check_igmp_reportv3(skb); + case IGMP_HOST_MEMBERSHIP_QUERY: + return ip_mc_check_igmp_query(skb); + default: + return -ENOMSG; + } +} + +static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb) +{ + return skb_checksum_simple_validate(skb); +} + +static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) + +{ + struct sk_buff *skb_chk; + unsigned int transport_len; + unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr); + int ret; + + transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb); + + skb_get(skb); + skb_chk = skb_checksum_trimmed(skb, transport_len, + ip_mc_validate_checksum); + if (!skb_chk) + return -EINVAL; + + if (!pskb_may_pull(skb_chk, len)) { + kfree_skb(skb_chk); + return -EINVAL; + } + + ret = ip_mc_check_igmp_msg(skb_chk); + if (ret) { + kfree_skb(skb_chk); + return ret; + } + + if (skb_trimmed) + *skb_trimmed = skb_chk; + else + kfree_skb(skb_chk); + + return 0; +} + +/** + * ip_mc_check_igmp - checks whether this is a sane IGMP packet + * @skb: the skb to validate + * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional) + * + * Checks whether an IPv4 packet is a valid IGMP packet. If so sets + * skb network and transport headers accordingly and returns zero. + * + * -EINVAL: A broken packet was detected, i.e. it violates some internet + * standard + * -ENOMSG: IP header validation succeeded but it is not an IGMP packet. + * -ENOMEM: A memory allocation failure happened. + * + * Optionally, an skb pointer might be provided via skb_trimmed (or set it + * to NULL): After parsing an IGMP packet successfully it will point to + * an skb which has its tail aligned to the IP packet end. This might + * either be the originally provided skb or a trimmed, cloned version if + * the skb frame had data beyond the IP packet. A cloned skb allows us + * to leave the original skb and its full frame unchanged (which might be + * desirable for layer 2 frame jugglers). + * + * The caller needs to release a reference count from any returned skb_trimmed. + */ +int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) +{ + int ret = ip_mc_check_iphdr(skb); + + if (ret < 0) + return ret; + + if (ip_hdr(skb)->protocol != IPPROTO_IGMP) + return -ENOMSG; + + return __ip_mc_check_igmp(skb, skb_trimmed); +} +EXPORT_SYMBOL(ip_mc_check_igmp); + /* * Resend IGMP JOIN report; used by netdev notifier. */ diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index ce63ab21b6cd..3998b1822d85 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -98,7 +98,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) return -ENOMEM; eh = (struct ethhdr *)skb->data; - if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) + if (likely(eth_proto_is_802_3(eh->h_proto))) skb->protocol = eh->h_proto; else skb->protocol = htons(ETH_P_802_2); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index e1f3b911dd1e..da5d483e236a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -298,6 +298,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), + SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE), + SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bff62fc87b8e..9e15f5ca4495 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -457,12 +457,9 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, } #define IP_IDENTS_SZ 2048u -struct ip_ident_bucket { - atomic_t id; - u32 stamp32; -}; -static struct ip_ident_bucket *ip_idents __read_mostly; +static atomic_t *ip_idents __read_mostly; +static u32 *ip_tstamps __read_mostly; /* In order to protect privacy, we add a perturbation to identifiers * if one generator is seldom used. This makes hard for an attacker @@ -470,15 +467,16 @@ static struct ip_ident_bucket *ip_idents __read_mostly; */ u32 ip_idents_reserve(u32 hash, int segs) { - struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ; - u32 old = ACCESS_ONCE(bucket->stamp32); + u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ; + atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; + u32 old = ACCESS_ONCE(*p_tstamp); u32 now = (u32)jiffies; u32 delta = 0; - if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) + if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = prandom_u32_max(now - old); - return atomic_add_return(segs + delta, &bucket->id) - segs; + return atomic_add_return(segs + delta, p_id) - segs; } EXPORT_SYMBOL(ip_idents_reserve); @@ -2093,7 +2091,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) goto out; } if (ipv4_is_local_multicast(fl4->daddr) || - ipv4_is_lbcast(fl4->daddr)) { + ipv4_is_lbcast(fl4->daddr) || + fl4->flowi4_proto == IPPROTO_IGMP) { if (!fl4->saddr) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); @@ -2738,6 +2737,10 @@ int __init ip_rt_init(void) prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL); + if (!ip_tstamps) + panic("IP: failed to allocate ip_tstamps\n"); + for_each_possible_cpu(cpu) { struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46efa03d2b11..ecccfdc50d76 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2482,6 +2482,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, icsk->icsk_syn_retries = val; break; + case TCP_SAVE_SYN: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->save_syn = val; + break; + case TCP_LINGER2: if (val < 0) tp->linger2 = -1; @@ -2818,6 +2825,34 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_NOTSENT_LOWAT: val = tp->notsent_lowat; break; + case TCP_SAVE_SYN: + val = tp->save_syn; + break; + case TCP_SAVED_SYN: { + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + if (tp->saved_syn) { + len = min_t(unsigned int, tp->saved_syn[0], len); + if (put_user(len, optlen)) { + release_sock(sk); + return -EFAULT; + } + if (copy_to_user(optval, tp->saved_syn + 1, len)) { + release_sock(sk); + return -EFAULT; + } + tcp_saved_syn_free(tp); + release_sock(sk); + } else { + release_sock(sk); + len = 0; + if (put_user(len, optlen)) + return -EFAULT; + } + return 0; + } default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bc790ea9960f..cf8b20ff6658 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1130,7 +1130,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sacktag_state { int reord; int fack_count; - long rtt_us; /* RTT measured by SACKing never-retransmitted data */ + /* Timestamps for earliest and latest never-retransmitted segment + * that was SACKed. RTO needs the earliest RTT to stay conservative, + * but congestion control should still get an accurate delay signal. + */ + struct skb_mstamp first_sackt; + struct skb_mstamp last_sackt; int flag; }; @@ -1233,14 +1238,9 @@ static u8 tcp_sacktag_one(struct sock *sk, state->reord); if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; - /* Pick the earliest sequence sacked for RTT */ - if (state->rtt_us < 0) { - struct skb_mstamp now; - - skb_mstamp_get(&now); - state->rtt_us = skb_mstamp_us_delta(&now, - xmit_time); - } + if (state->first_sackt.v64 == 0) + state->first_sackt = *xmit_time; + state->last_sackt = *xmit_time; } if (sacked & TCPCB_LOST) { @@ -1634,7 +1634,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, - u32 prior_snd_una, long *sack_rtt_us) + u32 prior_snd_una, struct tcp_sacktag_state *state) { struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + @@ -1642,7 +1642,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); struct tcp_sack_block sp[TCP_NUM_SACKS]; struct tcp_sack_block *cache; - struct tcp_sacktag_state state; struct sk_buff *skb; int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3); int used_sacks; @@ -1650,9 +1649,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, int i, j; int first_sack_index; - state.flag = 0; - state.reord = tp->packets_out; - state.rtt_us = -1L; + state->flag = 0; + state->reord = tp->packets_out; if (!tp->sacked_out) { if (WARN_ON(tp->fackets_out)) @@ -1663,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); if (found_dup_sack) - state.flag |= FLAG_DSACKING_ACK; + state->flag |= FLAG_DSACKING_ACK; /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -1728,7 +1726,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } skb = tcp_write_queue_head(sk); - state.fack_count = 0; + state->fack_count = 0; i = 0; if (!tp->sacked_out) { @@ -1762,10 +1760,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, /* Head todo? */ if (before(start_seq, cache->start_seq)) { - skb = tcp_sacktag_skip(skb, sk, &state, + skb = tcp_sacktag_skip(skb, sk, state, start_seq); skb = tcp_sacktag_walk(skb, sk, next_dup, - &state, + state, start_seq, cache->start_seq, dup_sack); @@ -1776,7 +1774,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, goto advance_sp; skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, - &state, + state, cache->end_seq); /* ...tail remains todo... */ @@ -1785,12 +1783,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (!skb) break; - state.fack_count = tp->fackets_out; + state->fack_count = tp->fackets_out; cache++; goto walk; } - skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq); + skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq); /* Check overlap against next cached too (past this one already) */ cache++; continue; @@ -1800,12 +1798,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (!skb) break; - state.fack_count = tp->fackets_out; + state->fack_count = tp->fackets_out; } - skb = tcp_sacktag_skip(skb, sk, &state, start_seq); + skb = tcp_sacktag_skip(skb, sk, state, start_seq); walk: - skb = tcp_sacktag_walk(skb, sk, next_dup, &state, + skb = tcp_sacktag_walk(skb, sk, next_dup, state, start_seq, end_seq, dup_sack); advance_sp: @@ -1820,9 +1818,9 @@ advance_sp: for (j = 0; j < used_sacks; j++) tp->recv_sack_cache[i++] = sp[j]; - if ((state.reord < tp->fackets_out) && + if ((state->reord < tp->fackets_out) && ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) - tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); + tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); tcp_mark_lost_retrans(sk); tcp_verify_left_out(tp); @@ -1834,8 +1832,7 @@ out: WARN_ON((int)tp->retrans_out < 0); WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif - *sack_rtt_us = state.rtt_us; - return state.flag; + return state->flag; } /* Limits sacked_out so that sum with lost_out isn't ever larger than @@ -3052,7 +3049,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, * arrived at the other end. */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, - u32 prior_snd_una, long sack_rtt_us) + u32 prior_snd_una, + struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); struct skb_mstamp first_ackt, last_ackt, now; @@ -3060,8 +3058,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; bool fully_acked = true; - long ca_seq_rtt_us = -1L; + long sack_rtt_us = -1L; long seq_rtt_us = -1L; + long ca_rtt_us = -1L; struct sk_buff *skb; u32 pkts_acked = 0; bool rtt_update; @@ -3150,15 +3149,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, skb_mstamp_get(&now); if (likely(first_ackt.v64)) { seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); - ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + } + if (sack->first_sackt.v64) { + sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt); + ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); } rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us); if (flag & FLAG_ACKED) { - const struct tcp_congestion_ops *ca_ops - = inet_csk(sk)->icsk_ca_ops; - tcp_rearm_rto(sk); if (unlikely(icsk->icsk_mtup.probe_size && !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { @@ -3181,11 +3181,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); - if (ca_ops->pkts_acked) { - long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us); - ca_ops->pkts_acked(sk, pkts_acked, rtt_us); - } - } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent @@ -3195,6 +3190,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tcp_rearm_rto(sk); } + if (icsk->icsk_ca_ops->pkts_acked) + icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us); + #if FASTRETRANS_DEBUG > 0 WARN_ON((int)tp->sacked_out < 0); WARN_ON((int)tp->lost_out < 0); @@ -3235,7 +3233,7 @@ static void tcp_ack_probe(struct sock *sk) * This function is not for random using! */ } else { - unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); + unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); @@ -3459,6 +3457,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct tcp_sacktag_state sack_state; u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; @@ -3467,7 +3466,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets = tp->packets_out; const int prior_unsacked = tp->packets_out - tp->sacked_out; int acked = 0; /* Number of packets newly acked */ - long sack_rtt_us = -1L; + + sack_state.first_sackt.v64 = 0; /* We very likely will need to access write queue head. */ prefetchw(sk->sk_write_queue.next); @@ -3531,7 +3531,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (TCP_SKB_CB(skb)->sacked) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_rtt_us); + &sack_state); if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { flag |= FLAG_ECE; @@ -3556,7 +3556,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ acked = tp->packets_out; flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, - sack_rtt_us); + &sack_state); acked -= tp->packets_out; /* Advance cwnd if state allows */ @@ -3608,7 +3608,7 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_rtt_us); + &sack_state); tcp_fastretrans_alert(sk, acked, prior_unsacked, is_dupack, flag); } @@ -6060,6 +6060,23 @@ static bool tcp_syn_flood_action(struct sock *sk, return want_cookie; } +static void tcp_reqsk_record_syn(const struct sock *sk, + struct request_sock *req, + const struct sk_buff *skb) +{ + if (tcp_sk(sk)->save_syn) { + u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb); + u32 *copy; + + copy = kmalloc(len + sizeof(u32), GFP_ATOMIC); + if (copy) { + copy[0] = len; + memcpy(©[1], skb_network_header(skb), len); + req->saved_syn = copy; + } + } +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -6192,6 +6209,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } + tcp_reqsk_record_syn(sk, req, skb); return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc1c658ec6c1..91cb4768a860 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1802,6 +1802,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); + tcp_saved_syn_free(tp); sk_sockets_allocated_dec(sk); sock_release_memcg(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e5d7649136fc..ebe2ab2596ed 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -536,6 +536,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; + newtp->saved_syn = req->saved_syn; + req->saved_syn = NULL; + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a369e8a70b2c..7386d32cd670 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3382,7 +3382,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack); * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is * out-of-date with SND.UNA-1 to probe window. */ -static int tcp_xmit_probe_skb(struct sock *sk, int urgent) +static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -3400,6 +3400,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) */ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); skb_mstamp_get(&skb->skb_mstamp); + NET_INC_STATS_BH(sock_net(sk), mib); return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } @@ -3407,12 +3408,12 @@ void tcp_send_window_probe(struct sock *sk) { if (sk->sk_state == TCP_ESTABLISHED) { tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; - tcp_xmit_probe_skb(sk, 0); + tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE); } } /* Initiate keepalive or window probe from timer. */ -int tcp_write_wakeup(struct sock *sk) +int tcp_write_wakeup(struct sock *sk, int mib) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -3449,8 +3450,8 @@ int tcp_write_wakeup(struct sock *sk) return err; } else { if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) - tcp_xmit_probe_skb(sk, 1); - return tcp_xmit_probe_skb(sk, 0); + tcp_xmit_probe_skb(sk, 1, mib); + return tcp_xmit_probe_skb(sk, 0, mib); } } @@ -3464,7 +3465,7 @@ void tcp_send_probe0(struct sock *sk) unsigned long probe_max; int err; - err = tcp_write_wakeup(sk); + err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); if (tp->packets_out || !tcp_send_head(sk)) { /* Cancel probe timer, if it is not required. */ @@ -3490,7 +3491,7 @@ void tcp_send_probe0(struct sock *sk) probe_max = TCP_RESOURCE_PROBE_INTERVAL; } inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - inet_csk_rto_backoff(icsk, probe_max), + tcp_probe0_when(sk, probe_max), TCP_RTO_MAX); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8c65dc147d8b..65bf670e8714 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -616,7 +616,7 @@ static void tcp_keepalive_timer (unsigned long data) tcp_write_err(sk); goto out; } - if (tcp_write_wakeup(sk) <= 0) { + if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { icsk->icsk_probes_out++; elapsed = keepalive_intvl_when(tp); } else { diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 6bb98cc193c9..933ea903f7b8 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -15,12 +15,10 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket *sock = NULL; struct sockaddr_in udp_addr; - err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; - sk_change_net(sock->sk, net); - udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->local_ip; udp_addr.sin_port = cfg->local_udp_port; @@ -47,7 +45,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, error: if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } *sockp = NULL; return err; @@ -101,7 +99,7 @@ void udp_tunnel_sock_release(struct socket *sock) { rcu_assign_sk_user_data(sock->sk, NULL); kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2e8c06108ab9..0f3f1999719a 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -48,4 +48,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o ifneq ($(CONFIG_IPV6),) obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o +obj-y += mcast_snoop.o endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 37b70e82bff8..21c2c818df3b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2121,6 +2121,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); if (!fn) goto out; + + noflags |= RTF_CACHE; for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (rt->dst.dev->ifindex != dev->ifindex) continue; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index eef63b394c5a..f3866c0b6cfe 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -167,7 +167,7 @@ lookup_protocol: WARN_ON(!answer_prot->slab); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; @@ -768,6 +768,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.auto_flowlabels = 0; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; + net->ipv6.sysctl.flowlabel_state_ranges = 1; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index d491125011c4..1f9ebe3cbb4a 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -595,6 +595,10 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; + if (net->ipv6.sysctl.flowlabel_state_ranges && + (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) + return -ERANGE; + fl = fl_create(net, sk, &freq, optval, optlen, &err); if (!fl) return err; diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index bba8903e871f..e1a1136bda7c 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -19,12 +19,10 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, int err; struct socket *sock = NULL; - err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; - sk_change_net(sock->sk, net); - udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); @@ -55,7 +53,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, error: if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } *sockp = NULL; return err; diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c new file mode 100644 index 000000000000..df8afe5ab31e --- /dev/null +++ b/net/ipv6/mcast_snoop.c @@ -0,0 +1,213 @@ +/* Copyright (C) 2010: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * Copyright (C) 2015: Linus Lüssing <linus.luessing@c0d3.blue> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * + * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki. + */ + +#include <linux/skbuff.h> +#include <net/ipv6.h> +#include <net/mld.h> +#include <net/addrconf.h> +#include <net/ip6_checksum.h> + +static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + unsigned int len; + unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + ip6h = ipv6_hdr(skb); + + if (ip6h->version != 6) + return -EINVAL; + + len = offset + ntohs(ip6h->payload_len); + if (skb->len < len || len <= offset) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_exthdrs(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + int offset; + u8 nexthdr; + __be16 frag_off; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_HOPOPTS) + return -ENOMSG; + + nexthdr = ip6h->nexthdr; + offset = skb_network_offset(skb) + sizeof(*ip6h); + offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); + + if (offset < 0) + return -EINVAL; + + if (nexthdr != IPPROTO_ICMPV6) + return -ENOMSG; + + skb_set_transport_header(skb, offset); + + return 0; +} + +static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct mld2_report); + + return pskb_may_pull(skb, len) ? 0 : -EINVAL; +} + +static int ipv6_mc_check_mld_query(struct sk_buff *skb) +{ + struct mld_msg *mld; + unsigned int len = skb_transport_offset(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) + return -EINVAL; + + len += sizeof(struct mld_msg); + if (skb->len < len) + return -EINVAL; + + /* MLDv1? */ + if (skb->len != len) { + /* or MLDv2? */ + len += sizeof(struct mld2_query) - sizeof(struct mld_msg); + if (skb->len < len || !pskb_may_pull(skb, len)) + return -EINVAL; + } + + mld = (struct mld_msg *)skb_transport_header(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (ipv6_addr_any(&mld->mld_mca) && + !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_mld_msg(struct sk_buff *skb) +{ + struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb); + + switch (mld->mld_type) { + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MGM_REPORT: + /* fall through */ + return 0; + case ICMPV6_MLD2_REPORT: + return ipv6_mc_check_mld_reportv2(skb); + case ICMPV6_MGM_QUERY: + return ipv6_mc_check_mld_query(skb); + default: + return -ENOMSG; + } +} + +static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) +{ + return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); +} + +static int __ipv6_mc_check_mld(struct sk_buff *skb, + struct sk_buff **skb_trimmed) + +{ + struct sk_buff *skb_chk = NULL; + unsigned int transport_len; + unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); + int ret; + + transport_len = ntohs(ipv6_hdr(skb)->payload_len); + transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); + + skb_get(skb); + skb_chk = skb_checksum_trimmed(skb, transport_len, + ipv6_mc_validate_checksum); + if (!skb_chk) + return -EINVAL; + + if (!pskb_may_pull(skb_chk, len)) { + kfree_skb(skb_chk); + return -EINVAL; + } + + ret = ipv6_mc_check_mld_msg(skb_chk); + if (ret) { + kfree_skb(skb_chk); + return ret; + } + + if (skb_trimmed) + *skb_trimmed = skb_chk; + else + kfree_skb(skb_chk); + + return 0; +} + +/** + * ipv6_mc_check_mld - checks whether this is a sane MLD packet + * @skb: the skb to validate + * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) + * + * Checks whether an IPv6 packet is a valid MLD packet. If so sets + * skb network and transport headers accordingly and returns zero. + * + * -EINVAL: A broken packet was detected, i.e. it violates some internet + * standard + * -ENOMSG: IP header validation succeeded but it is not an MLD packet. + * -ENOMEM: A memory allocation failure happened. + * + * Optionally, an skb pointer might be provided via skb_trimmed (or set it + * to NULL): After parsing an MLD packet successfully it will point to + * an skb which has its tail aligned to the IP packet end. This might + * either be the originally provided skb or a trimmed, cloned version if + * the skb frame had data beyond the IP packet. A cloned skb allows us + * to leave the original skb and its full frame unchanged (which might be + * desirable for layer 2 frame jugglers). + * + * The caller needs to release a reference count from any returned skb_trimmed. + */ +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) +{ + int ret; + + ret = ipv6_mc_check_ip6hdr(skb); + if (ret < 0) + return ret; + + ret = ipv6_mc_check_exthdrs(skb); + if (ret < 0) + return ret; + + return __ipv6_mc_check_mld(skb, skb_trimmed); +} +EXPORT_SYMBOL(ipv6_mc_check_mld); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d3588885f097..6f4a35096bde 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -92,6 +92,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void rt6_dst_from_metrics_check(struct rt6_info *rt); static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO @@ -104,65 +105,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif -static void rt6_bind_peer(struct rt6_info *rt, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_rt6i_peer); - if (!base) - return; - - peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (peer) { - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); - } -} - -static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) -{ - if (rt6_has_peer(rt)) - return rt6_peer_ptr(rt); - - rt6_bind_peer(rt, create); - return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); -} - -static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) -{ - return __rt6_get_peer(rt, 1); -} - static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { - struct rt6_info *rt = (struct rt6_info *) dst; - struct inet_peer *peer; - u32 *p = NULL; + struct rt6_info *rt = (struct rt6_info *)dst; - if (!(rt->dst.flags & DST_HOST)) + if (rt->rt6i_flags & RTF_CACHE) + return NULL; + else return dst_cow_metrics_generic(dst, old); - - peer = rt6_get_peer_create(rt); - if (peer) { - u32 *old_p = __DST_METRICS_PTR(old); - unsigned long prev, new; - - p = peer->metrics; - if (inet_metrics_new(peer) || - (old & DST_METRICS_FORCE_OVERWRITE)) - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); - - new = (unsigned long) p; - prev = cmpxchg(&dst->_metrics, old, new); - - if (prev != old) { - p = __DST_METRICS_PTR(prev); - if (prev & DST_METRICS_READ_ONLY) - p = NULL; - } - } - return p; } static inline const void *choose_neigh_daddr(struct rt6_info *rt, @@ -311,7 +261,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); INIT_LIST_HEAD(&rt->rt6i_siblings); } return rt; @@ -323,8 +272,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev = rt->rt6i_idev; struct dst_entry *from = dst->from; - if (!(rt->dst.flags & DST_HOST)) - dst_destroy_metrics_generic(dst); + dst_destroy_metrics_generic(dst); if (idev) { rt->rt6i_idev = NULL; @@ -333,11 +281,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) dst->from = NULL; dst_release(from); - - if (rt6_has_peer(rt)) { - struct inet_peer *peer = rt6_peer_ptr(rt); - inet_putpeer(peer); - } } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -652,15 +595,33 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, u32 metric, int oif, int strict, bool *do_rr) { - struct rt6_info *rt, *match; + struct rt6_info *rt, *match, *cont; int mpri = -1; match = NULL; - for (rt = rr_head; rt && rt->rt6i_metric == metric; - rt = rt->dst.rt6_next) + cont = NULL; + for (rt = rr_head; rt; rt = rt->dst.rt6_next) { + if (rt->rt6i_metric != metric) { + cont = rt; + break; + } + match = find_match(rt, oif, strict, &mpri, match, do_rr); - for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; - rt = rt->dst.rt6_next) + } + + for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { + if (rt->rt6i_metric != metric) { + cont = rt; + break; + } + + match = find_match(rt, oif, strict, &mpri, match, do_rr); + } + + if (match || !cont) + return match; + + for (rt = cont; rt; rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; @@ -959,7 +920,7 @@ redo_rt6_select: if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); - else if (!(rt->dst.flags & DST_HOST)) + else if (!(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL)) nrt = rt6_alloc_clone(rt, &fl6->daddr); else goto out2; @@ -985,6 +946,7 @@ redo_rt6_select: goto redo_fib6_lookup_lock; out2: + rt6_dst_from_metrics_check(rt); rt->dst.lastuse = jiffies; rt->dst.__use++; @@ -1059,7 +1021,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new = &rt->dst; memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); - rt6_init_peer(rt, net->ipv6.peers); new->__use = 1; new->input = dst_discard; @@ -1093,6 +1054,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori * Destination cache support functions */ +static void rt6_dst_from_metrics_check(struct rt6_info *rt) +{ + if (rt->dst.from && + dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from)) + dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true); +} + static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) { struct rt6_info *rt; @@ -1109,6 +1077,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt6_check_expired(rt)) return NULL; + rt6_dst_from_metrics_check(rt); + return dst; } @@ -1154,14 +1124,14 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct rt6_info *rt6 = (struct rt6_info *)dst; dst_confirm(dst); - if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { + if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) { struct net *net = dev_net(dst->dev); rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - dst_metric_set(dst, RTAX_MTU, mtu); + rt6->rt6i_pmtu = mtu; rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } } @@ -1341,12 +1311,17 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { + const struct rt6_info *rt = (const struct rt6_info *)dst; + unsigned int mtu = rt->rt6i_pmtu; struct inet6_dev *idev; - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + goto out; + mtu = IPV6_MIN_MTU; rcu_read_lock(); @@ -1590,10 +1565,8 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) { + if (rt->rt6i_dst.plen == 128) rt->dst.flags |= DST_HOST; - dst_metrics_set_force_overwrite(&rt->dst); - } #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -1785,6 +1758,9 @@ static int ip6_route_del(struct fib6_config *cfg) if (fn) { for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + if ((rt->rt6i_flags & RTF_CACHE) && + !(cfg->fc_flags & RTF_CACHE)) + continue; if (cfg->fc_ifindex && (!rt->dst.dev || rt->dst.dev->ifindex != cfg->fc_ifindex)) @@ -1926,12 +1902,27 @@ out: * Misc support functions */ +static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) +{ + BUG_ON(from->dst.from); + + rt->rt6i_flags &= ~RTF_EXPIRES; + dst_hold(&from->dst); + rt->dst.from = &from->dst; + dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); +} + static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest) { struct net *net = dev_net(ort->dst.dev); - struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, - ort->rt6i_table); + struct rt6_info *rt; + + if (ort->rt6i_flags & RTF_CACHE) + ort = (struct rt6_info *)ort->dst.from; + + rt = ip6_dst_alloc(net, ort->dst.dev, 0, + ort->rt6i_table); if (rt) { rt->dst.input = ort->dst.input; @@ -1940,7 +1931,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, rt->rt6i_dst.addr = *dest; rt->rt6i_dst.plen = 128; - dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) @@ -2373,11 +2363,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->dst.dev == arg->dev && - !dst_metric_locked(&rt->dst, RTAX_MTU) && - (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6))) { - dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + !dst_metric_locked(&rt->dst, RTAX_MTU)) { + if (rt->rt6i_flags & RTF_CACHE) { + /* For RTF_CACHE with rt6i_pmtu == 0 + * (i.e. a redirected route), + * the metrics of its rt->dst.from has already + * been updated. + */ + if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu) + rt->rt6i_pmtu = arg->mtu; + } else if (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + } } return 0; } @@ -2434,6 +2433,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_LOCAL) cfg->fc_flags |= RTF_LOCAL; + if (rtm->rtm_flags & RTM_F_CLONED) + cfg->fc_flags |= RTF_CACHE; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -2604,6 +2606,7 @@ static int rt6_fill_node(struct net *net, int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { + u32 metrics[RTAX_MAX]; struct rtmsg *rtm; struct nlmsghdr *nlh; long expires; @@ -2717,7 +2720,10 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; } - if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) + memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); + if (rt->rt6i_pmtu) + metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; + if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; if (rt->rt6i_flags & RTF_GATEWAY) { diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index abcc79f649b3..4e705add4f18 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -68,6 +68,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "flowlabel_state_ranges", + .data = &init_net.ipv6.sysctl.flowlabel_state_ranges, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -109,6 +116,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; + ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f337a908a76a..6ae256b4c55a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -71,13 +71,6 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } -static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) -{ - struct rt6_info *rt = (struct rt6_info *)xdst; - - rt6_init_peer(rt, net->ipv6.peers); -} - static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { @@ -106,8 +99,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, return -ENODEV; } - rt6_transfer_peer(&xdst->u.rt6, rt); - /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | @@ -255,10 +246,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); - if (rt6_has_peer(&xdst->u.rt6)) { - struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6); - inet_putpeer(peer); - } xfrm_dst_destroy(xdst); } @@ -308,7 +295,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, - .init_dst = xfrm6_init_dst, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 4ea5d7497b5f..48d0dc89b58d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1347,7 +1347,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -ENOMEM; - sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto); + sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, kern); if (!sk) goto out; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ee0ea25c8e7a..fae6822cc367 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1100,7 +1100,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol, } /* Allocate networking socket */ - sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto); + sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6daa52a18d40..918151c11348 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -535,12 +535,12 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent) sk->sk_type = parent->sk_type; } -static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, int kern) { struct sock *sk; struct iucv_sock *iucv; - sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto); + sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, kern); if (!sk) return NULL; iucv = iucv_sk(sk); @@ -602,7 +602,7 @@ static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL); + sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern); if (!sk) return -ENOMEM; @@ -1723,7 +1723,7 @@ static int iucv_callback_connreq(struct iucv_path *path, } /* Create the new socket */ - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); if (!nsk) { err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); @@ -1933,7 +1933,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) goto out; } - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); + nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); bh_lock_sock(sk); if ((sk->sk_state != IUCV_LISTEN) || sk_acceptq_is_full(sk) || diff --git a/net/key/af_key.c b/net/key/af_key.c index f0d52d721b3a..9e834ec475a9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -149,7 +149,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto); + sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern); if (sk == NULL) goto out; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a29a504492af..f6b090df3930 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1334,9 +1334,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work) if (sock) inet_shutdown(sock, 2); } else { - if (sock) + if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sk); + sock_release(sock); + } } l2tp_tunnel_sock_put(sk); @@ -1399,13 +1400,11 @@ static int l2tp_tunnel_sock_create(struct net *net, if (cfg->local_ip6 && cfg->peer_ip6) { struct sockaddr_l2tpip6 ip6_addr = {0}; - err = sock_create_kern(AF_INET6, SOCK_DGRAM, + err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; - sk_change_net(sock->sk, net); - ip6_addr.l2tp_family = AF_INET6; memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); @@ -1429,13 +1428,11 @@ static int l2tp_tunnel_sock_create(struct net *net, { struct sockaddr_l2tpip ip_addr = {0}; - err = sock_create_kern(AF_INET, SOCK_DGRAM, + err = sock_create_kern(net, AF_INET, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; - sk_change_net(sock->sk, net); - ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; @@ -1462,7 +1459,7 @@ out: *sockp = sock; if ((err < 0) && sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); *sockp = NULL; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index e9b0dec56b8e..f56c9f69e9f2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -542,12 +542,12 @@ static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb) /* socket() handler. Initialize a new struct sock. */ -static int pppol2tp_create(struct net *net, struct socket *sock) +static int pppol2tp_create(struct net *net, struct socket *sock, int kern) { int error = -ENOMEM; struct sock *sk; - sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto, kern); if (!sk) goto out; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 17a8dff06090..8fd9febaa5ba 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -168,7 +168,7 @@ static int llc_ui_create(struct net *net, struct socket *sock, int protocol, if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; - sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); + sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto, kern); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 81a61fce3afb..3e821daf9dd4 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -768,7 +768,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk, struct llc_addr *daddr) { struct sock *newsk = llc_sk_alloc(sock_net(sk), sk->sk_family, GFP_ATOMIC, - sk->sk_prot); + sk->sk_prot, 0); struct llc_sock *newllc, *llc = llc_sk(sk); if (!newsk) @@ -931,9 +931,9 @@ static void llc_sk_init(struct sock *sk) * Allocates a LLC sock and initializes it. Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) +struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern) { - struct sock *sk = sk_alloc(net, family, priority, prot); + struct sock *sk = sk_alloc(net, family, priority, prot, kern); if (!sk) goto out; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 64a012a0c6e5..086de496a4c1 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -302,6 +302,20 @@ config MAC80211_DEBUG_COUNTERS ---help--- Selecting this option causes mac80211 to keep additional and very verbose statistics about TX and RX handler use - and show them in debugfs. + as well as a few selected dot11 counters. These will be + exposed in debugfs. + + Note that some of the counters are not concurrency safe + and may thus not always be accurate. If unsure, say N. + +config MAC80211_STA_HASH_MAX_SIZE + int "Station hash table maximum size" if MAC80211_DEBUG_MENU + default 0 + ---help--- + Setting this option to a low value (e.g. 4) allows testing the + hash table with collisions relatively deterministically (just + connect more stations than the number selected here.) + + If unsure, leave the default of 0. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 265e42721a66..3469bbdc891c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -137,6 +137,9 @@ static int ieee80211_set_noack_map(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata->noack_map = noack_map; + + ieee80211_check_fast_xmit_iface(sdata); + return 0; } @@ -309,6 +312,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, u32 iv32; u16 iv16; int err = -ENOENT; + struct ieee80211_key_seq kseq = {}; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -339,10 +343,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, iv32 = key->u.tkip.tx.iv32; iv16 = key->u.tkip.tx.iv16; - if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) - drv_get_tkip_seq(sdata->local, - key->conf.hw_key_idx, - &iv32, &iv16); + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + iv32 = kseq.tkip.iv32; + iv16 = kseq.tkip.iv16; + } seq[0] = iv16 & 0xff; seq[1] = (iv16 >> 8) & 0xff; @@ -355,52 +361,85 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: - pn64 = atomic64_read(&key->u.ccmp.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.ccmp.pn, 6); + } else { + pn64 = atomic64_read(&key->u.ccmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: - pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.aes_cmac.pn, 6); + } else { + pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: - pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.aes_gmac.pn, 6); + } else { + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: - pn64 = atomic64_read(&key->u.gcmp.tx_pn); - seq[0] = pn64; - seq[1] = pn64 >> 8; - seq[2] = pn64 >> 16; - seq[3] = pn64 >> 24; - seq[4] = pn64 >> 32; - seq[5] = pn64 >> 40; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + drv_get_key_seq(sdata->local, key, &kseq); + memcpy(seq, kseq.gcmp.pn, 6); + } else { + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + } params.seq = seq; params.seq_len = 6; break; + default: + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + break; + if (WARN_ON(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) + break; + drv_get_key_seq(sdata->local, key, &kseq); + params.seq = kseq.hw.seq; + params.seq_len = kseq.hw.seq_len; + break; } params.key = key->conf.key; @@ -2099,10 +2138,14 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) int err; if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { + ieee80211_check_fast_xmit_all(local); + err = drv_set_frag_threshold(local, wiphy->frag_threshold); - if (err) + if (err) { + ieee80211_check_fast_xmit_all(local); return err; + } } if ((changed & WIPHY_PARAM_COVERAGE_CLASS) || @@ -3336,8 +3379,14 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (!sdata->u.mgd.associated) + sdata_lock(sdata); + if (!sdata->u.mgd.associated || + (params->offchan && params->wait && + local->ops->remain_on_channel && + memcmp(sdata->u.mgd.associated->bssid, + mgmt->bssid, ETH_ALEN))) need_offchan = true; + sdata_unlock(sdata); break; case NL80211_IFTYPE_P2P_DEVICE: need_offchan = true; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 5bcd4e5589d3..7e9b62475400 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -664,6 +664,8 @@ out: ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + ieee80211_check_fast_xmit_iface(sdata); + return ret; } @@ -1030,6 +1032,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) if (sdata->vif.type == NL80211_IFTYPE_AP) __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); + ieee80211_check_fast_xmit_iface(sdata); + if (ieee80211_chanctx_refcount(local, old_ctx) == 0) ieee80211_free_chanctx(local, old_ctx); @@ -1376,6 +1380,8 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); + ieee80211_check_fast_xmit_iface(sdata); + sdata->radar_required = sdata->reserved_radar_required; if (sdata->vif.bss_conf.chandef.width != diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 23813ebb349c..b17206db49b4 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -219,8 +219,8 @@ static const struct file_operations stats_ ##name## _ops = { \ .llseek = generic_file_llseek, \ }; -#define DEBUGFS_STATS_ADD(name, field) \ - debugfs_create_u32(#name, 0400, statsd, (u32 *) &field); +#define DEBUGFS_STATS_ADD(name) \ + debugfs_create_u32(#name, 0400, statsd, &local->name); #define DEBUGFS_DEVSTATS_ADD(name) \ debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops); @@ -255,53 +255,31 @@ void debugfs_hw_add(struct ieee80211_local *local) if (!statsd) return; - DEBUGFS_STATS_ADD(transmitted_fragment_count, - local->dot11TransmittedFragmentCount); - DEBUGFS_STATS_ADD(multicast_transmitted_frame_count, - local->dot11MulticastTransmittedFrameCount); - DEBUGFS_STATS_ADD(failed_count, local->dot11FailedCount); - DEBUGFS_STATS_ADD(retry_count, local->dot11RetryCount); - DEBUGFS_STATS_ADD(multiple_retry_count, - local->dot11MultipleRetryCount); - DEBUGFS_STATS_ADD(frame_duplicate_count, - local->dot11FrameDuplicateCount); - DEBUGFS_STATS_ADD(received_fragment_count, - local->dot11ReceivedFragmentCount); - DEBUGFS_STATS_ADD(multicast_received_frame_count, - local->dot11MulticastReceivedFrameCount); - DEBUGFS_STATS_ADD(transmitted_frame_count, - local->dot11TransmittedFrameCount); #ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop); - DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued); - DEBUGFS_STATS_ADD(tx_handlers_drop_fragment, - local->tx_handlers_drop_fragment); - DEBUGFS_STATS_ADD(tx_handlers_drop_wep, - local->tx_handlers_drop_wep); - DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc, - local->tx_handlers_drop_not_assoc); - DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port, - local->tx_handlers_drop_unauth_port); - DEBUGFS_STATS_ADD(rx_handlers_drop, local->rx_handlers_drop); - DEBUGFS_STATS_ADD(rx_handlers_queued, local->rx_handlers_queued); - DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc, - local->rx_handlers_drop_nullfunc); - DEBUGFS_STATS_ADD(rx_handlers_drop_defrag, - local->rx_handlers_drop_defrag); - DEBUGFS_STATS_ADD(rx_handlers_drop_short, - local->rx_handlers_drop_short); - DEBUGFS_STATS_ADD(tx_expand_skb_head, - local->tx_expand_skb_head); - DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned, - local->tx_expand_skb_head_cloned); - DEBUGFS_STATS_ADD(rx_expand_skb_head, - local->rx_expand_skb_head); - DEBUGFS_STATS_ADD(rx_expand_skb_head2, - local->rx_expand_skb_head2); - DEBUGFS_STATS_ADD(rx_handlers_fragments, - local->rx_handlers_fragments); - DEBUGFS_STATS_ADD(tx_status_drop, - local->tx_status_drop); + DEBUGFS_STATS_ADD(dot11TransmittedFragmentCount); + DEBUGFS_STATS_ADD(dot11MulticastTransmittedFrameCount); + DEBUGFS_STATS_ADD(dot11FailedCount); + DEBUGFS_STATS_ADD(dot11RetryCount); + DEBUGFS_STATS_ADD(dot11MultipleRetryCount); + DEBUGFS_STATS_ADD(dot11FrameDuplicateCount); + DEBUGFS_STATS_ADD(dot11ReceivedFragmentCount); + DEBUGFS_STATS_ADD(dot11MulticastReceivedFrameCount); + DEBUGFS_STATS_ADD(dot11TransmittedFrameCount); + DEBUGFS_STATS_ADD(tx_handlers_drop); + DEBUGFS_STATS_ADD(tx_handlers_queued); + DEBUGFS_STATS_ADD(tx_handlers_drop_wep); + DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc); + DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port); + DEBUGFS_STATS_ADD(rx_handlers_drop); + DEBUGFS_STATS_ADD(rx_handlers_queued); + DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc); + DEBUGFS_STATS_ADD(rx_handlers_drop_defrag); + DEBUGFS_STATS_ADD(rx_handlers_drop_short); + DEBUGFS_STATS_ADD(tx_expand_skb_head); + DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned); + DEBUGFS_STATS_ADD(rx_expand_skb_head_defrag); + DEBUGFS_STATS_ADD(rx_handlers_fragments); + DEBUGFS_STATS_ADD(tx_status_drop); #endif DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount); DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 252859e90e8a..06d52935036d 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -29,8 +29,6 @@ static ssize_t sta_ ##name## _read(struct file *file, \ format_string, sta->field); \ } #define STA_READ_D(name, field) STA_READ(name, field, "%d\n") -#define STA_READ_U(name, field) STA_READ(name, field, "%u\n") -#define STA_READ_S(name, field) STA_READ(name, field, "%s\n") #define STA_OPS(name) \ static const struct file_operations sta_ ##name## _ops = { \ @@ -52,10 +50,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_OPS(name) STA_FILE(aid, sta.aid, D); -STA_FILE(dev, sdata->name, S); -STA_FILE(last_signal, last_signal, D); STA_FILE(last_ack_signal, last_ack_signal, D); -STA_FILE(beacon_loss_count, beacon_loss_count, D); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -101,40 +96,6 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file, } STA_OPS(num_ps_buf_frames); -static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct sta_info *sta = file->private_data; - return mac80211_format_buffer(userbuf, count, ppos, "%d\n", - jiffies_to_msecs(jiffies - sta->last_rx)); -} -STA_OPS(inactive_ms); - - -static ssize_t sta_connected_time_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct sta_info *sta = file->private_data; - struct timespec uptime; - struct tm result; - long connected_time_secs; - char buf[100]; - int res; - ktime_get_ts(&uptime); - connected_time_secs = uptime.tv_sec - sta->last_connected; - time_to_tm(connected_time_secs, 0, &result); - result.tm_year -= 70; - result.tm_mday -= 1; - res = scnprintf(buf, sizeof(buf), - "years - %ld\nmonths - %d\ndays - %d\nclock - %d:%d:%d\n\n", - result.tm_year, result.tm_mon, result.tm_mday, - result.tm_hour, result.tm_min, result.tm_sec); - return simple_read_from_buffer(userbuf, count, ppos, buf, res); -} -STA_OPS(connected_time); - - - static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -359,37 +320,6 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, } STA_OPS(vht_capa); -static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct sta_info *sta = file->private_data; - struct rate_info rinfo; - u16 rate; - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &rinfo); - rate = cfg80211_calculate_bitrate(&rinfo); - - return mac80211_format_buffer(userbuf, count, ppos, - "%d.%d MBit/s\n", - rate/10, rate%10); -} -STA_OPS(current_tx_rate); - -static ssize_t sta_last_rx_rate_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct sta_info *sta = file->private_data; - struct rate_info rinfo; - u16 rate; - - sta_set_rate_info_rx(sta, &rinfo); - - rate = cfg80211_calculate_bitrate(&rinfo); - - return mac80211_format_buffer(userbuf, count, ppos, - "%d.%d MBit/s\n", - rate/10, rate%10); -} -STA_OPS(last_rx_rate); #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, \ @@ -432,30 +362,15 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(flags); DEBUGFS_ADD(num_ps_buf_frames); - DEBUGFS_ADD(inactive_ms); - DEBUGFS_ADD(connected_time); DEBUGFS_ADD(last_seq_ctrl); DEBUGFS_ADD(agg_status); - DEBUGFS_ADD(dev); - DEBUGFS_ADD(last_signal); - DEBUGFS_ADD(beacon_loss_count); DEBUGFS_ADD(ht_capa); DEBUGFS_ADD(vht_capa); DEBUGFS_ADD(last_ack_signal); - DEBUGFS_ADD(current_tx_rate); - DEBUGFS_ADD(last_rx_rate); - DEBUGFS_ADD_COUNTER(rx_packets, rx_packets); - DEBUGFS_ADD_COUNTER(tx_packets, tx_packets); - DEBUGFS_ADD_COUNTER(rx_bytes, rx_bytes); - DEBUGFS_ADD_COUNTER(tx_bytes, tx_bytes); DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates); DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments); - DEBUGFS_ADD_COUNTER(rx_dropped, rx_dropped); - DEBUGFS_ADD_COUNTER(tx_fragments, tx_fragments); DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count); - DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed); - DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count); if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 26e1ca8a474a..c01e681b90fb 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -417,12 +417,13 @@ static inline int drv_get_stats(struct ieee80211_local *local, return ret; } -static inline void drv_get_tkip_seq(struct ieee80211_local *local, - u8 hw_key_idx, u32 *iv32, u16 *iv16) +static inline void drv_get_key_seq(struct ieee80211_local *local, + struct ieee80211_key *key, + struct ieee80211_key_seq *seq) { - if (local->ops->get_tkip_seq) - local->ops->get_tkip_seq(&local->hw, hw_key_idx, iv32, iv16); - trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16); + if (local->ops->get_key_seq) + local->ops->get_key_seq(&local->hw, &key->conf, seq); + trace_drv_get_key_seq(local, &key->conf); } static inline int drv_set_frag_threshold(struct ieee80211_local *local, diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index 52bcea6ad9e8..188faab11c24 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -38,7 +38,7 @@ static void ieee80211_get_ringparam(struct net_device *dev, static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { "rx_packets", "rx_bytes", "rx_duplicates", "rx_fragments", "rx_dropped", - "tx_packets", "tx_bytes", "tx_fragments", + "tx_packets", "tx_bytes", "tx_filtered", "tx_retry_failed", "tx_retries", "beacon_loss", "sta_state", "txrate", "rxrate", "signal", "channel", "noise", "ch_time", "ch_time_busy", @@ -87,7 +87,6 @@ static void ieee80211_get_stats(struct net_device *dev, \ data[i++] += sinfo.tx_packets; \ data[i++] += sinfo.tx_bytes; \ - data[i++] += sta->tx_fragments; \ data[i++] += sta->tx_filtered_count; \ data[i++] += sta->tx_retry_failed; \ data[i++] += sta->tx_retry_count; \ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ab46ab4a7249..241b74f3bd81 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -181,8 +181,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result; /** * enum ieee80211_packet_rx_flags - packet RX flags - * @IEEE80211_RX_RA_MATCH: frame is destined to interface currently processed - * (incl. multicast frames) * @IEEE80211_RX_FRAGMENTED: fragmented frame * @IEEE80211_RX_AMSDU: a-MSDU packet * @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed @@ -192,7 +190,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result; * @rx_flags field of &struct ieee80211_rx_status. */ enum ieee80211_packet_rx_flags { - IEEE80211_RX_RA_MATCH = BIT(1), IEEE80211_RX_FRAGMENTED = BIT(2), IEEE80211_RX_AMSDU = BIT(3), IEEE80211_RX_MALFORMED_ACTION_FRM = BIT(4), @@ -725,7 +722,6 @@ struct ieee80211_if_mesh { * enum ieee80211_sub_if_data_flags - virtual interface flags * * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets - * @IEEE80211_SDATA_PROMISC: interface is promisc * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between * associated stations and deliver multicast frames both @@ -735,7 +731,6 @@ struct ieee80211_if_mesh { */ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_ALLMULTI = BIT(0), - IEEE80211_SDATA_PROMISC = BIT(1), IEEE80211_SDATA_OPERATING_GMODE = BIT(2), IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4), @@ -1211,8 +1206,8 @@ struct ieee80211_local { atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES]; - /* number of interfaces with corresponding IFF_ flags */ - atomic_t iff_allmultis, iff_promiscs; + /* number of interfaces with allmulti RX */ + atomic_t iff_allmultis; struct rate_control_ref *rate_ctrl; @@ -1264,6 +1259,15 @@ struct ieee80211_local { struct list_head chanctx_list; struct mutex chanctx_mtx; +#ifdef CONFIG_MAC80211_LEDS + struct led_trigger tx_led, rx_led, assoc_led, radio_led; + struct led_trigger tpt_led; + atomic_t tx_led_active, rx_led_active, assoc_led_active; + atomic_t radio_led_active, tpt_led_active; + struct tpt_led_trigger *tpt_led_trigger; +#endif + +#ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* SNMP counters */ /* dot11CountersTable */ u32 dot11TransmittedFragmentCount; @@ -1276,18 +1280,9 @@ struct ieee80211_local { u32 dot11MulticastReceivedFrameCount; u32 dot11TransmittedFrameCount; -#ifdef CONFIG_MAC80211_LEDS - struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led; - struct tpt_led_trigger *tpt_led_trigger; - char tx_led_name[32], rx_led_name[32], - assoc_led_name[32], radio_led_name[32]; -#endif - -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* TX/RX handler statistics */ unsigned int tx_handlers_drop; unsigned int tx_handlers_queued; - unsigned int tx_handlers_drop_fragment; unsigned int tx_handlers_drop_wep; unsigned int tx_handlers_drop_not_assoc; unsigned int tx_handlers_drop_unauth_port; @@ -1298,8 +1293,7 @@ struct ieee80211_local { unsigned int rx_handlers_drop_short; unsigned int tx_expand_skb_head; unsigned int tx_expand_skb_head_cloned; - unsigned int rx_expand_skb_head; - unsigned int rx_expand_skb_head2; + unsigned int rx_expand_skb_head_defrag; unsigned int rx_handlers_fragments; unsigned int tx_status_drop; #define I802_DEBUG_INC(c) (c)++ @@ -1651,6 +1645,11 @@ struct sk_buff * ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); +void ieee80211_check_fast_xmit(struct sta_info *sta); +void ieee80211_check_fast_xmit_all(struct ieee80211_local *local); +void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata); +void ieee80211_clear_fast_xmit(struct sta_info *sta); + /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index bab5c63c0bad..4ee8fea263ed 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -697,9 +697,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (sdata->flags & IEEE80211_SDATA_ALLMULTI) atomic_inc(&local->iff_allmultis); - if (sdata->flags & IEEE80211_SDATA_PROMISC) - atomic_inc(&local->iff_promiscs); - if (coming_up) local->open_count++; @@ -829,13 +826,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) || (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1))); - /* don't count this interface for promisc/allmulti while it is down */ + /* don't count this interface for allmulti while it is down */ if (sdata->flags & IEEE80211_SDATA_ALLMULTI) atomic_dec(&local->iff_allmultis); - if (sdata->flags & IEEE80211_SDATA_PROMISC) - atomic_dec(&local->iff_promiscs); - if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll--; local->fif_probe_req--; @@ -1049,12 +1043,10 @@ static void ieee80211_set_multicast_list(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - int allmulti, promisc, sdata_allmulti, sdata_promisc; + int allmulti, sdata_allmulti; allmulti = !!(dev->flags & IFF_ALLMULTI); - promisc = !!(dev->flags & IFF_PROMISC); sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI); - sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC); if (allmulti != sdata_allmulti) { if (dev->flags & IFF_ALLMULTI) @@ -1064,13 +1056,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) sdata->flags ^= IEEE80211_SDATA_ALLMULTI; } - if (promisc != sdata_promisc) { - if (dev->flags & IFF_PROMISC) - atomic_inc(&local->iff_promiscs); - else - atomic_dec(&local->iff_promiscs); - sdata->flags ^= IEEE80211_SDATA_PROMISC; - } spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); @@ -1111,6 +1096,35 @@ static u16 ieee80211_netdev_select_queue(struct net_device *dev, return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); } +static struct rtnl_link_stats64 * +ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_sw_netstats *tstats; + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + tstats = per_cpu_ptr(dev->tstats, i); + + do { + start = u64_stats_fetch_begin_irq(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); + + stats->rx_packets += rx_packets; + stats->tx_packets += tx_packets; + stats->rx_bytes += rx_bytes; + stats->tx_bytes += tx_bytes; + } + + return stats; +} + static const struct net_device_ops ieee80211_dataif_ops = { .ndo_open = ieee80211_open, .ndo_stop = ieee80211_stop, @@ -1120,6 +1134,7 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_netdev_select_queue, + .ndo_get_stats64 = ieee80211_get_stats64, }; static u16 ieee80211_monitor_select_queue(struct net_device *dev, @@ -1153,14 +1168,21 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_monitor_select_queue, + .ndo_get_stats64 = ieee80211_get_stats64, }; +static void ieee80211_if_free(struct net_device *dev) +{ + free_percpu(dev->tstats); + free_netdev(dev); +} + static void ieee80211_if_setup(struct net_device *dev) { ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &ieee80211_dataif_ops; - dev->destructor = free_netdev; + dev->destructor = ieee80211_if_free; } static void ieee80211_iface_work(struct work_struct *work) @@ -1701,6 +1723,12 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, return -ENOMEM; dev_net_set(ndev, wiphy_net(local->hw.wiphy)); + ndev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!ndev->tstats) { + free_netdev(ndev); + return -ENOMEM; + } + ndev->needed_headroom = local->tx_headroom + 4*6 /* four MAC addresses */ + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 2291cd730091..2e677376c958 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -229,6 +229,7 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, if (uni) { rcu_assign_pointer(sdata->default_unicast_key, key); + ieee80211_check_fast_xmit_iface(sdata); drv_set_default_unicast_key(sdata->local, sdata, idx); } @@ -298,6 +299,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (pairwise) { rcu_assign_pointer(sta->ptk[idx], new); sta->ptk_idx = idx; + ieee80211_check_fast_xmit(sta); } else { rcu_assign_pointer(sta->gtk[idx], new); sta->gtk_idx = idx; @@ -483,15 +485,17 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, break; default: if (cs) { - size_t len = (seq_len > MAX_PN_LEN) ? - MAX_PN_LEN : seq_len; + if (seq_len && seq_len != cs->pn_len) { + kfree(key); + return ERR_PTR(-EINVAL); + } key->conf.iv_len = cs->hdr_len; key->conf.icv_len = cs->mic_len; for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) - for (j = 0; j < len; j++) + for (j = 0; j < seq_len; j++) key->u.gen.rx_pn[i][j] = - seq[len - j - 1]; + seq[seq_len - j - 1]; key->flags |= KEY_FLAG_CIPHER_SCHEME; } } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index c5a31835be0e..df430a618764 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -18,7 +18,6 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 -#define MAX_PN_LEN 16 struct ieee80211_local; struct ieee80211_sub_if_data; @@ -116,7 +115,7 @@ struct ieee80211_key { } gcmp; struct { /* generic cipher scheme */ - u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN]; + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_MAX_PN_LEN]; } gen; } u; diff --git a/net/mac80211/led.c b/net/mac80211/led.c index e2b836446af3..38f05565eaac 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -12,96 +12,175 @@ #include <linux/export.h> #include "led.h" -#define MAC80211_BLINK_DELAY 50 /* ms */ - -void ieee80211_led_rx(struct ieee80211_local *local) -{ - unsigned long led_delay = MAC80211_BLINK_DELAY; - if (unlikely(!local->rx_led)) - return; - led_trigger_blink_oneshot(local->rx_led, &led_delay, &led_delay, 0); -} - -void ieee80211_led_tx(struct ieee80211_local *local) -{ - unsigned long led_delay = MAC80211_BLINK_DELAY; - if (unlikely(!local->tx_led)) - return; - led_trigger_blink_oneshot(local->tx_led, &led_delay, &led_delay, 0); -} - void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) { - if (unlikely(!local->assoc_led)) + if (!atomic_read(&local->assoc_led_active)) return; if (associated) - led_trigger_event(local->assoc_led, LED_FULL); + led_trigger_event(&local->assoc_led, LED_FULL); else - led_trigger_event(local->assoc_led, LED_OFF); + led_trigger_event(&local->assoc_led, LED_OFF); } void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) { - if (unlikely(!local->radio_led)) + if (!atomic_read(&local->radio_led_active)) return; if (enabled) - led_trigger_event(local->radio_led, LED_FULL); + led_trigger_event(&local->radio_led, LED_FULL); else - led_trigger_event(local->radio_led, LED_OFF); + led_trigger_event(&local->radio_led, LED_OFF); +} + +void ieee80211_alloc_led_names(struct ieee80211_local *local) +{ + local->rx_led.name = kasprintf(GFP_KERNEL, "%srx", + wiphy_name(local->hw.wiphy)); + local->tx_led.name = kasprintf(GFP_KERNEL, "%stx", + wiphy_name(local->hw.wiphy)); + local->assoc_led.name = kasprintf(GFP_KERNEL, "%sassoc", + wiphy_name(local->hw.wiphy)); + local->radio_led.name = kasprintf(GFP_KERNEL, "%sradio", + wiphy_name(local->hw.wiphy)); +} + +void ieee80211_free_led_names(struct ieee80211_local *local) +{ + kfree(local->rx_led.name); + kfree(local->tx_led.name); + kfree(local->assoc_led.name); + kfree(local->radio_led.name); +} + +static void ieee80211_tx_led_activate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + tx_led); + + atomic_inc(&local->tx_led_active); +} + +static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + tx_led); + + atomic_dec(&local->tx_led_active); +} + +static void ieee80211_rx_led_activate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + rx_led); + + atomic_inc(&local->rx_led_active); +} + +static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + rx_led); + + atomic_dec(&local->rx_led_active); +} + +static void ieee80211_assoc_led_activate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + assoc_led); + + atomic_inc(&local->assoc_led_active); +} + +static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + assoc_led); + + atomic_dec(&local->assoc_led_active); +} + +static void ieee80211_radio_led_activate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + radio_led); + + atomic_inc(&local->radio_led_active); +} + +static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + radio_led); + + atomic_dec(&local->radio_led_active); +} + +static void ieee80211_tpt_led_activate(struct led_classdev *led_cdev) +{ + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + tpt_led); + + atomic_inc(&local->tpt_led_active); } -void ieee80211_led_names(struct ieee80211_local *local) +static void ieee80211_tpt_led_deactivate(struct led_classdev *led_cdev) { - snprintf(local->rx_led_name, sizeof(local->rx_led_name), - "%srx", wiphy_name(local->hw.wiphy)); - snprintf(local->tx_led_name, sizeof(local->tx_led_name), - "%stx", wiphy_name(local->hw.wiphy)); - snprintf(local->assoc_led_name, sizeof(local->assoc_led_name), - "%sassoc", wiphy_name(local->hw.wiphy)); - snprintf(local->radio_led_name, sizeof(local->radio_led_name), - "%sradio", wiphy_name(local->hw.wiphy)); + struct ieee80211_local *local = container_of(led_cdev->trigger, + struct ieee80211_local, + tpt_led); + + atomic_dec(&local->tpt_led_active); } void ieee80211_led_init(struct ieee80211_local *local) { - local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (local->rx_led) { - local->rx_led->name = local->rx_led_name; - if (led_trigger_register(local->rx_led)) { - kfree(local->rx_led); - local->rx_led = NULL; - } + atomic_set(&local->rx_led_active, 0); + local->rx_led.activate = ieee80211_rx_led_activate; + local->rx_led.deactivate = ieee80211_rx_led_deactivate; + if (local->rx_led.name && led_trigger_register(&local->rx_led)) { + kfree(local->rx_led.name); + local->rx_led.name = NULL; } - local->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (local->tx_led) { - local->tx_led->name = local->tx_led_name; - if (led_trigger_register(local->tx_led)) { - kfree(local->tx_led); - local->tx_led = NULL; - } + atomic_set(&local->tx_led_active, 0); + local->tx_led.activate = ieee80211_tx_led_activate; + local->tx_led.deactivate = ieee80211_tx_led_deactivate; + if (local->tx_led.name && led_trigger_register(&local->tx_led)) { + kfree(local->tx_led.name); + local->tx_led.name = NULL; } - local->assoc_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (local->assoc_led) { - local->assoc_led->name = local->assoc_led_name; - if (led_trigger_register(local->assoc_led)) { - kfree(local->assoc_led); - local->assoc_led = NULL; - } + atomic_set(&local->assoc_led_active, 0); + local->assoc_led.activate = ieee80211_assoc_led_activate; + local->assoc_led.deactivate = ieee80211_assoc_led_deactivate; + if (local->assoc_led.name && led_trigger_register(&local->assoc_led)) { + kfree(local->assoc_led.name); + local->assoc_led.name = NULL; } - local->radio_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); - if (local->radio_led) { - local->radio_led->name = local->radio_led_name; - if (led_trigger_register(local->radio_led)) { - kfree(local->radio_led); - local->radio_led = NULL; - } + atomic_set(&local->radio_led_active, 0); + local->radio_led.activate = ieee80211_radio_led_activate; + local->radio_led.deactivate = ieee80211_radio_led_deactivate; + if (local->radio_led.name && led_trigger_register(&local->radio_led)) { + kfree(local->radio_led.name); + local->radio_led.name = NULL; } + atomic_set(&local->tpt_led_active, 0); if (local->tpt_led_trigger) { - if (led_trigger_register(&local->tpt_led_trigger->trig)) { + local->tpt_led.activate = ieee80211_tpt_led_activate; + local->tpt_led.deactivate = ieee80211_tpt_led_deactivate; + if (led_trigger_register(&local->tpt_led)) { kfree(local->tpt_led_trigger); local->tpt_led_trigger = NULL; } @@ -110,58 +189,50 @@ void ieee80211_led_init(struct ieee80211_local *local) void ieee80211_led_exit(struct ieee80211_local *local) { - if (local->radio_led) { - led_trigger_unregister(local->radio_led); - kfree(local->radio_led); - } - if (local->assoc_led) { - led_trigger_unregister(local->assoc_led); - kfree(local->assoc_led); - } - if (local->tx_led) { - led_trigger_unregister(local->tx_led); - kfree(local->tx_led); - } - if (local->rx_led) { - led_trigger_unregister(local->rx_led); - kfree(local->rx_led); - } + if (local->radio_led.name) + led_trigger_unregister(&local->radio_led); + if (local->assoc_led.name) + led_trigger_unregister(&local->assoc_led); + if (local->tx_led.name) + led_trigger_unregister(&local->tx_led); + if (local->rx_led.name) + led_trigger_unregister(&local->rx_led); if (local->tpt_led_trigger) { - led_trigger_unregister(&local->tpt_led_trigger->trig); + led_trigger_unregister(&local->tpt_led); kfree(local->tpt_led_trigger); } } -char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - return local->radio_led_name; + return local->radio_led.name; } EXPORT_SYMBOL(__ieee80211_get_radio_led_name); -char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - return local->assoc_led_name; + return local->assoc_led.name; } EXPORT_SYMBOL(__ieee80211_get_assoc_led_name); -char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - return local->tx_led_name; + return local->tx_led.name; } EXPORT_SYMBOL(__ieee80211_get_tx_led_name); -char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw) +const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); - return local->rx_led_name; + return local->rx_led.name; } EXPORT_SYMBOL(__ieee80211_get_rx_led_name); @@ -211,10 +282,11 @@ static void tpt_trig_timer(unsigned long data) read_unlock(&tpt_trig->trig.leddev_list_lock); } -char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, - unsigned int flags, - const struct ieee80211_tpt_blink *blink_table, - unsigned int blink_table_len) +const char * +__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, + unsigned int flags, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len) { struct ieee80211_local *local = hw_to_local(hw); struct tpt_led_trigger *tpt_trig; @@ -229,7 +301,7 @@ char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, snprintf(tpt_trig->name, sizeof(tpt_trig->name), "%stpt", wiphy_name(local->hw.wiphy)); - tpt_trig->trig.name = tpt_trig->name; + local->tpt_led.name = tpt_trig->name; tpt_trig->blink_table = blink_table; tpt_trig->blink_table_len = blink_table_len; diff --git a/net/mac80211/led.h b/net/mac80211/led.h index 89f4344f13b9..a7893a1ac98b 100644 --- a/net/mac80211/led.h +++ b/net/mac80211/led.h @@ -11,25 +11,42 @@ #include <linux/leds.h> #include "ieee80211_i.h" +#define MAC80211_BLINK_DELAY 50 /* ms */ + +static inline void ieee80211_led_rx(struct ieee80211_local *local) +{ +#ifdef CONFIG_MAC80211_LEDS + unsigned long led_delay = MAC80211_BLINK_DELAY; + + if (!atomic_read(&local->rx_led_active)) + return; + led_trigger_blink_oneshot(&local->rx_led, &led_delay, &led_delay, 0); +#endif +} + +static inline void ieee80211_led_tx(struct ieee80211_local *local) +{ +#ifdef CONFIG_MAC80211_LEDS + unsigned long led_delay = MAC80211_BLINK_DELAY; + + if (!atomic_read(&local->tx_led_active)) + return; + led_trigger_blink_oneshot(&local->tx_led, &led_delay, &led_delay, 0); +#endif +} + #ifdef CONFIG_MAC80211_LEDS -void ieee80211_led_rx(struct ieee80211_local *local); -void ieee80211_led_tx(struct ieee80211_local *local); void ieee80211_led_assoc(struct ieee80211_local *local, bool associated); void ieee80211_led_radio(struct ieee80211_local *local, bool enabled); -void ieee80211_led_names(struct ieee80211_local *local); +void ieee80211_alloc_led_names(struct ieee80211_local *local); +void ieee80211_free_led_names(struct ieee80211_local *local); void ieee80211_led_init(struct ieee80211_local *local); void ieee80211_led_exit(struct ieee80211_local *local); void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, unsigned int types_on, unsigned int types_off); #else -static inline void ieee80211_led_rx(struct ieee80211_local *local) -{ -} -static inline void ieee80211_led_tx(struct ieee80211_local *local) -{ -} static inline void ieee80211_led_assoc(struct ieee80211_local *local, bool associated) { @@ -38,7 +55,10 @@ static inline void ieee80211_led_radio(struct ieee80211_local *local, bool enabled) { } -static inline void ieee80211_led_names(struct ieee80211_local *local) +static inline void ieee80211_alloc_led_names(struct ieee80211_local *local) +{ +} +static inline void ieee80211_free_led_names(struct ieee80211_local *local) { } static inline void ieee80211_led_init(struct ieee80211_local *local) @@ -58,7 +78,7 @@ static inline void ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes) { #ifdef CONFIG_MAC80211_LEDS - if (local->tpt_led_trigger && ieee80211_is_data(fc)) + if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->tx_bytes += bytes; #endif } @@ -67,7 +87,7 @@ static inline void ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes) { #ifdef CONFIG_MAC80211_LEDS - if (local->tpt_led_trigger && ieee80211_is_data(fc)) + if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active)) local->tpt_led_trigger->rx_bytes += bytes; #endif } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index df3051d96aff..3c956c5f99b2 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -41,9 +41,6 @@ void ieee80211_configure_filter(struct ieee80211_local *local) unsigned int changed_flags; unsigned int new_flags = 0; - if (atomic_read(&local->iff_promiscs)) - new_flags |= FIF_PROMISC_IN_BSS; - if (atomic_read(&local->iff_allmultis)) new_flags |= FIF_ALLMULTI; @@ -646,7 +643,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); - ieee80211_led_names(local); + ieee80211_alloc_led_names(local); ieee80211_roc_setup(local); @@ -771,8 +768,11 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256; } - for (r = 0; r < local->hw.n_cipher_schemes; r++) + for (r = 0; r < local->hw.n_cipher_schemes; r++) { suites[w++] = cs[r].cipher; + if (WARN_ON(cs[r].pn_len > IEEE80211_MAX_PN_LEN)) + return -EINVAL; + } } local->hw.wiphy->cipher_suites = suites; @@ -840,7 +840,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* Only HW csum features are currently compatible with mac80211 */ feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_HW_CSUM; + NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | + NETIF_F_GSO_SOFTWARE; if (WARN_ON(hw->netdev_features & ~feature_whitelist)) return -EINVAL; @@ -1209,6 +1210,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) sta_info_stop(local); + ieee80211_free_led_names(local); + wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 60d737f144e3..ac843fc88745 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -72,10 +72,11 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata, * * @sta: mesh peer link to restart * - * Locking: this function must be called holding sta->lock + * Locking: this function must be called holding sta->plink_lock */ static inline void mesh_plink_fsm_restart(struct sta_info *sta) { + lockdep_assert_held(&sta->plink_lock); sta->plink_state = NL80211_PLINK_LISTEN; sta->llid = sta->plid = sta->reason = 0; sta->plink_retries = 0; @@ -213,13 +214,15 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) * All mesh paths with this peer as next hop will be flushed * Returns beacon changed flag if the beacon content changed. * - * Locking: the caller must hold sta->lock + * Locking: the caller must hold sta->plink_lock */ static u32 __mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed = 0; + lockdep_assert_held(&sta->plink_lock); + if (sta->plink_state == NL80211_PLINK_ESTAB) changed = mesh_plink_dec_estab_count(sdata); sta->plink_state = NL80211_PLINK_BLOCKED; @@ -244,13 +247,13 @@ u32 mesh_plink_deactivate(struct sta_info *sta) struct ieee80211_sub_if_data *sdata = sta->sdata; u32 changed; - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); changed = __mesh_plink_deactivate(sta); sta->reason = WLAN_REASON_MESH_PEER_CANCELED; mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, sta->sta.addr, sta->llid, sta->plid, sta->reason); - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); return changed; } @@ -387,7 +390,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[band]; rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates); - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); sta->last_rx = jiffies; /* rates and capabilities don't change during peering */ @@ -419,7 +422,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, else rate_control_rate_update(local, sband, sta, changed); out: - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); } static struct sta_info * @@ -552,7 +555,7 @@ static void mesh_plink_timer(unsigned long data) if (sta->sdata->local->quiescing) return; - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); /* If a timer fires just before a state transition on another CPU, * we may have already extended the timeout and changed state by the @@ -563,7 +566,7 @@ static void mesh_plink_timer(unsigned long data) mpl_dbg(sta->sdata, "Ignoring timer for %pM in state %s (timer adjusted)", sta->sta.addr, mplstates[sta->plink_state]); - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); return; } @@ -573,7 +576,7 @@ static void mesh_plink_timer(unsigned long data) mpl_dbg(sta->sdata, "Ignoring timer for %pM in state %s (timer deleted)", sta->sta.addr, mplstates[sta->plink_state]); - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); return; } @@ -619,7 +622,7 @@ static void mesh_plink_timer(unsigned long data) default: break; } - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); if (action) mesh_plink_frame_tx(sdata, action, sta->sta.addr, sta->llid, sta->plid, reason); @@ -674,16 +677,16 @@ u32 mesh_plink_open(struct sta_info *sta) if (!test_sta_flag(sta, WLAN_STA_AUTH)) return 0; - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); sta->llid = mesh_get_new_llid(sdata); if (sta->plink_state != NL80211_PLINK_LISTEN && sta->plink_state != NL80211_PLINK_BLOCKED) { - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); return 0; } sta->plink_state = NL80211_PLINK_OPN_SNT; mesh_plink_timer_set(sta, sdata->u.mesh.mshcfg.dot11MeshRetryTimeout); - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); mpl_dbg(sdata, "Mesh plink: starting establishment with %pM\n", sta->sta.addr); @@ -700,10 +703,10 @@ u32 mesh_plink_block(struct sta_info *sta) { u32 changed; - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); changed = __mesh_plink_deactivate(sta); sta->plink_state = NL80211_PLINK_BLOCKED; - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); return changed; } @@ -758,7 +761,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr, mplstates[sta->plink_state], mplevents[event]); - spin_lock_bh(&sta->lock); + spin_lock_bh(&sta->plink_lock); switch (sta->plink_state) { case NL80211_PLINK_LISTEN: switch (event) { @@ -872,7 +875,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata, */ break; } - spin_unlock_bh(&sta->lock); + spin_unlock_bh(&sta->plink_lock); if (action) { mesh_plink_frame_tx(sdata, action, sta->sta.addr, sta->llid, sta->plid, sta->reason); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 26053bf2faa8..3294666f599c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4307,15 +4307,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, - struct cfg80211_bss *cbss, bool assoc) + struct cfg80211_bss *cbss, bool assoc, + bool override) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)cbss->priv; struct sta_info *new_sta = NULL; struct ieee80211_supported_band *sband; - struct ieee80211_sta_ht_cap sta_ht_cap; - bool have_sta = false, is_override = false; + bool have_sta = false; int err; sband = local->hw.wiphy->bands[cbss->channel->band]; @@ -4335,14 +4335,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, return -ENOMEM; } - memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); - ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); - - is_override = (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) != - (sband->ht_cap.cap & - IEEE80211_HT_CAP_SUP_WIDTH_20_40); - - if (new_sta || is_override) { + if (new_sta || override) { err = ieee80211_prep_channel(sdata, cbss); if (err) { if (new_sta) @@ -4552,7 +4545,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); - err = ieee80211_prep_connection(sdata, req->bss, false); + err = ieee80211_prep_connection(sdata, req->bss, false, false); if (err) goto err_clear; @@ -4624,6 +4617,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; const u8 *ssidie, *ht_ie, *vht_ie; int i, err; + bool override = false; assoc_data = kzalloc(sizeof(*assoc_data) + req->ie_len, GFP_KERNEL); if (!assoc_data) @@ -4728,14 +4722,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } } - if (req->flags & ASSOC_REQ_DISABLE_HT) { - ifmgd->flags |= IEEE80211_STA_DISABLE_HT; - ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; - } - - if (req->flags & ASSOC_REQ_DISABLE_VHT) - ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; - /* Also disable HT if we don't support it or the AP doesn't use WMM */ sband = local->hw.wiphy->bands[req->bss->channel->band]; if (!sband->ht_cap.ht_supported || @@ -4847,7 +4833,36 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->dtim_period = 0; ifmgd->have_beacon = false; - err = ieee80211_prep_connection(sdata, req->bss, true); + /* override HT/VHT configuration only if the AP and we support it */ + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { + struct ieee80211_sta_ht_cap sta_ht_cap; + + if (req->flags & ASSOC_REQ_DISABLE_HT) + override = true; + + memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + + /* check for 40 MHz disable override */ + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ) && + sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 && + !(sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) + override = true; + + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && + req->flags & ASSOC_REQ_DISABLE_VHT) + override = true; + } + + if (req->flags & ASSOC_REQ_DISABLE_HT) { + ifmgd->flags |= IEEE80211_STA_DISABLE_HT; + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + } + + if (req->flags & ASSOC_REQ_DISABLE_VHT) + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + + err = ieee80211_prep_connection(sdata, req->bss, true, override); if (err) goto err_clear; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index d53355b011f5..de69adf24f53 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -683,7 +683,13 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) return; - ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); + if (ista) { + spin_lock_bh(&sta->rate_ctrl_lock); + ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); + spin_unlock_bh(&sta->rate_ctrl_lock); + } else { + ref->ops->get_rate(ref->priv, NULL, NULL, txrc); + } if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE) return; diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 38652f09feaf..25c9be5dd7fd 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -42,10 +42,12 @@ static inline void rate_control_tx_status(struct ieee80211_local *local, if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) return; + spin_lock_bh(&sta->rate_ctrl_lock); if (ref->ops->tx_status) ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); else ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info); + spin_unlock_bh(&sta->rate_ctrl_lock); } static inline void @@ -64,7 +66,9 @@ rate_control_tx_status_noskb(struct ieee80211_local *local, if (WARN_ON_ONCE(!ref->ops->tx_status_noskb)) return; + spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info); + spin_unlock_bh(&sta->rate_ctrl_lock); } static inline void rate_control_rate_init(struct sta_info *sta) @@ -91,8 +95,10 @@ static inline void rate_control_rate_init(struct sta_info *sta) sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band]; + spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista, priv_sta); + spin_unlock_bh(&sta->rate_ctrl_lock); rcu_read_unlock(); set_sta_flag(sta, WLAN_STA_RATE_CONTROL); } @@ -115,18 +121,20 @@ static inline void rate_control_rate_update(struct ieee80211_local *local, return; } + spin_lock_bh(&sta->rate_ctrl_lock); ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def, ista, priv_sta, changed); + spin_unlock_bh(&sta->rate_ctrl_lock); rcu_read_unlock(); } drv_sta_rc_update(local, sta->sdata, &sta->sta, changed); } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, - struct ieee80211_sta *sta, - gfp_t gfp) + struct sta_info *sta, gfp_t gfp) { - return ref->ops->alloc_sta(ref->priv, sta, gfp); + spin_lock_init(&sta->rate_ctrl_lock); + return ref->ops->alloc_sta(ref->priv, &sta->sta, gfp); } static inline void rate_control_free_sta(struct sta_info *sta) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 260eed45b6d2..aa35977a9c4d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -32,6 +32,16 @@ #include "wme.h" #include "rate.h" +static inline void ieee80211_rx_stats(struct net_device *dev, u32 len) +{ + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += len; + u64_stats_update_end(&tstats->syncp); +} + /* * monitor mode reception * @@ -529,8 +539,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, } prev_dev = sdata->dev; - sdata->dev->stats.rx_packets++; - sdata->dev->stats.rx_bytes += skb->len; + ieee80211_rx_stats(sdata->dev, skb->len); } if (prev_dev) { @@ -981,7 +990,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, struct sk_buff *skb = rx->skb; struct ieee80211_local *local = rx->local; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct sta_info *sta = rx->sta; struct tid_ampdu_rx *tid_agg_rx; u16 sc; @@ -1016,10 +1024,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL) goto dont_reorder; - /* not actually part of this BA session */ - if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) - goto dont_reorder; - /* new, potentially un-ordered, ampdu frame - process it */ /* reset session timer */ @@ -1073,10 +1077,8 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { - if (status->rx_flags & IEEE80211_RX_RA_MATCH) { - rx->local->dot11FrameDuplicateCount++; - rx->sta->num_duplicates++; - } + I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); + rx->sta->num_duplicates++; return RX_DROP_UNUSABLE; } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; @@ -1200,6 +1202,8 @@ static void sta_ps_start(struct sta_info *sta) ps_dbg(sdata, "STA %pM aid %d enters power save mode\n", sta->sta.addr, sta->sta.aid); + ieee80211_clear_fast_xmit(sta); + if (!sta->sta.txq[0]) return; @@ -1265,7 +1269,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); int tid, ac; - if (!rx->sta || !(status->rx_flags & IEEE80211_RX_RA_MATCH)) + if (!rx->sta) return RX_CONTINUE; if (sdata->vif.type != NL80211_IFTYPE_AP && @@ -1367,11 +1371,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } } } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { - u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, - NL80211_IFTYPE_OCB); - /* OCB uses wild-card BSSID */ - if (is_broadcast_ether_addr(bssid)) - sta->last_rx = jiffies; + sta->last_rx = jiffies; } else if (!is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to @@ -1386,9 +1386,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) } } - if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) - return RX_CONTINUE; - if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_sta_rx_notify(rx->sdata, hdr); @@ -1517,13 +1514,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * possible. */ - /* - * No point in finding a key and decrypting if the frame is neither - * addressed to us nor a multicast frame. - */ - if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) - return RX_CONTINUE; - /* start without a key */ rx->key = NULL; fc = hdr->frame_control; @@ -1795,7 +1785,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) frag = sc & IEEE80211_SCTL_FRAG; if (is_multicast_ether_addr(hdr->addr1)) { - rx->local->dot11MulticastReceivedFrameCount++; + I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount); goto out_no_led; } @@ -1878,7 +1868,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) rx->skb = __skb_dequeue(&entry->skb_list); if (skb_tailroom(rx->skb) < entry->extra_len) { - I802_DEBUG_INC(rx->local->rx_expand_skb_head2); + I802_DEBUG_INC(rx->local->rx_expand_skb_head_defrag); if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len, GFP_ATOMIC))) { I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); @@ -2054,18 +2044,15 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) struct sk_buff *skb, *xmit_skb; struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; struct sta_info *dsta; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - - dev->stats.rx_packets++; - dev->stats.rx_bytes += rx->skb->len; skb = rx->skb; xmit_skb = NULL; + ieee80211_rx_stats(dev, skb->len); + if ((sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && - (status->rx_flags & IEEE80211_RX_RA_MATCH) && (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { if (is_multicast_ether_addr(ehdr->h_dest)) { /* @@ -2206,7 +2193,6 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb, *fwd_skb; struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u16 q, hdrlen; @@ -2237,8 +2223,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) mesh_rmc_check(rx->sdata, hdr->addr3, mesh_hdr)) return RX_DROP_MONITOR; - if (!ieee80211_is_data(hdr->frame_control) || - !(status->rx_flags & IEEE80211_RX_RA_MATCH)) + if (!ieee80211_is_data(hdr->frame_control)) return RX_CONTINUE; if (!mesh_hdr->ttl) @@ -2329,11 +2314,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames); ieee80211_add_pending_skb(local, fwd_skb); out: - if (is_multicast_ether_addr(hdr->addr1) || - sdata->dev->flags & IFF_PROMISC) + if (is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; - else - return RX_DROP_MONITOR; + return RX_DROP_MONITOR; } #endif @@ -2444,6 +2427,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) struct { __le16 control, start_seq_num; } __packed bar_data; + struct ieee80211_event event = { + .type = BAR_RX_EVENT, + }; if (!rx->sta) return RX_DROP_MONITOR; @@ -2459,6 +2445,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_DROP_MONITOR; start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; + event.u.ba.tid = tid; + event.u.ba.ssn = start_seq_num; + event.u.ba.sta = &rx->sta->sta; /* reset session timer */ if (tid_agg_rx->timeout) @@ -2471,6 +2460,8 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) start_seq_num, frames); spin_unlock(&tid_agg_rx->reorder_lock); + drv_event_callback(rx->local, rx->sdata, &event); + kfree_skb(skb); return RX_QUEUED; } @@ -2560,9 +2551,6 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) rx->flags |= IEEE80211_RX_BEACON_REPORTED; } - if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) - return RX_DROP_MONITOR; - if (ieee80211_drop_unencrypted_mgmt(rx)) return RX_DROP_UNUSABLE; @@ -2590,9 +2578,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) return RX_DROP_UNUSABLE; - if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) - return RX_DROP_UNUSABLE; - switch (mgmt->u.action.category) { case WLAN_CATEGORY_HT: /* reject HT action frames from stations not supporting HT */ @@ -3076,8 +3061,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, } prev_dev = sdata->dev; - sdata->dev->stats.rx_packets++; - sdata->dev->stats.rx_bytes += skb->len; + ieee80211_rx_stats(sdata->dev, skb->len); } if (prev_dev) { @@ -3245,16 +3229,25 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames); spin_unlock(&tid_agg_rx->reorder_lock); + if (!skb_queue_empty(&frames)) { + struct ieee80211_event event = { + .type = BA_FRAME_TIMEOUT, + .u.ba.tid = tid, + .u.ba.sta = &sta->sta, + }; + drv_event_callback(rx.local, rx.sdata, &event); + } + ieee80211_rx_handlers(&rx, &frames); } /* main receive path */ -static bool prepare_for_handlers(struct ieee80211_rx_data *rx, - struct ieee80211_hdr *hdr) +static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; struct sk_buff *skb = rx->skb; + struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); int multicast = is_multicast_ether_addr(hdr->addr1); @@ -3263,30 +3256,23 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, case NL80211_IFTYPE_STATION: if (!bssid && !sdata->u.mgd.use_4addr) return false; - if (!multicast && - !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { - if (!(sdata->dev->flags & IFF_PROMISC) || - sdata->u.mgd.use_4addr) - return false; - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - } - break; + if (multicast) + return true; + return ether_addr_equal(sdata->vif.addr, hdr->addr1); case NL80211_IFTYPE_ADHOC: if (!bssid) return false; if (ether_addr_equal(sdata->vif.addr, hdr->addr2) || ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2)) return false; - if (ieee80211_is_beacon(hdr->frame_control)) { + if (ieee80211_is_beacon(hdr->frame_control)) return true; - } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { + if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) return false; - } else if (!multicast && - !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { - if (!(sdata->dev->flags & IFF_PROMISC)) - return false; - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - } else if (!rx->sta) { + if (!multicast && + !ether_addr_equal(sdata->vif.addr, hdr->addr1)) + return false; + if (!rx->sta) { int rate_idx; if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) rate_idx = 0; /* TODO: HT/VHT rates */ @@ -3295,25 +3281,18 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, ieee80211_ibss_rx_no_sta(sdata, bssid, hdr->addr2, BIT(rate_idx)); } - break; + return true; case NL80211_IFTYPE_OCB: if (!bssid) return false; - if (ieee80211_is_beacon(hdr->frame_control)) { + if (ieee80211_is_beacon(hdr->frame_control)) return false; - } else if (!is_broadcast_ether_addr(bssid)) { - ocb_dbg(sdata, "BSSID mismatch in OCB mode!\n"); + if (!is_broadcast_ether_addr(bssid)) return false; - } else if (!multicast && - !ether_addr_equal(sdata->dev->dev_addr, - hdr->addr1)) { - /* if we are in promisc mode we also accept - * packets not destined for us - */ - if (!(sdata->dev->flags & IFF_PROMISC)) - return false; - rx->flags &= ~IEEE80211_RX_RA_MATCH; - } else if (!rx->sta) { + if (!multicast && + !ether_addr_equal(sdata->dev->dev_addr, hdr->addr1)) + return false; + if (!rx->sta) { int rate_idx; if (status->flag & RX_FLAG_HT) rate_idx = 0; /* TODO: HT rates */ @@ -3322,22 +3301,17 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, ieee80211_ocb_rx_no_sta(sdata, bssid, hdr->addr2, BIT(rate_idx)); } - break; + return true; case NL80211_IFTYPE_MESH_POINT: - if (!multicast && - !ether_addr_equal(sdata->vif.addr, hdr->addr1)) { - if (!(sdata->dev->flags & IFF_PROMISC)) - return false; - - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - } - break; + if (multicast) + return true; + return ether_addr_equal(sdata->vif.addr, hdr->addr1); case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_AP: - if (!bssid) { - if (!ether_addr_equal(sdata->vif.addr, hdr->addr1)) - return false; - } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { + if (!bssid) + return ether_addr_equal(sdata->vif.addr, hdr->addr1); + + if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { /* * Accept public action frames even when the * BSSID doesn't match, this is used for P2P @@ -3349,10 +3323,10 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, return false; if (ieee80211_is_public_action(hdr, skb->len)) return true; - if (!ieee80211_is_beacon(hdr->frame_control)) - return false; - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - } else if (!ieee80211_has_tods(hdr->frame_control)) { + return ieee80211_is_beacon(hdr->frame_control); + } + + if (!ieee80211_has_tods(hdr->frame_control)) { /* ignore data frames to TDLS-peers */ if (ieee80211_is_data(hdr->frame_control)) return false; @@ -3361,30 +3335,22 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx, !ether_addr_equal(bssid, hdr->addr1)) return false; } - break; + return true; case NL80211_IFTYPE_WDS: if (bssid || !ieee80211_is_data(hdr->frame_control)) return false; - if (!ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2)) - return false; - break; + return ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2); case NL80211_IFTYPE_P2P_DEVICE: - if (!ieee80211_is_public_action(hdr, skb->len) && - !ieee80211_is_probe_req(hdr->frame_control) && - !ieee80211_is_probe_resp(hdr->frame_control) && - !ieee80211_is_beacon(hdr->frame_control)) - return false; - if (!ether_addr_equal(sdata->vif.addr, hdr->addr1) && - !multicast) - status->rx_flags &= ~IEEE80211_RX_RA_MATCH; - break; + return ieee80211_is_public_action(hdr, skb->len) || + ieee80211_is_probe_req(hdr->frame_control) || + ieee80211_is_probe_resp(hdr->frame_control) || + ieee80211_is_beacon(hdr->frame_control); default: - /* should never get here */ - WARN_ON_ONCE(1); break; } - return true; + WARN_ON_ONCE(1); + return false; } /* @@ -3398,13 +3364,10 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, { struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - struct ieee80211_hdr *hdr = (void *)skb->data; rx->skb = skb; - status->rx_flags |= IEEE80211_RX_RA_MATCH; - if (!prepare_for_handlers(rx, hdr)) + if (!ieee80211_accept_frame(rx)) return false; if (!consume) { @@ -3447,7 +3410,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, rx.local = local; if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc)) - local->dot11ReceivedFragmentCount++; + I802_DEBUG_INC(local->dot11ReceivedFragmentCount); if (ieee80211_is_mgmt(fc)) { /* drop frame if too short for header */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 2880f2ae99ab..ce0c1662de42 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -71,6 +71,7 @@ static const struct rhashtable_params sta_rht_params = { .key_offset = offsetof(struct sta_info, sta.addr), .key_len = ETH_ALEN, .hashfn = sta_addr_hash, + .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; /* Caller must hold local->sta_mtx */ @@ -286,7 +287,7 @@ static int sta_prepare_rate_control(struct ieee80211_local *local, sta->rate_ctrl = local->rate_ctrl; sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, - &sta->sta, gfp); + sta, gfp); if (!sta->rate_ctrl_priv) return -ENOMEM; @@ -312,6 +313,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); #ifdef CONFIG_MAC80211_MESH + spin_lock_init(&sta->plink_lock); if (ieee80211_vif_is_mesh(&sdata->vif) && !sdata->u.mesh.user_mpm) init_timer(&sta->plink_timer); @@ -1217,6 +1219,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) ps_dbg(sdata, "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", sta->sta.addr, sta->sta.aid, filtered, buffered); + + ieee80211_check_fast_xmit(sta); } static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, @@ -1615,6 +1619,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, if (block) { set_sta_flag(sta, WLAN_STA_PS_DRIVER); + ieee80211_clear_fast_xmit(sta); return; } @@ -1632,6 +1637,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, ieee80211_queue_work(hw, &sta->drv_deliver_wk); } else { clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + ieee80211_check_fast_xmit(sta); } } EXPORT_SYMBOL(ieee80211_sta_block_awake); @@ -1736,6 +1742,7 @@ int sta_info_move_state(struct sta_info *sta, !sta->sdata->u.vlan.sta)) atomic_dec(&sta->sdata->bss->num_mcast_sta); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + ieee80211_clear_fast_xmit(sta); } break; case IEEE80211_STA_AUTHORIZED: @@ -1745,6 +1752,7 @@ int sta_info_move_state(struct sta_info *sta, !sta->sdata->u.vlan.sta)) atomic_inc(&sta->sdata->bss->num_mcast_sta); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + ieee80211_check_fast_xmit(sta); } break; default: diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5c164fb3f6c5..9bd1e97876bd 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -241,6 +241,34 @@ struct sta_ampdu_mlme { /* Value to indicate no TID reservation */ #define IEEE80211_TID_UNRESERVED 0xff +#define IEEE80211_FAST_XMIT_MAX_IV 18 + +/** + * struct ieee80211_fast_tx - TX fastpath information + * @key: key to use for hw crypto + * @hdr: the 802.11 header to put with the frame + * @hdr_len: actual 802.11 header length + * @sa_offs: offset of the SA + * @da_offs: offset of the DA + * @pn_offs: offset where to put PN for crypto (or 0 if not needed) + * @band: band this will be transmitted on, for tx_info + * @rcu_head: RCU head to free this struct + * + * This struct is small enough so that the common case (maximum crypto + * header length of 8 like for CCMP/GCMP) fits into a single 64-byte + * cache line. + */ +struct ieee80211_fast_tx { + struct ieee80211_key *key; + u8 hdr_len; + u8 sa_offs, da_offs, pn_offs; + u8 band; + u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV + + sizeof(rfc1042_header)]; + + struct rcu_head rcu_head; +}; + /** * struct sta_info - STA information * @@ -257,6 +285,8 @@ struct sta_ampdu_mlme { * @gtk: group keys negotiated with this station, if any * @gtk_idx: last installed group key index * @rate_ctrl: rate control algorithm reference + * @rate_ctrl_lock: spinlock used to protect rate control data + * (data inside the algorithm, so serializes calls there) * @rate_ctrl_priv: rate control private per-STA pointer * @last_tx_rate: rate used for last transmit, to report to userspace as * "the" transmit rate @@ -295,10 +325,10 @@ struct sta_ampdu_mlme { * @fail_avg: moving percentage of failed MSDUs * @tx_packets: number of RX/TX MSDUs * @tx_bytes: number of bytes transmitted to this STA - * @tx_fragments: number of transmitted MPDUs * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers + * @plink_lock: serialize access to plink fields * @llid: Local link ID * @plid: Peer link ID * @reason: Cancel reason on PLINK_HOLDING state @@ -338,6 +368,7 @@ struct sta_ampdu_mlme { * using IEEE80211_NUM_TID entry for non-QoS frames * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID * entry for non-QoS frames + * @fast_tx: TX fastpath information */ struct sta_info { /* General information, mostly static */ @@ -352,8 +383,11 @@ struct sta_info { u8 ptk_idx; struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; + spinlock_t rate_ctrl_lock; spinlock_t lock; + struct ieee80211_fast_tx __rcu *fast_tx; + struct work_struct drv_deliver_wk; u16 listen_interval; @@ -400,7 +434,6 @@ struct sta_info { unsigned int fail_avg; /* Updated from TX path only, no locking requirements */ - u32 tx_fragments; u64 tx_packets[IEEE80211_NUM_ACS]; u64 tx_bytes[IEEE80211_NUM_ACS]; struct ieee80211_tx_rate last_tx_rate; @@ -422,9 +455,10 @@ struct sta_info { #ifdef CONFIG_MAC80211_MESH /* - * Mesh peer link attributes + * Mesh peer link attributes, protected by plink_lock. * TODO: move to a sub-structure that is referenced with pointer? */ + spinlock_t plink_lock; u16 llid; u16 plid; u16 reason; @@ -432,6 +466,7 @@ struct sta_info { enum nl80211_plink_state plink_state; u32 plink_timeout; struct timer_list plink_timer; + s64 t_offset; s64 t_offset_setpoint; /* mesh power save */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 005fdbe39a8b..461594966b65 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -631,15 +631,15 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, } if (acked || noack_success) { - local->dot11TransmittedFrameCount++; - if (!pubsta) - local->dot11MulticastTransmittedFrameCount++; - if (retry_count > 0) - local->dot11RetryCount++; - if (retry_count > 1) - local->dot11MultipleRetryCount++; + I802_DEBUG_INC(local->dot11TransmittedFrameCount); + if (!pubsta) + I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount); + if (retry_count > 0) + I802_DEBUG_INC(local->dot11RetryCount); + if (retry_count > 1) + I802_DEBUG_INC(local->dot11MultipleRetryCount); } else { - local->dot11FailedCount++; + I802_DEBUG_INC(local->dot11FailedCount); } } EXPORT_SYMBOL(ieee80211_tx_status_noskb); @@ -802,13 +802,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if ((info->flags & IEEE80211_TX_STAT_ACK) || (info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) { if (ieee80211_is_first_frag(hdr->seq_ctrl)) { - local->dot11TransmittedFrameCount++; + I802_DEBUG_INC(local->dot11TransmittedFrameCount); if (is_multicast_ether_addr(ieee80211_get_DA(hdr))) - local->dot11MulticastTransmittedFrameCount++; + I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount); if (retry_count > 0) - local->dot11RetryCount++; + I802_DEBUG_INC(local->dot11RetryCount); if (retry_count > 1) - local->dot11MultipleRetryCount++; + I802_DEBUG_INC(local->dot11MultipleRetryCount); } /* This counter shall be incremented for an acknowledged MPDU @@ -818,10 +818,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (!is_multicast_ether_addr(hdr->addr1) || ieee80211_is_data(fc) || ieee80211_is_mgmt(fc)) - local->dot11TransmittedFragmentCount++; + I802_DEBUG_INC(local->dot11TransmittedFragmentCount); } else { if (ieee80211_is_first_frag(hdr->seq_ctrl)) - local->dot11FailedCount++; + I802_DEBUG_INC(local->dot11FailedCount); } if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) && diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 4c2e7690226a..6f14591d8ca9 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -69,6 +69,17 @@ #define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \ __entry->rx_chains_static, __entry->rx_chains_dynamic +#define KEY_ENTRY __field(u32, cipher) \ + __field(u8, hw_key_idx) \ + __field(u8, flags) \ + __field(s8, keyidx) +#define KEY_ASSIGN(k) __entry->cipher = (k)->cipher; \ + __entry->flags = (k)->flags; \ + __entry->keyidx = (k)->keyidx; \ + __entry->hw_key_idx = (k)->hw_key_idx; +#define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d" +#define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx + /* @@ -522,25 +533,19 @@ TRACE_EVENT(drv_set_key, LOCAL_ENTRY VIF_ENTRY STA_ENTRY - __field(u32, cipher) - __field(u8, hw_key_idx) - __field(u8, flags) - __field(s8, keyidx) + KEY_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->cipher = key->cipher; - __entry->flags = key->flags; - __entry->keyidx = key->keyidx; - __entry->hw_key_idx = key->hw_key_idx; + KEY_ASSIGN(key); ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT KEY_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, KEY_PR_ARG ) ); @@ -656,28 +661,25 @@ TRACE_EVENT(drv_get_stats, ) ); -TRACE_EVENT(drv_get_tkip_seq, +TRACE_EVENT(drv_get_key_seq, TP_PROTO(struct ieee80211_local *local, - u8 hw_key_idx, u32 *iv32, u16 *iv16), + struct ieee80211_key_conf *key), - TP_ARGS(local, hw_key_idx, iv32, iv16), + TP_ARGS(local, key), TP_STRUCT__entry( LOCAL_ENTRY - __field(u8, hw_key_idx) - __field(u32, iv32) - __field(u16, iv16) + KEY_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; - __entry->hw_key_idx = hw_key_idx; - __entry->iv32 = *iv32; - __entry->iv16 = *iv16; + KEY_ASSIGN(key); ), TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG + LOCAL_PR_FMT KEY_PR_FMT, + LOCAL_PR_ARG, KEY_PR_ARG ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 667111ee6a20..8df134213adf 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -37,6 +37,16 @@ /* misc utils */ +static inline void ieee80211_tx_stats(struct net_device *dev, u32 len) +{ + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->tx_packets++; + tstats->tx_bytes += len; + u64_stats_update_end(&tstats->syncp); +} + static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, struct sk_buff *skb, int group_addr, int next_frag_len) @@ -987,7 +997,6 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) skb_queue_walk(&tx->skbs, skb) { ac = skb_get_queue_mapping(skb); - tx->sta->tx_fragments++; tx->sta->tx_bytes[ac] += skb->len; } if (ac >= 0) @@ -1600,7 +1609,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CLONED_SKBS) || !skb_clone_writable(skb, ETH_HLEN) || - sdata->crypto_tx_tailroom_needed_cnt)) + (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); @@ -2387,12 +2396,460 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, return ERR_PTR(ret); } +/* + * fast-xmit overview + * + * The core idea of this fast-xmit is to remove per-packet checks by checking + * them out of band. ieee80211_check_fast_xmit() implements the out-of-band + * checks that are needed to get the sta->fast_tx pointer assigned, after which + * much less work can be done per packet. For example, fragmentation must be + * disabled or the fast_tx pointer will not be set. All the conditions are seen + * in the code here. + * + * Once assigned, the fast_tx data structure also caches the per-packet 802.11 + * header and other data to aid packet processing in ieee80211_xmit_fast(). + * + * The most difficult part of this is that when any of these assumptions + * change, an external trigger (i.e. a call to ieee80211_clear_fast_xmit(), + * ieee80211_check_fast_xmit() or friends) is required to reset the data, + * since the per-packet code no longer checks the conditions. This is reflected + * by the calls to these functions throughout the rest of the code, and must be + * maintained if any of the TX path checks change. + */ + +void ieee80211_check_fast_xmit(struct sta_info *sta) +{ + struct ieee80211_fast_tx build = {}, *fast_tx = NULL, *old; + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_hdr *hdr = (void *)build.hdr; + struct ieee80211_chanctx_conf *chanctx_conf; + __le16 fc; + + if (!(local->hw.flags & IEEE80211_HW_SUPPORT_FAST_XMIT)) + return; + + /* Locking here protects both the pointer itself, and against concurrent + * invocations winning data access races to, e.g., the key pointer that + * is used. + * Without it, the invocation of this function right after the key + * pointer changes wouldn't be sufficient, as another CPU could access + * the pointer, then stall, and then do the cache update after the CPU + * that invalidated the key. + * With the locking, such scenarios cannot happen as the check for the + * key and the fast-tx assignment are done atomically, so the CPU that + * modifies the key will either wait or other one will see the key + * cleared/changed already. + */ + spin_lock_bh(&sta->lock); + if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS && + !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) && + sdata->vif.type == NL80211_IFTYPE_STATION) + goto out; + + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + goto out; + + if (test_sta_flag(sta, WLAN_STA_PS_STA) || + test_sta_flag(sta, WLAN_STA_PS_DRIVER) || + test_sta_flag(sta, WLAN_STA_PS_DELIVER)) + goto out; + + if (sdata->noack_map) + goto out; + + /* fast-xmit doesn't handle fragmentation at all */ + if (local->hw.wiphy->frag_threshold != (u32)-1 && + !local->ops->set_frag_threshold) + goto out; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (!chanctx_conf) { + rcu_read_unlock(); + goto out; + } + build.band = chanctx_conf->def.chan->band; + rcu_read_unlock(); + + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); + + switch (sdata->vif.type) { + case NL80211_IFTYPE_ADHOC: + /* DA SA BSSID */ + build.da_offs = offsetof(struct ieee80211_hdr, addr1); + build.sa_offs = offsetof(struct ieee80211_hdr, addr2); + memcpy(hdr->addr3, sdata->u.ibss.bssid, ETH_ALEN); + build.hdr_len = 24; + break; + case NL80211_IFTYPE_STATION: + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + /* DA SA BSSID */ + build.da_offs = offsetof(struct ieee80211_hdr, addr1); + build.sa_offs = offsetof(struct ieee80211_hdr, addr2); + memcpy(hdr->addr3, sdata->u.mgd.bssid, ETH_ALEN); + build.hdr_len = 24; + break; + } + + if (sdata->u.mgd.use_4addr) { + /* non-regular ethertype cannot use the fastpath */ + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS); + /* RA TA DA SA */ + memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + build.da_offs = offsetof(struct ieee80211_hdr, addr3); + build.sa_offs = offsetof(struct ieee80211_hdr, addr4); + build.hdr_len = 30; + break; + } + fc |= cpu_to_le16(IEEE80211_FCTL_TODS); + /* BSSID SA DA */ + memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN); + build.da_offs = offsetof(struct ieee80211_hdr, addr3); + build.sa_offs = offsetof(struct ieee80211_hdr, addr2); + build.hdr_len = 24; + break; + case NL80211_IFTYPE_AP_VLAN: + if (sdata->wdev.use_4addr) { + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS); + /* RA TA DA SA */ + memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + build.da_offs = offsetof(struct ieee80211_hdr, addr3); + build.sa_offs = offsetof(struct ieee80211_hdr, addr4); + build.hdr_len = 30; + break; + } + /* fall through */ + case NL80211_IFTYPE_AP: + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); + /* DA BSSID SA */ + build.da_offs = offsetof(struct ieee80211_hdr, addr1); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + build.sa_offs = offsetof(struct ieee80211_hdr, addr3); + build.hdr_len = 24; + break; + default: + /* not handled on fast-xmit */ + goto out; + } + + if (sta->sta.wme) { + build.hdr_len += 2; + fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); + } + + /* We store the key here so there's no point in using rcu_dereference() + * but that's fine because the code that changes the pointers will call + * this function after doing so. For a single CPU that would be enough, + * for multiple see the comment above. + */ + build.key = rcu_access_pointer(sta->ptk[sta->ptk_idx]); + if (!build.key) + build.key = rcu_access_pointer(sdata->default_unicast_key); + if (build.key) { + bool gen_iv, iv_spc, mmic; + + gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV; + iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE; + mmic = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC; + + /* don't handle software crypto */ + if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + goto out; + + switch (build.key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + /* add fixed key ID */ + if (gen_iv) { + (build.hdr + build.hdr_len)[3] = + 0x20 | (build.key->conf.keyidx << 6); + build.pn_offs = build.hdr_len; + } + if (gen_iv || iv_spc) + build.hdr_len += IEEE80211_CCMP_HDR_LEN; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + /* add fixed key ID */ + if (gen_iv) { + (build.hdr + build.hdr_len)[3] = + 0x20 | (build.key->conf.keyidx << 6); + build.pn_offs = build.hdr_len; + } + if (gen_iv || iv_spc) + build.hdr_len += IEEE80211_GCMP_HDR_LEN; + break; + case WLAN_CIPHER_SUITE_TKIP: + /* cannot handle MMIC or IV generation in xmit-fast */ + if (mmic || gen_iv) + goto out; + if (iv_spc) + build.hdr_len += IEEE80211_TKIP_IV_LEN; + break; + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + /* cannot handle IV generation in fast-xmit */ + if (gen_iv) + goto out; + if (iv_spc) + build.hdr_len += IEEE80211_WEP_IV_LEN; + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + WARN(1, + "management cipher suite 0x%x enabled for data\n", + build.key->conf.cipher); + goto out; + default: + /* we don't know how to generate IVs for this at all */ + if (WARN_ON(gen_iv)) + goto out; + /* pure hardware keys are OK, of course */ + if (!(build.key->flags & KEY_FLAG_CIPHER_SCHEME)) + break; + /* cipher scheme might require space allocation */ + if (iv_spc && + build.key->conf.iv_len > IEEE80211_FAST_XMIT_MAX_IV) + goto out; + if (iv_spc) + build.hdr_len += build.key->conf.iv_len; + } + + fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + } + + hdr->frame_control = fc; + + memcpy(build.hdr + build.hdr_len, + rfc1042_header, sizeof(rfc1042_header)); + build.hdr_len += sizeof(rfc1042_header); + + fast_tx = kmemdup(&build, sizeof(build), GFP_ATOMIC); + /* if the kmemdup fails, continue w/o fast_tx */ + if (!fast_tx) + goto out; + + out: + /* we might have raced against another call to this function */ + old = rcu_dereference_protected(sta->fast_tx, + lockdep_is_held(&sta->lock)); + rcu_assign_pointer(sta->fast_tx, fast_tx); + if (old) + kfree_rcu(old, rcu_head); + spin_unlock_bh(&sta->lock); +} + +void ieee80211_check_fast_xmit_all(struct ieee80211_local *local) +{ + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) + ieee80211_check_fast_xmit(sta); + rcu_read_unlock(); +} + +void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + rcu_read_lock(); + + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata && + (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) + continue; + ieee80211_check_fast_xmit(sta); + } + + rcu_read_unlock(); +} + +void ieee80211_clear_fast_xmit(struct sta_info *sta) +{ + struct ieee80211_fast_tx *fast_tx; + + spin_lock_bh(&sta->lock); + fast_tx = rcu_dereference_protected(sta->fast_tx, + lockdep_is_held(&sta->lock)); + RCU_INIT_POINTER(sta->fast_tx, NULL); + spin_unlock_bh(&sta->lock); + + if (fast_tx) + kfree_rcu(fast_tx, rcu_head); +} + +static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, + struct net_device *dev, struct sta_info *sta, + struct ieee80211_fast_tx *fast_tx, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + u16 ethertype = (skb->data[12] << 8) | skb->data[13]; + int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2); + int hw_headroom = sdata->local->hw.extra_tx_headroom; + struct ethhdr eth; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)fast_tx->hdr; + struct ieee80211_tx_data tx; + ieee80211_tx_result r; + struct tid_ampdu_tx *tid_tx = NULL; + u8 tid = IEEE80211_NUM_TIDS; + + /* control port protocol needs a lot of special handling */ + if (cpu_to_be16(ethertype) == sdata->control_port_protocol) + return false; + + /* only RFC 1042 SNAP */ + if (ethertype < ETH_P_802_3_MIN) + return false; + + /* don't handle TX status request here either */ + if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) + return false; + + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (tid_tx && + !test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) + return false; + } + + /* after this point (skb is modified) we cannot return false */ + + if (skb_shared(skb)) { + struct sk_buff *tmp_skb = skb; + + skb = skb_clone(skb, GFP_ATOMIC); + kfree_skb(tmp_skb); + + if (!skb) + return true; + } + + ieee80211_tx_stats(dev, skb->len + extra_head); + + /* will not be crypto-handled beyond what we do here, so use false + * as the may-encrypt argument for the resize to not account for + * more room than we already have in 'extra_head' + */ + if (unlikely(ieee80211_skb_resize(sdata, skb, + max_t(int, extra_head + hw_headroom - + skb_headroom(skb), 0), + false))) { + kfree_skb(skb); + return true; + } + + memcpy(ð, skb->data, ETH_HLEN - 2); + hdr = (void *)skb_push(skb, extra_head); + memcpy(skb->data, fast_tx->hdr, fast_tx->hdr_len); + memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN); + memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN); + + memset(info, 0, sizeof(*info)); + info->band = fast_tx->band; + info->control.vif = &sdata->vif; + info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT | + IEEE80211_TX_CTL_DONTFRAG | + (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0); + + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + *ieee80211_get_qos_ctl(hdr) = tid; + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); + } else { + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); + sdata->sequence_number += 0x10; + } + + sta->tx_msdu[tid]++; + + info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + + __skb_queue_head_init(&tx.skbs); + + tx.flags = IEEE80211_TX_UNICAST; + tx.local = local; + tx.sdata = sdata; + tx.sta = sta; + tx.key = fast_tx->key; + + if (fast_tx->key) + info->control.hw_key = &fast_tx->key->conf; + + if (!(local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) { + tx.skb = skb; + r = ieee80211_tx_h_rate_ctrl(&tx); + skb = tx.skb; + tx.skb = NULL; + + if (r != TX_CONTINUE) { + if (r != TX_QUEUED) + kfree_skb(skb); + return true; + } + } + + /* statistics normally done by ieee80211_tx_h_stats (but that + * has to consider fragmentation, so is more complex) + */ + sta->tx_bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->tx_packets[skb_get_queue_mapping(skb)]++; + + if (fast_tx->pn_offs) { + u64 pn; + u8 *crypto_hdr = skb->data + fast_tx->pn_offs; + + switch (fast_tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + pn = atomic64_inc_return(&fast_tx->key->u.ccmp.tx_pn); + crypto_hdr[0] = pn; + crypto_hdr[1] = pn >> 8; + crypto_hdr[4] = pn >> 16; + crypto_hdr[5] = pn >> 24; + crypto_hdr[6] = pn >> 32; + crypto_hdr[7] = pn >> 40; + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_inc_return(&fast_tx->key->u.gcmp.tx_pn); + crypto_hdr[0] = pn; + crypto_hdr[1] = pn >> 8; + crypto_hdr[4] = pn >> 16; + crypto_hdr[5] = pn >> 24; + crypto_hdr[6] = pn >> 32; + crypto_hdr[7] = pn >> 40; + break; + } + } + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + + __skb_queue_tail(&tx.skbs, skb); + ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false); + return true; +} + void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; + struct sk_buff *next; if (unlikely(skb->len < ETH_HLEN)) { kfree_skb(skb); @@ -2401,20 +2858,67 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, rcu_read_lock(); - if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) { - kfree_skb(skb); - goto out; + if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) + goto out_free; + + if (!IS_ERR_OR_NULL(sta)) { + struct ieee80211_fast_tx *fast_tx; + + fast_tx = rcu_dereference(sta->fast_tx); + + if (fast_tx && + ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb)) + goto out; } - skb = ieee80211_build_hdr(sdata, skb, info_flags, sta); - if (IS_ERR(skb)) - goto out; + if (skb_is_gso(skb)) { + struct sk_buff *segs; + + segs = skb_gso_segment(skb, 0); + if (IS_ERR(segs)) { + goto out_free; + } else if (segs) { + consume_skb(skb); + skb = segs; + } + } else { + /* we cannot process non-linear frames on this path */ + if (skb_linearize(skb)) { + kfree_skb(skb); + goto out; + } + + /* the frame could be fragmented, software-encrypted, and other + * things so we cannot really handle checksum offload with it - + * fix it up in software before we handle anything else. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + skb_set_transport_header(skb, + skb_checksum_start_offset(skb)); + if (skb_checksum_help(skb)) + goto out_free; + } + } + + next = skb; + while (next) { + skb = next; + next = skb->next; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - dev->trans_start = jiffies; + skb->prev = NULL; + skb->next = NULL; + + skb = ieee80211_build_hdr(sdata, skb, info_flags, sta); + if (IS_ERR(skb)) + goto out; - ieee80211_xmit(sdata, sta, skb); + ieee80211_tx_stats(dev, skb->len); + + ieee80211_xmit(sdata, sta, skb); + } + goto out; + out_free: + kfree_skb(skb); out: rcu_read_unlock(); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 19b9cce6c210..b08ba9538d12 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1457,18 +1457,12 @@ static struct socket *make_send_sock(struct net *net, int id) struct socket *sock; int result; - /* First create a socket move it to right name space later */ - result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + /* First create a socket */ + result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - /* - * Kernel sockets that are a part of a namespace, should not - * hold a reference to a namespace in order to allow to stop it. - * After sk_change_net should be released using sk_release_kernel. - */ - sk_change_net(sock->sk, net); result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); @@ -1497,7 +1491,7 @@ static struct socket *make_send_sock(struct net *net, int id) return sock; error: - sk_release_kernel(sock->sk); + sock_release(sock); return ERR_PTR(result); } @@ -1518,17 +1512,11 @@ static struct socket *make_receive_sock(struct net *net, int id) int result; /* First create a socket */ - result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - /* - * Kernel sockets that are a part of a namespace, should not - * hold a reference to a namespace in order to allow to stop it. - * After sk_change_net should be released using sk_release_kernel. - */ - sk_change_net(sock->sk, net); /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = SK_CAN_REUSE; result = sysctl_sync_sock_size(ipvs); @@ -1554,7 +1542,7 @@ static struct socket *make_receive_sock(struct net *net, int id) return sock; error: - sk_release_kernel(sock->sk); + sock_release(sock); return ERR_PTR(result); } @@ -1692,7 +1680,7 @@ done: ip_vs_sync_buff_release(sb); /* release the sending multicast socket */ - sk_release_kernel(tinfo->sock->sk); + sock_release(tinfo->sock); kfree(tinfo); return 0; @@ -1729,7 +1717,7 @@ static int sync_thread_backup(void *data) } /* release the sending multicast socket */ - sk_release_kernel(tinfo->sock->sk); + sock_release(tinfo->sock); kfree(tinfo->buf); kfree(tinfo); @@ -1854,11 +1842,11 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) return 0; outsocket: - sk_release_kernel(sock->sk); + sock_release(sock); outtinfo: if (tinfo) { - sk_release_kernel(tinfo->sock->sk); + sock_release(tinfo->sock); kfree(tinfo->buf); kfree(tinfo); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index daa0b818174b..164ded7050b8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -76,17 +76,18 @@ struct listeners { }; /* state bits */ -#define NETLINK_CONGESTED 0x0 +#define NETLINK_S_CONGESTED 0x0 /* flags */ -#define NETLINK_KERNEL_SOCKET 0x1 -#define NETLINK_RECV_PKTINFO 0x2 -#define NETLINK_BROADCAST_SEND_ERROR 0x4 -#define NETLINK_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_KERNEL_SOCKET 0x1 +#define NETLINK_F_RECV_PKTINFO 0x2 +#define NETLINK_F_BROADCAST_SEND_ERROR 0x4 +#define NETLINK_F_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_LISTEN_ALL_NSID 0x10 static inline int netlink_is_kernel(struct sock *sk) { - return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; + return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET; } struct netlink_table *nl_table; @@ -256,8 +257,9 @@ static void netlink_overrun(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); - if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { - if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) { + if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) { + if (!test_and_set_bit(NETLINK_S_CONGESTED, + &nlk_sk(sk)->state)) { sk->sk_err = ENOBUFS; sk->sk_error_report(sk); } @@ -270,8 +272,8 @@ static void netlink_rcv_wake(struct sock *sk) struct netlink_sock *nlk = nlk_sk(sk); if (skb_queue_empty(&sk->sk_receive_queue)) - clear_bit(NETLINK_CONGESTED, &nlk->state); - if (!test_bit(NETLINK_CONGESTED, &nlk->state)) + clear_bit(NETLINK_S_CONGESTED, &nlk->state); + if (!test_bit(NETLINK_S_CONGESTED, &nlk->state)) wake_up_interruptible(&nlk->wait); } @@ -1117,14 +1119,15 @@ static struct proto netlink_proto = { }; static int __netlink_create(struct net *net, struct socket *sock, - struct mutex *cb_mutex, int protocol) + struct mutex *cb_mutex, int protocol, + int kern) { struct sock *sk; struct netlink_sock *nlk; sock->ops = &netlink_ops; - sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto); + sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern); if (!sk) return -ENOMEM; @@ -1186,7 +1189,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (err < 0) goto out; - err = __netlink_create(net, sock, cb_mutex, protocol); + err = __netlink_create(net, sock, cb_mutex, protocol, kern); if (err < 0) goto out_module; @@ -1656,7 +1659,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, nlk = nlk_sk(sk); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_CONGESTED, &nlk->state)) && + test_bit(NETLINK_S_CONGESTED, &nlk->state)) && !netlink_skb_is_mmaped(skb)) { DECLARE_WAITQUEUE(wait, current); if (!*timeo) { @@ -1671,7 +1674,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, add_wait_queue(&nlk->wait, &wait); if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_CONGESTED, &nlk->state)) && + test_bit(NETLINK_S_CONGESTED, &nlk->state)) && !sock_flag(sk, SOCK_DEAD)) *timeo = schedule_timeout(*timeo); @@ -1895,7 +1898,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) struct netlink_sock *nlk = nlk_sk(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && - !test_bit(NETLINK_CONGESTED, &nlk->state)) { + !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { netlink_skb_set_owner_r(skb, sk); __netlink_sendskb(sk, skb); return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); @@ -1931,8 +1934,17 @@ static void do_one_broadcast(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) return; - if (!net_eq(sock_net(sk), p->net)) - return; + if (!net_eq(sock_net(sk), p->net)) { + if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID)) + return; + + if (!peernet_has_id(sock_net(sk), p->net)) + return; + + if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns, + CAP_NET_BROADCAST)) + return; + } if (p->failure) { netlink_overrun(sk); @@ -1956,23 +1968,33 @@ static void do_one_broadcast(struct sock *sk, netlink_overrun(sk); /* Clone failed. Notify ALL listeners. */ p->failure = 1; - if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) + if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) p->delivery_failure = 1; - } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { + goto out; + } + if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { kfree_skb(p->skb2); p->skb2 = NULL; - } else if (sk_filter(sk, p->skb2)) { + goto out; + } + if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; - } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { + goto out; + } + NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); + NETLINK_CB(p->skb2).nsid_is_set = true; + val = netlink_broadcast_deliver(sk, p->skb2); + if (val < 0) { netlink_overrun(sk); - if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) + if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) p->delivery_failure = 1; } else { p->congested |= val; p->delivered = 1; p->skb2 = NULL; } +out: sock_put(sk); } @@ -2057,7 +2079,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) !test_bit(p->group - 1, nlk->groups)) goto out; - if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) { + if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) { ret = 1; goto out; } @@ -2076,7 +2098,7 @@ out: * @code: error code, must be negative (as usual in kernelspace) * * This function returns the number of broadcast listeners that have set the - * NETLINK_RECV_NO_ENOBUFS socket option. + * NETLINK_NO_ENOBUFS socket option. */ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { @@ -2136,9 +2158,9 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, switch (optname) { case NETLINK_PKTINFO: if (val) - nlk->flags |= NETLINK_RECV_PKTINFO; + nlk->flags |= NETLINK_F_RECV_PKTINFO; else - nlk->flags &= ~NETLINK_RECV_PKTINFO; + nlk->flags &= ~NETLINK_F_RECV_PKTINFO; err = 0; break; case NETLINK_ADD_MEMBERSHIP: @@ -2167,18 +2189,18 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, } case NETLINK_BROADCAST_ERROR: if (val) - nlk->flags |= NETLINK_BROADCAST_SEND_ERROR; + nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR; else - nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR; + nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR; err = 0; break; case NETLINK_NO_ENOBUFS: if (val) { - nlk->flags |= NETLINK_RECV_NO_ENOBUFS; - clear_bit(NETLINK_CONGESTED, &nlk->state); + nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS; + clear_bit(NETLINK_S_CONGESTED, &nlk->state); wake_up_interruptible(&nlk->wait); } else { - nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS; + nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS; } err = 0; break; @@ -2201,6 +2223,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; } #endif /* CONFIG_NETLINK_MMAP */ + case NETLINK_LISTEN_ALL_NSID: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) + return -EPERM; + + if (val) + nlk->flags |= NETLINK_F_LISTEN_ALL_NSID; + else + nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -2227,7 +2259,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(int)) return -EINVAL; len = sizeof(int); - val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0; + val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0; if (put_user(len, optlen) || put_user(val, optval)) return -EFAULT; @@ -2237,7 +2269,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(int)) return -EINVAL; len = sizeof(int); - val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0; + val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0; if (put_user(len, optlen) || put_user(val, optval)) return -EFAULT; @@ -2247,7 +2279,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(int)) return -EINVAL; len = sizeof(int); - val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0; + val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0; if (put_user(len, optlen) || put_user(val, optval)) return -EFAULT; @@ -2267,6 +2299,16 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } +static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + if (!NETLINK_CB(skb).nsid_is_set) + return; + + put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), + &NETLINK_CB(skb).nsid); +} + static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; @@ -2418,8 +2460,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(*addr); } - if (nlk->flags & NETLINK_RECV_PKTINFO) + if (nlk->flags & NETLINK_F_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); + if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) + netlink_cmsg_listen_all_nsid(sk, msg, skb); memset(&scm, 0, sizeof(scm)); scm.creds = *NETLINK_CREDS(skb); @@ -2473,17 +2517,10 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - /* - * We have to just have a reference on the net from sk, but don't - * get_net it. Besides, we cannot get and then put the net here. - * So we create one inside init_net and the move it to net. - */ - - if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0) + if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0) goto out_sock_release_nosk; sk = sock->sk; - sk_change_net(sk, net); if (!cfg || cfg->groups < 32) groups = 32; @@ -2502,7 +2539,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, goto out_sock_release; nlk = nlk_sk(sk); - nlk->flags |= NETLINK_KERNEL_SOCKET; + nlk->flags |= NETLINK_F_KERNEL_SOCKET; netlink_table_grab(); if (!nl_table[unit].registered) { @@ -2539,7 +2576,10 @@ EXPORT_SYMBOL(__netlink_kernel_create); void netlink_kernel_release(struct sock *sk) { - sk_release_kernel(sk); + if (sk == NULL || sk->sk_socket == NULL) + return; + + sock_release(sk->sk_socket); } EXPORT_SYMBOL(netlink_kernel_release); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index b987fd56c3c5..ed212ffc1d9d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -433,7 +433,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto); + sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, kern); if (sk == NULL) return -ENOMEM; @@ -476,7 +476,7 @@ static struct sock *nr_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot); + sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot, 0); if (sk == NULL) return NULL; diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 2277276f52bc..54e40fa47822 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -40,7 +40,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto, read_lock(&proto_tab_lock); if (proto_tab[proto] && try_module_get(proto_tab[proto]->owner)) { - rc = proto_tab[proto]->create(net, sock, proto_tab[proto]); + rc = proto_tab[proto]->create(net, sock, proto_tab[proto], kern); module_put(proto_tab[proto]->owner); } read_unlock(&proto_tab_lock); diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index de1789e3cc82..1f68724d44d3 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -225,7 +225,7 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local, struct sk_buff *skb, u8 direction); /* Sock API */ -struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp); +struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern); void nfc_llcp_sock_free(struct nfc_llcp_sock *sock); void nfc_llcp_accept_unlink(struct sock *sk); void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index b18f07ccb504..98876274a1ee 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -934,7 +934,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, sock->ssap = ssap; } - new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC); + new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC, 0); if (new_sk == NULL) { reason = LLCP_DM_REJ; release_sock(&sock->sk); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 9578bd6a4f3e..b7de0da46acd 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -942,12 +942,12 @@ static void llcp_sock_destruct(struct sock *sk) } } -struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) +struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern) { struct sock *sk; struct nfc_llcp_sock *llcp_sock; - sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto); + sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto, kern); if (!sk) return NULL; @@ -993,7 +993,7 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock) } static int llcp_sock_create(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto) + const struct nfc_protocol *nfc_proto, int kern) { struct sock *sk; @@ -1009,7 +1009,7 @@ static int llcp_sock_create(struct net *net, struct socket *sock, else sock->ops = &llcp_sock_ops; - sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC); + sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index a8ce80b47720..5c93e8412a26 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -30,7 +30,7 @@ struct nfc_protocol { struct proto *proto; struct module *owner; int (*create)(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto); + const struct nfc_protocol *nfc_proto, int kern); }; struct nfc_rawsock { diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 82b4e8024778..e9a91488fe3d 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -334,7 +334,7 @@ static void rawsock_destruct(struct sock *sk) } static int rawsock_create(struct net *net, struct socket *sock, - const struct nfc_protocol *nfc_proto) + const struct nfc_protocol *nfc_proto, int kern) { struct sock *sk; @@ -348,7 +348,7 @@ static int rawsock_create(struct net *net, struct socket *sock, else sock->ops = &rawsock_ops; - sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto); + sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern); if (!sk) return -ENOMEM; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 096c6276e6b9..3b90461317ec 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -545,7 +545,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) /* Normally, setting the skb 'protocol' field would be handled by a * call to eth_type_trans(), but it assumes there's a sending * device, which we may not have. */ - if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) + if (eth_proto_is_802_3(eth->h_proto)) packet->protocol = eth->h_proto; else packet->protocol = htons(ETH_P_802_2); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2dacc7b5af23..bc7b0aba994a 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -332,7 +332,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) proto = *(__be16 *) skb->data; __skb_pull(skb, sizeof(__be16)); - if (ntohs(proto) >= ETH_P_802_3_MIN) + if (eth_proto_is_802_3(proto)) return proto; if (skb->len < sizeof(struct llc_snap_hdr)) @@ -349,7 +349,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) __skb_pull(skb, sizeof(struct llc_snap_hdr)); - if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) + if (eth_proto_is_802_3(llc->ethertype)) return llc->ethertype; return htons(ETH_P_802_2); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c691b1a1eee0..624e41c4267f 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -816,7 +816,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, if (is_mask) { /* Always exact match EtherType. */ eth_type = htons(0xffff); - } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { + } else if (!eth_proto_is_802_3(eth_type)) { OVS_NLERR(log, "EtherType %x is less than min %x", ntohs(eth_type), ETH_P_802_3_MIN); return -EINVAL; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b5989c6ee551..12c5dde8e344 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2835,7 +2835,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); if (sk == NULL) goto out; diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 32ab87d34828..10d42f3220ab 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -97,7 +97,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol, goto out; } - sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot); + sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot, kern); if (sk == NULL) { err = -ENOMEM; goto out; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 6de2aeb98a1f..850a86cde0b3 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -845,7 +845,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) } /* Create a new to-be-accepted sock */ - newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot); + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0); if (!newsk) { pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); err = -ENOBUFS; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 10443377fb9d..3d83641f2861 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -440,7 +440,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto); + sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8ae603069a1a..36dbc2da3661 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -520,7 +520,7 @@ static int rose_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto); + sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, kern); if (sk == NULL) return -ENOMEM; @@ -559,7 +559,7 @@ static struct sock *rose_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) return NULL; - sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto); + sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto, 0); if (sk == NULL) return NULL; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0095b9a0b779..25d60ed15284 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -632,7 +632,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, sock->ops = &rxrpc_rpc_ops; sock->state = SS_UNCONNECTED; - sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto); + sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, kern); if (!sk) return -ENOMEM; diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c index ca904ed5400a..78483b4602bf 100644 --- a/net/rxrpc/ar-local.c +++ b/net/rxrpc/ar-local.c @@ -73,8 +73,8 @@ static int rxrpc_create_local(struct rxrpc_local *local) _enter("%p{%d}", local, local->srx.transport_type); /* create a socket to represent the local endpoint */ - ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP, - &local->socket); + ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type, + IPPROTO_UDP, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); return ret; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3d43e4979f27..af427a3dbcba 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -392,11 +392,6 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, list_for_each_entry(a, actions, list) { repeat: ret = a->ops->act(skb, a, res); - if (TC_MUNGED & skb->tc_verd) { - /* copied already, allow trampling */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd); - } if (ret == TC_ACT_REPEAT) goto repeat; /* we need a ttl - JHS */ if (ret != TC_ACT_PIPE) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 59649d588d79..17e6d6669c7f 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -108,7 +108,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = a->priv; - int i, munged = 0; + int i; unsigned int off; if (skb_unclone(skb, GFP_ATOMIC)) @@ -156,11 +156,8 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, *ptr = ((*ptr & tkey->mask) ^ tkey->val); if (ptr == &_data) skb_store_bits(skb, off + offset, ptr, 4); - munged++; } - if (munged) - skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); goto done; } else WARN(1, "pedit BUG: index %d\n", p->tcf_index); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index c009eb9045ce..dfe3da75594c 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -133,16 +133,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) --sch->q.qlen; } -/* private part of skb->cb[] that a qdisc is allowed to use - * is limited to QDISC_CB_PRIV_LEN bytes. - * As a flow key might be too large, we store a part of it only. - */ -#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3) - struct choke_skb_cb { u16 classid; u8 keys_valid; - u8 keys[QDISC_CB_PRIV_LEN - 3]; + struct flow_keys_digest keys; }; static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) @@ -177,18 +171,18 @@ static bool choke_match_flow(struct sk_buff *skb1, if (!choke_skb_cb(skb1)->keys_valid) { choke_skb_cb(skb1)->keys_valid = 1; skb_flow_dissect(skb1, &temp); - memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN); + make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp); } if (!choke_skb_cb(skb2)->keys_valid) { choke_skb_cb(skb2)->keys_valid = 1; skb_flow_dissect(skb2, &temp); - memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN); + make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp); } return !memcmp(&choke_skb_cb(skb1)->keys, &choke_skb_cb(skb2)->keys, - CHOKE_K_LEN); + sizeof(choke_skb_cb(skb1)->keys)); } /* diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 7a0bdb16ac92..535007d5f0b5 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -6,7 +6,7 @@ * * Implemented on linux by : * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> - * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -109,6 +109,7 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { [TCA_CODEL_LIMIT] = { .type = NLA_U32 }, [TCA_CODEL_INTERVAL] = { .type = NLA_U32 }, [TCA_CODEL_ECN] = { .type = NLA_U32 }, + [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 }, }; static int codel_change(struct Qdisc *sch, struct nlattr *opt) @@ -133,6 +134,12 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT; } + if (tb[TCA_CODEL_CE_THRESHOLD]) { + u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]); + + q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; + } + if (tb[TCA_CODEL_INTERVAL]) { u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]); @@ -201,7 +208,10 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_CODEL_ECN, q->params.ecn)) goto nla_put_failure; - + if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD && + nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD, + codel_time_to_us(q->params.ce_threshold))) + goto nla_put_failure; return nla_nest_end(skb, opts); nla_put_failure: @@ -220,6 +230,7 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .ldelay = codel_time_to_us(q->vars.ldelay), .dropping = q->vars.dropping, .ecn_mark = q->stats.ecn_mark, + .ce_mark = q->stats.ce_mark, }; if (q->vars.dropping) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index c244c45b78d7..d75993f89fac 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -6,7 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com> */ #include <linux/module.h> @@ -23,7 +23,6 @@ #include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> -#include <net/flow_keys.h> #include <net/codel.h> /* Fair Queue CoDel. @@ -68,15 +67,9 @@ struct fq_codel_sched_data { }; static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q, - const struct sk_buff *skb) + struct sk_buff *skb) { - struct flow_keys keys; - unsigned int hash; - - skb_flow_dissect(skb, &keys); - hash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src ^ keys.ip_proto, - (__force u32)keys.ports, q->perturbation); + u32 hash = skb_get_hash_perturb(skb, q->perturbation); return reciprocal_scale(hash, q->flows_cnt); } @@ -299,6 +292,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { [TCA_FQ_CODEL_ECN] = { .type = NLA_U32 }, [TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 }, [TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 }, + [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, }; static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) @@ -329,6 +323,12 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT; } + if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) { + u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]); + + q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; + } + if (tb[TCA_FQ_CODEL_INTERVAL]) { u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]); @@ -448,6 +448,11 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) q->flows_cnt)) goto nla_put_failure; + if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD && + nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, + codel_time_to_us(q->cparams.ce_threshold))) + goto nla_put_failure; + return nla_nest_end(skb, opts); nla_put_failure: @@ -466,6 +471,7 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.qdisc_stats.drop_overlimit = q->drop_overlimit; st.qdisc_stats.ecn_mark = q->cstats.ecn_mark; st.qdisc_stats.new_flow_count = q->new_flow_count; + st.qdisc_stats.ce_mark = q->cstats.ce_mark; list_for_each(pos, &q->new_flows) st.qdisc_stats.new_flows_len++; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 634529e0ce6b..abb9f2fec28f 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -165,7 +165,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) * if no default DP has been configured. This * allows for DP flows to be left untouched. */ - if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len) + if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= + sch->limit)) return qdisc_enqueue_tail(skb, sch); else goto drop; @@ -397,7 +398,10 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, q->DP = dp; q->prio = prio; - q->limit = ctl->limit; + if (ctl->limit > sch->limit) + q->limit = sch->limit; + else + q->limit = ctl->limit; if (q->backlog == 0) red_end_of_idle_period(&q->vars); @@ -414,6 +418,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { [TCA_GRED_STAB] = { .len = 256 }, [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, [TCA_GRED_MAX_P] = { .type = NLA_U32 }, + [TCA_GRED_LIMIT] = { .type = NLA_U32 }, }; static int gred_change(struct Qdisc *sch, struct nlattr *opt) @@ -433,11 +438,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) if (err < 0) return err; - if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) + if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { + if (tb[TCA_GRED_LIMIT] != NULL) + sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); return gred_change_table_def(sch, opt); + } if (tb[TCA_GRED_PARMS] == NULL || - tb[TCA_GRED_STAB] == NULL) + tb[TCA_GRED_STAB] == NULL || + tb[TCA_GRED_LIMIT] != NULL) return -EINVAL; max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0; @@ -501,6 +510,14 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) return -EINVAL; + if (tb[TCA_GRED_LIMIT]) + sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); + else { + u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1; + + sch->limit = qlen * psched_mtu(qdisc_dev(sch)); + } + return gred_change_table_def(sch, tb[TCA_GRED_DPS]); } @@ -531,6 +548,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p)) goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit)) + goto nla_put_failure; + parms = nla_nest_start(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 15d3aabfe250..9d15cb6b8cb1 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -9,7 +9,6 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> -#include <net/flow_keys.h> #include <net/pkt_sched.h> #include <net/sock.h> @@ -176,22 +175,6 @@ static u32 hhf_time_stamp(void) return jiffies; } -static unsigned int skb_hash(const struct hhf_sched_data *q, - const struct sk_buff *skb) -{ - struct flow_keys keys; - unsigned int hash; - - if (skb->sk && skb->sk->sk_hash) - return skb->sk->sk_hash; - - skb_flow_dissect(skb, &keys); - hash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src ^ keys.ip_proto, - (__force u32)keys.ports, q->perturbation); - return hash; -} - /* Looks up a heavy-hitter flow in a chaining list of table T. */ static struct hh_flow_state *seek_list(const u32 hash, struct list_head *head, @@ -280,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) } /* Get hashed flow-id of the skb. */ - hash = skb_hash(q, skb); + hash = skb_get_hash_perturb(skb, q->perturbation); /* Check if this packet belongs to an already established HH flow. */ flow_pos = hash & HHF_BIT_MASK; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 4cdbfb85686a..e7c648fa9dc3 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -12,16 +12,10 @@ #include <linux/list.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> + #include <net/netlink.h> #include <net/pkt_sched.h> - -struct ingress_qdisc_data { - struct tcf_proto __rcu *filter_list; -}; - -/* ------------------------- Class/flow operations ------------------------- */ - static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { return NULL; @@ -49,57 +43,24 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch, unsigned long cl) { - struct ingress_qdisc_data *p = qdisc_priv(sch); - - return &p->filter_list; -} - -/* --------------------------- Qdisc operations ---------------------------- */ + struct net_device *dev = qdisc_dev(sch); -static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct ingress_qdisc_data *p = qdisc_priv(sch); - struct tcf_result res; - struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result; - - result = tc_classify(skb, fl, &res); - - qdisc_bstats_update(sch, skb); - switch (result) { - case TC_ACT_SHOT: - result = TC_ACT_SHOT; - qdisc_qstats_drop(sch); - break; - case TC_ACT_STOLEN: - case TC_ACT_QUEUED: - result = TC_ACT_STOLEN; - break; - case TC_ACT_RECLASSIFY: - case TC_ACT_OK: - skb->tc_index = TC_H_MIN(res.classid); - default: - result = TC_ACT_OK; - break; - } - - return result; + return &dev->ingress_cl_list; } -/* ------------------------------------------------------------- */ - static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { net_inc_ingress_queue(); + sch->flags |= TCQ_F_CPUSTATS; return 0; } static void ingress_destroy(struct Qdisc *sch) { - struct ingress_qdisc_data *p = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); - tcf_destroy_chain(&p->filter_list); + tcf_destroy_chain(&dev->ingress_cl_list); net_dec_ingress_queue(); } @@ -110,6 +71,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; + return nla_nest_end(skb, nest); nla_put_failure: @@ -130,8 +92,6 @@ static const struct Qdisc_class_ops ingress_class_ops = { static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", - .priv_size = sizeof(struct ingress_qdisc_data), - .enqueue = ingress_enqueue, .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, @@ -148,6 +108,7 @@ static void __exit ingress_module_exit(void) unregister_qdisc(&ingress_qdisc_ops); } -module_init(ingress_module_init) -module_exit(ingress_module_exit) +module_init(ingress_module_init); +module_exit(ingress_module_exit); + MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 956ead2cab9a..5abd1d9de989 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -440,9 +440,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { struct Qdisc *rootq = qdisc_root(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ - q->duplicate = 0; - qdisc_enqueue_root(skb2, rootq); + q->duplicate = 0; + rootq->enqueue(skb2, rootq); q->duplicate = dupsave; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 5819dd82630d..4b815193326c 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -26,7 +26,6 @@ #include <net/ip.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> -#include <net/flow_keys.h> /* * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) @@ -285,9 +284,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) int i; u32 p_min = ~0; u32 minqlen = ~0; - u32 r, slot, salt, sfbhash; + u32 r, sfbhash; + u32 slot = q->slot; int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - struct flow_keys keys; if (unlikely(sch->q.qlen >= q->limit)) { qdisc_qstats_overlimit(sch); @@ -309,22 +308,17 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) fl = rcu_dereference_bh(q->filter_list); if (fl) { + u32 salt; + /* If using external classifiers, get result and record it. */ if (!sfb_classify(skb, fl, &ret, &salt)) goto other_drop; - keys.src = salt; - keys.dst = 0; - keys.ports = 0; + sfbhash = jhash_1word(salt, q->bins[slot].perturbation); } else { - skb_flow_dissect(skb, &keys); + sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation); } - slot = q->slot; - sfbhash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports, - q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; @@ -356,10 +350,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (unlikely(p_min >= SFB_MAX_PROB)) { /* Inelastic flow */ if (q->double_buffering) { - sfbhash = jhash_3words((__force u32)keys.dst, - (__force u32)keys.src, - (__force u32)keys.ports, - q->bins[slot].perturbation); + sfbhash = skb_get_hash_perturb(skb, + q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b877140beda5..7d1492663360 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -23,7 +23,6 @@ #include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> -#include <net/flow_keys.h> #include <net/red.h> @@ -156,30 +155,10 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index return &q->dep[val - SFQ_MAX_FLOWS]; } -/* - * In order to be able to quickly rehash our queue when timer changes - * q->perturbation, we store flow_keys in skb->cb[] - */ -struct sfq_skb_cb { - struct flow_keys keys; -}; - -static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb) -{ - qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb)); - return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data; -} - static unsigned int sfq_hash(const struct sfq_sched_data *q, const struct sk_buff *skb) { - const struct flow_keys *keys = &sfq_skb_cb(skb)->keys; - unsigned int hash; - - hash = jhash_3words((__force u32)keys->dst, - (__force u32)keys->src ^ keys->ip_proto, - (__force u32)keys->ports, q->perturbation); - return hash & (q->divisor - 1); + return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1); } static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, @@ -196,10 +175,8 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return TC_H_MIN(skb->priority); fl = rcu_dereference_bh(q->filter_list); - if (!fl) { - skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys); + if (!fl) return sfq_hash(q, skb) + 1; - } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; result = tc_classify(skb, fl, &res); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0e4198ee2370..e703ff7fed40 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -635,7 +635,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot); + newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0); if (!newsk) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 53b7acde9aa3..59e80356672b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -550,7 +550,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, struct sctp_association *asoc) { struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, - sk->sk_prot); + sk->sk_prot, 0); struct inet_sock *newinet; if (!newsk) diff --git a/net/socket.c b/net/socket.c index 884e32997698..9963a0b53a64 100644 --- a/net/socket.c +++ b/net/socket.c @@ -576,9 +576,6 @@ void sock_release(struct socket *sock) if (rcu_dereference_protected(sock->wq, 1)->fasync_list) pr_err("%s: fasync list not empty!\n", __func__); - if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags)) - return; - this_cpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); @@ -1213,9 +1210,9 @@ int sock_create(int family, int type, int protocol, struct socket **res) } EXPORT_SYMBOL(sock_create); -int sock_create_kern(int family, int type, int protocol, struct socket **res) +int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) { - return __sock_create(&init_net, family, type, protocol, res, 1); + return __sock_create(net, family, type, protocol, res, 1); } EXPORT_SYMBOL(sock_create_kern); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 46568b85c333..0409f9b5bdbc 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -15,97 +15,329 @@ #include <linux/mutex.h> #include <linux/notifier.h> #include <linux/netdevice.h> +#include <linux/if_bridge.h> #include <net/ip_fib.h> #include <net/switchdev.h> /** - * netdev_switch_parent_id_get - Get ID of a switch + * switchdev_port_attr_get - Get port attribute + * * @dev: port device - * @psid: switch ID + * @attr: attribute to get + */ +int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + struct switchdev_attr first = { + .id = SWITCHDEV_ATTR_UNDEFINED + }; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_attr_get) + return ops->switchdev_port_attr_get(dev, attr); + + if (attr->flags & SWITCHDEV_F_NO_RECURSE) + return err; + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to get attr on + * each port. Return -ENODATA if attr values don't + * compare across ports. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = switchdev_port_attr_get(lower_dev, attr); + if (err) + break; + if (first.id == SWITCHDEV_ATTR_UNDEFINED) + first = *attr; + else if (memcmp(&first, attr, sizeof(*attr))) + return -ENODATA; + } + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_attr_get); + +static int __switchdev_port_attr_set(struct net_device *dev, + struct switchdev_attr *attr) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_attr_set) + return ops->switchdev_port_attr_set(dev, attr); + + if (attr->flags & SWITCHDEV_F_NO_RECURSE) + return err; + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to set attr on + * each port. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = __switchdev_port_attr_set(lower_dev, attr); + if (err) + break; + } + + return err; +} + +struct switchdev_attr_set_work { + struct work_struct work; + struct net_device *dev; + struct switchdev_attr attr; +}; + +static void switchdev_port_attr_set_work(struct work_struct *work) +{ + struct switchdev_attr_set_work *asw = + container_of(work, struct switchdev_attr_set_work, work); + int err; + + rtnl_lock(); + err = switchdev_port_attr_set(asw->dev, &asw->attr); + BUG_ON(err); + rtnl_unlock(); + + dev_put(asw->dev); + kfree(work); +} + +static int switchdev_port_attr_set_defer(struct net_device *dev, + struct switchdev_attr *attr) +{ + struct switchdev_attr_set_work *asw; + + asw = kmalloc(sizeof(*asw), GFP_ATOMIC); + if (!asw) + return -ENOMEM; + + INIT_WORK(&asw->work, switchdev_port_attr_set_work); + + dev_hold(dev); + asw->dev = dev; + memcpy(&asw->attr, attr, sizeof(asw->attr)); + + schedule_work(&asw->work); + + return 0; +} + +/** + * switchdev_port_attr_set - Set port attribute + * + * @dev: port device + * @attr: attribute to set * - * Get ID of a switch this port is part of. + * Use a 2-phase prepare-commit transaction model to ensure + * system is not left in a partially updated state due to + * failure from driver/device. */ -int netdev_switch_parent_id_get(struct net_device *dev, - struct netdev_phys_item_id *psid) +int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) { - const struct swdev_ops *ops = dev->swdev_ops; + int err; + + if (!rtnl_is_locked()) { + /* Running prepare-commit transaction across stacked + * devices requires nothing moves, so if rtnl_lock is + * not held, schedule a worker thread to hold rtnl_lock + * while setting attr. + */ + + return switchdev_port_attr_set_defer(dev, attr); + } - if (!ops || !ops->swdev_parent_id_get) - return -EOPNOTSUPP; - return ops->swdev_parent_id_get(dev, psid); + /* Phase I: prepare for attr set. Driver/device should fail + * here if there are going to be issues in the commit phase, + * such as lack of resources or support. The driver/device + * should reserve resources needed for the commit phase here, + * but should not commit the attr. + */ + + attr->trans = SWITCHDEV_TRANS_PREPARE; + err = __switchdev_port_attr_set(dev, attr); + if (err) { + /* Prepare phase failed: abort the transaction. Any + * resources reserved in the prepare phase are + * released. + */ + + attr->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_attr_set(dev, attr); + + return err; + } + + /* Phase II: commit attr set. This cannot fail as a fault + * of driver/device. If it does, it's a bug in the driver/device + * because the driver said everythings was OK in phase I. + */ + + attr->trans = SWITCHDEV_TRANS_COMMIT; + err = __switchdev_port_attr_set(dev, attr); + BUG_ON(err); + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_attr_set); + +static int __switchdev_port_obj_add(struct net_device *dev, + struct switchdev_obj *obj) +{ + const struct switchdev_ops *ops = dev->switchdev_ops; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops && ops->switchdev_port_obj_add) + return ops->switchdev_port_obj_add(dev, obj); + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to add object on + * each port. + */ + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = __switchdev_port_obj_add(lower_dev, obj); + if (err) + break; + } + + return err; } -EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get); /** - * netdev_switch_port_stp_update - Notify switch device port of STP - * state change + * switchdev_port_obj_add - Add port object + * * @dev: port device - * @state: port STP state + * @obj: object to add + * + * Use a 2-phase prepare-commit transaction model to ensure + * system is not left in a partially updated state due to + * failure from driver/device. + * + * rtnl_lock must be held. + */ +int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) +{ + int err; + + ASSERT_RTNL(); + + /* Phase I: prepare for obj add. Driver/device should fail + * here if there are going to be issues in the commit phase, + * such as lack of resources or support. The driver/device + * should reserve resources needed for the commit phase here, + * but should not commit the obj. + */ + + obj->trans = SWITCHDEV_TRANS_PREPARE; + err = __switchdev_port_obj_add(dev, obj); + if (err) { + /* Prepare phase failed: abort the transaction. Any + * resources reserved in the prepare phase are + * released. + */ + + obj->trans = SWITCHDEV_TRANS_ABORT; + __switchdev_port_obj_add(dev, obj); + + return err; + } + + /* Phase II: commit obj add. This cannot fail as a fault + * of driver/device. If it does, it's a bug in the driver/device + * because the driver said everythings was OK in phase I. + */ + + obj->trans = SWITCHDEV_TRANS_COMMIT; + err = __switchdev_port_obj_add(dev, obj); + WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); + + return err; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_add); + +/** + * switchdev_port_obj_del - Delete port object * - * Notify switch device port of bridge port STP state change. + * @dev: port device + * @obj: object to delete */ -int netdev_switch_port_stp_update(struct net_device *dev, u8 state) +int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) { - const struct swdev_ops *ops = dev->swdev_ops; + const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; struct list_head *iter; int err = -EOPNOTSUPP; - if (ops && ops->swdev_port_stp_update) - return ops->swdev_port_stp_update(dev, state); + if (ops && ops->switchdev_port_obj_del) + return ops->switchdev_port_obj_del(dev, obj); + + /* Switch device port(s) may be stacked under + * bond/team/vlan dev, so recurse down to delete object on + * each port. + */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = netdev_switch_port_stp_update(lower_dev, state); - if (err && err != -EOPNOTSUPP) - return err; + err = switchdev_port_obj_del(lower_dev, obj); + if (err) + break; } return err; } -EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update); +EXPORT_SYMBOL_GPL(switchdev_port_obj_del); -static DEFINE_MUTEX(netdev_switch_mutex); -static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); +static DEFINE_MUTEX(switchdev_mutex); +static RAW_NOTIFIER_HEAD(switchdev_notif_chain); /** - * register_netdev_switch_notifier - Register notifier + * register_switchdev_notifier - Register notifier * @nb: notifier_block * * Register switch device notifier. This should be used by code * which needs to monitor events happening in particular device. * Return values are same as for atomic_notifier_chain_register(). */ -int register_netdev_switch_notifier(struct notifier_block *nb) +int register_switchdev_notifier(struct notifier_block *nb) { int err; - mutex_lock(&netdev_switch_mutex); - err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); - mutex_unlock(&netdev_switch_mutex); + mutex_lock(&switchdev_mutex); + err = raw_notifier_chain_register(&switchdev_notif_chain, nb); + mutex_unlock(&switchdev_mutex); return err; } -EXPORT_SYMBOL_GPL(register_netdev_switch_notifier); +EXPORT_SYMBOL_GPL(register_switchdev_notifier); /** - * unregister_netdev_switch_notifier - Unregister notifier + * unregister_switchdev_notifier - Unregister notifier * @nb: notifier_block * * Unregister switch device notifier. * Return values are same as for atomic_notifier_chain_unregister(). */ -int unregister_netdev_switch_notifier(struct notifier_block *nb) +int unregister_switchdev_notifier(struct notifier_block *nb) { int err; - mutex_lock(&netdev_switch_mutex); - err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); - mutex_unlock(&netdev_switch_mutex); + mutex_lock(&switchdev_mutex); + err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); + mutex_unlock(&switchdev_mutex); return err; } -EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); +EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); /** - * call_netdev_switch_notifiers - Call notifiers + * call_switchdev_notifiers - Call notifiers * @val: value passed unmodified to notifier function * @dev: port device * @info: notifier information data @@ -114,146 +346,242 @@ EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier); * when it needs to propagate hardware event. * Return values are same as for atomic_notifier_call_chain(). */ -int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info) +int call_switchdev_notifiers(unsigned long val, struct net_device *dev, + struct switchdev_notifier_info *info) { int err; info->dev = dev; - mutex_lock(&netdev_switch_mutex); - err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); - mutex_unlock(&netdev_switch_mutex); + mutex_lock(&switchdev_mutex); + err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); + mutex_unlock(&switchdev_mutex); return err; } -EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers); +EXPORT_SYMBOL_GPL(call_switchdev_notifiers); /** - * netdev_switch_port_bridge_setlink - Notify switch device port of bridge - * port attributes + * switchdev_port_bridge_getlink - Get bridge port attributes * * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge setlink flags * - * Notify switch device port of bridge port attributes + * Called for SELF on rtnl_bridge_getlink to get bridge port + * attributes. */ -int netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) { - const struct net_device_ops *ops = dev->netdev_ops; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, + }; + u16 mode = BRIDGE_MODE_UNDEF; + u32 mask = BR_LEARNING | BR_LEARNING_SYNC; + int err; - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return 0; + err = switchdev_port_attr_get(dev, &attr); + if (err) + return err; + + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, + attr.u.brport_flags, mask, nlflags); +} +EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); + +static int switchdev_port_br_setflag(struct net_device *dev, + struct nlattr *nlattr, + unsigned long brport_flag) +{ + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, + }; + u8 flag = nla_get_u8(nlattr); + int err; + + err = switchdev_port_attr_get(dev, &attr); + if (err) + return err; - if (!ops->ndo_bridge_setlink) - return -EOPNOTSUPP; + if (flag) + attr.u.brport_flags |= brport_flag; + else + attr.u.brport_flags &= ~brport_flag; - return ops->ndo_bridge_setlink(dev, nlh, flags); + return switchdev_port_attr_set(dev, &attr); } -EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink); -/** - * netdev_switch_port_bridge_dellink - Notify switch device port of bridge - * port attribute delete - * - * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge setlink flags - * - * Notify switch device port of bridge port attribute delete - */ -int netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +static const struct nla_policy +switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = { + [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, + [IFLA_BRPORT_COST] = { .type = NLA_U32 }, + [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, + [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, + [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 }, + [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, +}; + +static int switchdev_port_br_setlink_protinfo(struct net_device *dev, + struct nlattr *protinfo) { - const struct net_device_ops *ops = dev->netdev_ops; + struct nlattr *attr; + int rem; + int err; - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return 0; + err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX, + switchdev_port_bridge_policy); + if (err) + return err; + + nla_for_each_nested(attr, protinfo, rem) { + switch (nla_type(attr)) { + case IFLA_BRPORT_LEARNING: + err = switchdev_port_br_setflag(dev, attr, + BR_LEARNING); + break; + case IFLA_BRPORT_LEARNING_SYNC: + err = switchdev_port_br_setflag(dev, attr, + BR_LEARNING_SYNC); + break; + default: + err = -EOPNOTSUPP; + break; + } + if (err) + return err; + } + + return 0; +} + +static int switchdev_port_br_afspec(struct net_device *dev, + struct nlattr *afspec, + int (*f)(struct net_device *dev, + struct switchdev_obj *obj)) +{ + struct nlattr *attr; + struct bridge_vlan_info *vinfo; + struct switchdev_obj obj = { + .id = SWITCHDEV_OBJ_PORT_VLAN, + }; + struct switchdev_obj_vlan *vlan = &obj.u.vlan; + int rem; + int err; - if (!ops->ndo_bridge_dellink) - return -EOPNOTSUPP; + nla_for_each_nested(attr, afspec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) + continue; + if (nla_len(attr) != sizeof(struct bridge_vlan_info)) + return -EINVAL; + vinfo = nla_data(attr); + vlan->flags = vinfo->flags; + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (vlan->vid_start) + return -EINVAL; + vlan->vid_start = vinfo->vid; + } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { + if (!vlan->vid_start) + return -EINVAL; + vlan->vid_end = vinfo->vid; + if (vlan->vid_end <= vlan->vid_start) + return -EINVAL; + err = f(dev, &obj); + if (err) + return err; + memset(vlan, 0, sizeof(*vlan)); + } else { + if (vlan->vid_start) + return -EINVAL; + vlan->vid_start = vinfo->vid; + vlan->vid_end = vinfo->vid; + err = f(dev, &obj); + if (err) + return err; + memset(vlan, 0, sizeof(*vlan)); + } + } - return ops->ndo_bridge_dellink(dev, nlh, flags); + return 0; } -EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_dellink); /** - * ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink - * op for master devices + * switchdev_port_bridge_setlink - Set bridge port attributes * * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge setlink flags + * @nlh: netlink header + * @flags: netlink flags * - * Notify master device slaves of bridge port attributes + * Called for SELF on rtnl_bridge_setlink to set bridge port + * attributes. */ -int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int switchdev_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) { - struct net_device *lower_dev; - struct list_head *iter; - int ret = 0, err = 0; - - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return ret; + struct nlattr *protinfo; + struct nlattr *afspec; + int err = 0; - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags); - if (err && err != -EOPNOTSUPP) - ret = err; + protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), + IFLA_PROTINFO); + if (protinfo) { + err = switchdev_port_br_setlink_protinfo(dev, protinfo); + if (err) + return err; } - return ret; + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), + IFLA_AF_SPEC); + if (afspec) + err = switchdev_port_br_afspec(dev, afspec, + switchdev_port_obj_add); + + return err; } -EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_setlink); +EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink); /** - * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink - * op for master devices + * switchdev_port_bridge_dellink - Set bridge port attributes * * @dev: port device - * @nlh: netlink msg with bridge port attributes - * @flags: bridge dellink flags + * @nlh: netlink header + * @flags: netlink flags * - * Notify master device slaves of bridge port attribute deletes + * Called for SELF on rtnl_bridge_dellink to set bridge port + * attributes. */ -int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) +int switchdev_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) { - struct net_device *lower_dev; - struct list_head *iter; - int ret = 0, err = 0; + struct nlattr *afspec; - if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) - return ret; + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), + IFLA_AF_SPEC); + if (afspec) + return switchdev_port_br_afspec(dev, afspec, + switchdev_port_obj_del); - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags); - if (err && err != -EOPNOTSUPP) - ret = err; - } - - return ret; + return 0; } -EXPORT_SYMBOL_GPL(ndo_dflt_netdev_switch_port_bridge_dellink); +EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); -static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) +static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) { - const struct swdev_ops *ops = dev->swdev_ops; + const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; struct net_device *port_dev; struct list_head *iter; /* Recusively search down until we find a sw port dev. - * (A sw port dev supports swdev_parent_id_get). + * (A sw port dev supports switchdev_port_attr_get). */ - if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD && - ops && ops->swdev_parent_id_get) + if (ops && ops->switchdev_port_attr_get) return dev; netdev_for_each_lower_dev(dev, lower_dev, iter) { - port_dev = netdev_switch_get_lowest_dev(lower_dev); + port_dev = switchdev_get_lowest_dev(lower_dev); if (port_dev) return port_dev; } @@ -261,10 +589,12 @@ static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev) return NULL; } -static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) +static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) { - struct netdev_phys_item_id psid; - struct netdev_phys_item_id prev_psid; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + }; + struct switchdev_attr prev_attr; struct net_device *dev = NULL; int nhsel; @@ -276,28 +606,29 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) if (!nh->nh_dev) return NULL; - dev = netdev_switch_get_lowest_dev(nh->nh_dev); + dev = switchdev_get_lowest_dev(nh->nh_dev); if (!dev) return NULL; - if (netdev_switch_parent_id_get(dev, &psid)) + if (switchdev_port_attr_get(dev, &attr)) return NULL; if (nhsel > 0) { - if (prev_psid.id_len != psid.id_len) + if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len) return NULL; - if (memcmp(prev_psid.id, psid.id, psid.id_len)) + if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id, + attr.u.ppid.id_len)) return NULL; } - prev_psid = psid; + prev_attr = attr; } return dev; } /** - * netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch + * switchdev_fib_ipv4_add - Add IPv4 route entry to switch * * @dst: route's IPv4 destination address * @dst_len: destination address length (prefix length) @@ -309,11 +640,22 @@ static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi) * * Add IPv4 route entry to switch device. */ -int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id) +int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 nlflags, u32 tb_id) { + struct switchdev_obj fib_obj = { + .id = SWITCHDEV_OBJ_IPV4_FIB, + .u.ipv4_fib = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .nlflags = nlflags, + .tb_id = tb_id, + }, + }; struct net_device *dev; - const struct swdev_ops *ops; int err = 0; /* Don't offload route if using custom ip rules or if @@ -328,25 +670,20 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, if (fi->fib_net->ipv4.fib_offload_disabled) return 0; - dev = netdev_switch_get_dev_by_nhs(fi); + dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; - ops = dev->swdev_ops; - - if (ops->swdev_fib_ipv4_add) { - err = ops->swdev_fib_ipv4_add(dev, htonl(dst), dst_len, - fi, tos, type, nlflags, - tb_id); - if (!err) - fi->fib_flags |= RTNH_F_EXTERNAL; - } + + err = switchdev_port_obj_add(dev, &fib_obj); + if (!err) + fi->fib_flags |= RTNH_F_EXTERNAL; return err; } -EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add); +EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); /** - * netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch + * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch * * @dst: route's IPv4 destination address * @dst_len: destination address length (prefix length) @@ -357,38 +694,45 @@ EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_add); * * Delete IPv4 route entry from switch device. */ -int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) +int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id) { + struct switchdev_obj fib_obj = { + .id = SWITCHDEV_OBJ_IPV4_FIB, + .u.ipv4_fib = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .nlflags = 0, + .tb_id = tb_id, + }, + }; struct net_device *dev; - const struct swdev_ops *ops; int err = 0; if (!(fi->fib_flags & RTNH_F_EXTERNAL)) return 0; - dev = netdev_switch_get_dev_by_nhs(fi); + dev = switchdev_get_dev_by_nhs(fi); if (!dev) return 0; - ops = dev->swdev_ops; - if (ops->swdev_fib_ipv4_del) { - err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len, - fi, tos, type, tb_id); - if (!err) - fi->fib_flags &= ~RTNH_F_EXTERNAL; - } + err = switchdev_port_obj_del(dev, &fib_obj); + if (!err) + fi->fib_flags &= ~RTNH_F_EXTERNAL; return err; } -EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_del); +EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); /** - * netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation + * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation * * @fi: route FIB info structure */ -void netdev_switch_fib_ipv4_abort(struct fib_info *fi) +void switchdev_fib_ipv4_abort(struct fib_info *fi) { /* There was a problem installing this route to the offload * device. For now, until we come up with more refined @@ -401,4 +745,4 @@ void netdev_switch_fib_ipv4_abort(struct fib_info *fi) fib_flush_external(fi->fib_net); fi->fib_net->ipv4.fib_offload_disabled = true; } -EXPORT_SYMBOL_GPL(netdev_switch_fib_ipv4_abort); +EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index c5cbdcb1f0b5..3e18fc6c7877 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -866,6 +866,27 @@ int tipc_bclink_set_queue_limits(struct net *net, u32 limit) return 0; } +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) +{ + int err; + u32 win; + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + if (!attrs[TIPC_NLA_LINK_PROP]) + return -EINVAL; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props); + if (err) + return err; + + if (!props[TIPC_NLA_PROP_WIN]) + return -EOPNOTSUPP; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + + return tipc_bclink_set_queue_limits(net, win); +} + int tipc_bclink_init(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 4bdc12277d33..3c290a48f720 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -131,6 +131,7 @@ uint tipc_bclink_get_mtu(void); int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); void tipc_bclink_wakeup_users(struct net *net); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); void tipc_bclink_input(struct net *net); #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 70e3dacbf84a..99c0bd43feed 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -812,7 +812,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *b; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; - struct net *net = genl_info_net(info); + struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; diff --git a/net/tipc/core.c b/net/tipc/core.c index be1c9fa60b09..005ba5eb0ea4 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -68,7 +68,7 @@ static int __net_init tipc_init_net(struct net *net) if (err) goto out_nametbl; - err = tipc_subscr_start(net); + err = tipc_topsrv_start(net); if (err) goto out_subscr; return 0; @@ -83,7 +83,7 @@ out_sk_rht: static void __net_exit tipc_exit_net(struct net *net) { - tipc_subscr_stop(net); + tipc_topsrv_stop(net); tipc_net_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); diff --git a/net/tipc/link.c b/net/tipc/link.c index 43a515dc97b0..374d52335168 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1893,6 +1893,9 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + if (strcmp(name, tipc_bclink_name) == 0) + return tipc_nl_bc_link_set(net, attrs); + node = tipc_link_find_owner(net, name, &bearer_id); if (!node) return -EINVAL; @@ -2175,50 +2178,53 @@ out: int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); - struct sk_buff *ans_skb; struct tipc_nl_msg msg; - struct tipc_link *link; - struct tipc_node *node; char *name; - int bearer_id; int err; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + if (!info->attrs[TIPC_NLA_LINK_NAME]) return -EINVAL; - name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return -EINVAL; - ans_skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (!ans_skb) + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) return -ENOMEM; - msg.skb = ans_skb; - msg.portid = info->snd_portid; - msg.seq = info->snd_seq; - - tipc_node_lock(node); - link = node->links[bearer_id]; - if (!link) { - err = -EINVAL; - goto err_out; - } - - err = __tipc_nl_add_link(net, &msg, link, 0); - if (err) - goto err_out; + if (strcmp(name, tipc_bclink_name) == 0) { + err = tipc_nl_add_bc_link(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } else { + int bearer_id; + struct tipc_node *node; + struct tipc_link *link; - tipc_node_unlock(node); + node = tipc_link_find_owner(net, name, &bearer_id); + if (!node) + return -EINVAL; - return genlmsg_reply(ans_skb, info); + tipc_node_lock(node); + link = node->links[bearer_id]; + if (!link) { + tipc_node_unlock(node); + nlmsg_free(msg.skb); + return -EINVAL; + } -err_out: - tipc_node_unlock(node); - nlmsg_free(ans_skb); + err = __tipc_nl_add_link(net, &msg, link, 0); + tipc_node_unlock(node); + if (err) { + nlmsg_free(msg.skb); + return err; + } + } - return err; + return genlmsg_reply(msg.skb, info); } int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index ab0ac62a1287..0f47f08bf38f 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -330,13 +330,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, /* Any subscriptions waiting for notification? */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - tipc_subscr_report_overlap(s, - publ->lower, - publ->upper, - TIPC_PUBLISHED, - publ->ref, - publ->node, - created_subseq); + tipc_subscrp_report_overlap(s, publ->lower, publ->upper, + TIPC_PUBLISHED, publ->ref, + publ->node, created_subseq); } return publ; } @@ -404,13 +400,9 @@ found: /* Notify any waiting subscriptions */ list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { - tipc_subscr_report_overlap(s, - publ->lower, - publ->upper, - TIPC_WITHDRAWN, - publ->ref, - publ->node, - removed_subseq); + tipc_subscrp_report_overlap(s, publ->lower, publ->upper, + TIPC_WITHDRAWN, publ->ref, + publ->node, removed_subseq); } return publ; @@ -432,19 +424,17 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, return; while (sseq != &nseq->sseqs[nseq->first_free]) { - if (tipc_subscr_overlap(s, sseq->lower, sseq->upper)) { + if (tipc_subscrp_check_overlap(s, sseq->lower, sseq->upper)) { struct publication *crs; struct name_info *info = sseq->info; int must_report = 1; list_for_each_entry(crs, &info->zone_list, zone_list) { - tipc_subscr_report_overlap(s, - sseq->lower, - sseq->upper, - TIPC_PUBLISHED, - crs->ref, - crs->node, - must_report); + tipc_subscrp_report_overlap(s, sseq->lower, + sseq->upper, + TIPC_PUBLISHED, + crs->ref, crs->node, + must_report); must_report = 0; } } diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index ce9121e8e990..53e0fee80086 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -55,6 +55,7 @@ struct tipc_nl_compat_msg { int rep_type; int rep_size; int req_type; + struct net *net; struct sk_buff *rep; struct tlv_desc *req; struct sock *dst_sk; @@ -68,7 +69,8 @@ struct tipc_nl_compat_cmd_dump { struct tipc_nl_compat_cmd_doit { int (*doit)(struct sk_buff *skb, struct genl_info *info); - int (*transcode)(struct sk_buff *skb, struct tipc_nl_compat_msg *msg); + int (*transcode)(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg); }; static int tipc_skb_tailroom(struct sk_buff *skb) @@ -281,7 +283,7 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, if (!trans_buf) return -ENOMEM; - err = (*cmd->transcode)(trans_buf, msg); + err = (*cmd->transcode)(cmd, trans_buf, msg); if (err) goto trans_out; @@ -353,7 +355,8 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, nla_len(bearer[TIPC_NLA_BEARER_NAME])); } -static int tipc_nl_compat_bearer_enable(struct sk_buff *skb, +static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg) { struct nlattr *prop; @@ -385,7 +388,8 @@ static int tipc_nl_compat_bearer_enable(struct sk_buff *skb, return 0; } -static int tipc_nl_compat_bearer_disable(struct sk_buff *skb, +static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg) { char *name; @@ -576,11 +580,81 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, &link_info, sizeof(link_info)); } -static int tipc_nl_compat_link_set(struct sk_buff *skb, - struct tipc_nl_compat_msg *msg) +static int __tipc_add_link_prop(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg, + struct tipc_link_config *lc) +{ + switch (msg->cmd) { + case TIPC_CMD_SET_LINK_PRI: + return nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value)); + case TIPC_CMD_SET_LINK_TOL: + return nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value)); + case TIPC_CMD_SET_LINK_WINDOW: + return nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value)); + } + + return -EINVAL; +} + +static int tipc_nl_compat_media_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) { - struct nlattr *link; struct nlattr *prop; + struct nlattr *media; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + media = nla_nest_start(skb, TIPC_NLA_MEDIA); + if (!media) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_MEDIA_PROP); + if (!prop) + return -EMSGSIZE; + + __tipc_add_link_prop(skb, msg, lc); + nla_nest_end(skb, prop); + nla_nest_end(skb, media); + + return 0; +} + +static int tipc_nl_compat_bearer_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + + __tipc_add_link_prop(skb, msg, lc); + nla_nest_end(skb, prop); + nla_nest_end(skb, bearer); + + return 0; +} + +static int __tipc_nl_compat_link_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *link; struct tipc_link_config *lc; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -596,24 +670,40 @@ static int tipc_nl_compat_link_set(struct sk_buff *skb, if (!prop) return -EMSGSIZE; - if (msg->cmd == TIPC_CMD_SET_LINK_PRI) { - if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value))) - return -EMSGSIZE; - } else if (msg->cmd == TIPC_CMD_SET_LINK_TOL) { - if (nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value))) - return -EMSGSIZE; - } else if (msg->cmd == TIPC_CMD_SET_LINK_WINDOW) { - if (nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value))) - return -EMSGSIZE; - } - + __tipc_add_link_prop(skb, msg, lc); nla_nest_end(skb, prop); nla_nest_end(skb, link); return 0; } -static int tipc_nl_compat_link_reset_stats(struct sk_buff *skb, +static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct tipc_link_config *lc; + struct tipc_bearer *bearer; + struct tipc_media *media; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + media = tipc_media_find(lc->name); + if (media) { + cmd->doit = &tipc_nl_media_set; + return tipc_nl_compat_media_set(skb, msg); + } + + bearer = tipc_bearer_find(msg->net, lc->name); + if (bearer) { + cmd->doit = &tipc_nl_bearer_set; + return tipc_nl_compat_bearer_set(skb, msg); + } + + return __tipc_nl_compat_link_set(skb, msg); +} + +static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg) { char *name; @@ -851,7 +941,8 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, sizeof(node_info)); } -static int tipc_nl_compat_net_set(struct sk_buff *skb, +static int tipc_nl_compat_net_set(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg) { u32 val; @@ -1007,7 +1098,6 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) struct nlmsghdr *req_nlh; struct nlmsghdr *rep_nlh; struct tipc_genlmsghdr *req_userhdr = info->userhdr; - struct net *net = genl_info_net(info); memset(&msg, 0, sizeof(msg)); @@ -1015,6 +1105,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; msg.cmd = req_userhdr->cmd; msg.dst_sk = info->dst_sk; + msg.net = genl_info_net(info); if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); @@ -1030,7 +1121,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) } err = tipc_nl_compat_handle(&msg); - if (err == -EOPNOTSUPP) + if ((err == -EOPNOTSUPP) || (err == -EPERM)) msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); else if (err == -EINVAL) msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR); @@ -1043,7 +1134,7 @@ send: rep_nlh = nlmsg_hdr(msg.rep); memcpy(rep_nlh, info->nlhdr, len); rep_nlh->nlmsg_len = msg.rep->len; - genlmsg_unicast(net, msg.rep, NETLINK_CB(skb).portid); + genlmsg_unicast(msg.net, msg.rep, NETLINK_CB(skb).portid); return err; } diff --git a/net/tipc/server.c b/net/tipc/server.c index 77ff03ed1e18..a91a2f79209a 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -309,6 +309,10 @@ static int tipc_accept_from_sock(struct tipc_conn *con) /* Notify that new connection is incoming */ newcon->usr_data = s->tipc_conn_new(newcon->conid); + if (!newcon->usr_data) { + sock_release(newsock); + return -ENOMEM; + } /* Wake up receive process in case of 'SYN+' message */ newsock->sk->sk_data_ready(newsock->sk); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9074b5cede38..8f3c8e2cef8e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -342,7 +342,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, } /* Allocate socket's protocol area */ - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern); if (sk == NULL) return -ENOMEM; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 1c147c869c2e..350cca33ee0a 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -40,16 +40,21 @@ /** * struct tipc_subscriber - TIPC network topology subscriber + * @kref: reference counter to tipc_subscription object * @conid: connection identifier to server connecting to subscriber * @lock: control access to subscriber - * @subscription_list: list of subscription objects for this subscriber + * @subscrp_list: list of subscription objects for this subscriber */ struct tipc_subscriber { + struct kref kref; int conid; spinlock_t lock; - struct list_head subscription_list; + struct list_head subscrp_list; }; +static void tipc_subscrp_delete(struct tipc_subscription *sub); +static void tipc_subscrb_put(struct tipc_subscriber *subscriber); + /** * htohl - convert value to endianness used by destination * @in: value to convert @@ -62,9 +67,9 @@ static u32 htohl(u32 in, int swap) return swap ? swab32(in) : in; } -static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node) +static void tipc_subscrp_send_event(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port_ref, u32 node) { struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; @@ -82,12 +87,13 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, } /** - * tipc_subscr_overlap - test for subscription overlap with the given values + * tipc_subscrp_check_overlap - test for subscription overlap with the + * given values * * Returns 1 if there is overlap, otherwise 0. */ -int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper) +int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper) { if (found_lower < sub->seq.lower) found_lower = sub->seq.lower; @@ -98,138 +104,121 @@ int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, return 1; } -/** - * tipc_subscr_report_overlap - issue event if there is subscription overlap - * - * Protected by nameseq.lock in name_table.c - */ -void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node, int must) +void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must) { - if (!tipc_subscr_overlap(sub, found_lower, found_upper)) + if (!tipc_subscrp_check_overlap(sub, found_lower, found_upper)) return; if (!must && !(sub->filter & TIPC_SUB_PORTS)) return; - subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); + tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, + node); } -static void subscr_timeout(unsigned long data) +static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; struct tipc_subscriber *subscriber = sub->subscriber; - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); - /* The spin lock per subscriber is used to protect its members */ - spin_lock_bh(&subscriber->lock); + /* Notify subscriber of timeout */ + tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, + TIPC_SUBSCR_TIMEOUT, 0, 0); - /* Validate timeout (in case subscription is being cancelled) */ - if (sub->timeout == TIPC_WAIT_FOREVER) { - spin_unlock_bh(&subscriber->lock); - return; - } + spin_lock_bh(&subscriber->lock); + tipc_subscrp_delete(sub); + spin_unlock_bh(&subscriber->lock); - /* Unlink subscription from name table */ - tipc_nametbl_unsubscribe(sub); + tipc_subscrb_put(subscriber); +} - /* Unlink subscription from subscriber */ - list_del(&sub->subscription_list); +static void tipc_subscrb_kref_release(struct kref *kref) +{ + struct tipc_subscriber *subcriber = container_of(kref, + struct tipc_subscriber, kref); - spin_unlock_bh(&subscriber->lock); + kfree(subcriber); +} - /* Notify subscriber of timeout */ - subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, - TIPC_SUBSCR_TIMEOUT, 0, 0); +static void tipc_subscrb_put(struct tipc_subscriber *subscriber) +{ + kref_put(&subscriber->kref, tipc_subscrb_kref_release); +} - /* Now destroy subscription */ - kfree(sub); - atomic_dec(&tn->subscription_count); +static void tipc_subscrb_get(struct tipc_subscriber *subscriber) +{ + kref_get(&subscriber->kref); } -/** - * subscr_del - delete a subscription within a subscription list - * - * Called with subscriber lock held. - */ -static void subscr_del(struct tipc_subscription *sub) +static struct tipc_subscriber *tipc_subscrb_create(int conid) { - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + struct tipc_subscriber *subscriber; - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscription_list); - kfree(sub); - atomic_dec(&tn->subscription_count); + subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC); + if (!subscriber) { + pr_warn("Subscriber rejected, no memory\n"); + return NULL; + } + kref_init(&subscriber->kref); + INIT_LIST_HEAD(&subscriber->subscrp_list); + subscriber->conid = conid; + spin_lock_init(&subscriber->lock); + + return subscriber; } -static void subscr_release(struct tipc_subscriber *subscriber) +static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) { - struct tipc_subscription *sub; - struct tipc_subscription *sub_temp; + struct tipc_subscription *sub, *temp; spin_lock_bh(&subscriber->lock); - /* Destroy any existing subscriptions for subscriber */ - list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, - subscription_list) { - if (sub->timeout != TIPC_WAIT_FOREVER) { - spin_unlock_bh(&subscriber->lock); - del_timer_sync(&sub->timer); - spin_lock_bh(&subscriber->lock); + list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, + subscrp_list) { + if (del_timer(&sub->timer)) { + tipc_subscrp_delete(sub); + tipc_subscrb_put(subscriber); } - subscr_del(sub); } spin_unlock_bh(&subscriber->lock); - /* Now destroy subscriber */ - kfree(subscriber); + tipc_subscrb_put(subscriber); } -/** - * subscr_cancel - handle subscription cancellation request - * - * Called with subscriber lock held. Routine must temporarily release lock - * to enable the subscription timeout routine to finish without deadlocking; - * the lock is then reclaimed to allow caller to release it upon return. - * - * Note that fields of 's' use subscriber's endianness! - */ -static void subscr_cancel(struct tipc_subscr *s, - struct tipc_subscriber *subscriber) +static void tipc_subscrp_delete(struct tipc_subscription *sub) { - struct tipc_subscription *sub; - struct tipc_subscription *sub_temp; - int found = 0; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); + kfree(sub); + atomic_dec(&tn->subscription_count); +} +static void tipc_subscrp_cancel(struct tipc_subscr *s, + struct tipc_subscriber *subscriber) +{ + struct tipc_subscription *sub, *temp; + + spin_lock_bh(&subscriber->lock); /* Find first matching subscription, exit if not found */ - list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, - subscription_list) { + list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, + subscrp_list) { if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { - found = 1; + if (del_timer(&sub->timer)) { + tipc_subscrp_delete(sub); + tipc_subscrb_put(subscriber); + } break; } } - if (!found) - return; - - /* Cancel subscription timer (if used), then delete subscription */ - if (sub->timeout != TIPC_WAIT_FOREVER) { - sub->timeout = TIPC_WAIT_FOREVER; - spin_unlock_bh(&subscriber->lock); - del_timer_sync(&sub->timer); - spin_lock_bh(&subscriber->lock); - } - subscr_del(sub); + spin_unlock_bh(&subscriber->lock); } -/** - * subscr_subscribe - create subscription for subscriber - * - * Called with subscriber lock held. - */ -static int subscr_subscribe(struct net *net, struct tipc_subscr *s, - struct tipc_subscriber *subscriber, - struct tipc_subscription **sub_p) +static int tipc_subscrp_create(struct net *net, struct tipc_subscr *s, + struct tipc_subscriber *subscriber, + struct tipc_subscription **sub_p) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub; @@ -241,7 +230,7 @@ static int subscr_subscribe(struct net *net, struct tipc_subscr *s, /* Detect & process a subscription cancellation request */ if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); - subscr_cancel(s, subscriber); + tipc_subscrp_cancel(s, subscriber); return 0; } @@ -273,62 +262,51 @@ static int subscr_subscribe(struct net *net, struct tipc_subscr *s, kfree(sub); return -EINVAL; } - list_add(&sub->subscription_list, &subscriber->subscription_list); + spin_lock_bh(&subscriber->lock); + list_add(&sub->subscrp_list, &subscriber->subscrp_list); + spin_unlock_bh(&subscriber->lock); sub->subscriber = subscriber; sub->swap = swap; - memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); + memcpy(&sub->evt.s, s, sizeof(*s)); atomic_inc(&tn->subscription_count); - if (sub->timeout != TIPC_WAIT_FOREVER) { - setup_timer(&sub->timer, subscr_timeout, (unsigned long)sub); - mod_timer(&sub->timer, jiffies + sub->timeout); - } + setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); + if (sub->timeout != TIPC_WAIT_FOREVER) + sub->timeout += jiffies; + if (!mod_timer(&sub->timer, sub->timeout)) + tipc_subscrb_get(subscriber); *sub_p = sub; return 0; } /* Handle one termination request for the subscriber */ -static void subscr_conn_shutdown_event(int conid, void *usr_data) +static void tipc_subscrb_shutdown_cb(int conid, void *usr_data) { - subscr_release((struct tipc_subscriber *)usr_data); + tipc_subscrb_delete((struct tipc_subscriber *)usr_data); } /* Handle one request to create a new subscription for the subscriber */ -static void subscr_conn_msg_event(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len) +static void tipc_subscrb_rcv_cb(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub = NULL; struct tipc_net *tn = net_generic(net, tipc_net_id); - spin_lock_bh(&subscriber->lock); - subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub); + tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub); if (sub) tipc_nametbl_subscribe(sub); else tipc_conn_terminate(tn->topsrv, subscriber->conid); - spin_unlock_bh(&subscriber->lock); } /* Handle one request to establish a new subscriber */ -static void *subscr_named_msg_event(int conid) +static void *tipc_subscrb_connect_cb(int conid) { - struct tipc_subscriber *subscriber; - - /* Create subscriber object */ - subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC); - if (subscriber == NULL) { - pr_warn("Subscriber rejected, no memory\n"); - return NULL; - } - INIT_LIST_HEAD(&subscriber->subscription_list); - subscriber->conid = conid; - spin_lock_init(&subscriber->lock); - - return (void *)subscriber; + return (void *)tipc_subscrb_create(conid); } -int tipc_subscr_start(struct net *net) +int tipc_topsrv_start(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); const char name[] = "topology_server"; @@ -355,9 +333,9 @@ int tipc_subscr_start(struct net *net) topsrv->imp = TIPC_CRITICAL_IMPORTANCE; topsrv->type = SOCK_SEQPACKET; topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); - topsrv->tipc_conn_recvmsg = subscr_conn_msg_event; - topsrv->tipc_conn_new = subscr_named_msg_event; - topsrv->tipc_conn_shutdown = subscr_conn_shutdown_event; + topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb; + topsrv->tipc_conn_new = tipc_subscrb_connect_cb; + topsrv->tipc_conn_shutdown = tipc_subscrb_shutdown_cb; strncpy(topsrv->name, name, strlen(name) + 1); tn->topsrv = topsrv; @@ -366,7 +344,7 @@ int tipc_subscr_start(struct net *net) return tipc_server_start(topsrv); } -void tipc_subscr_stop(struct net *net) +void tipc_topsrv_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_server *topsrv = tn->topsrv; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 33488bd9fe3c..92ee18cc5fe6 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -54,7 +54,7 @@ struct tipc_subscriber; * @filter: event filtering to be done for subscription * @timer: timer governing subscription duration (optional) * @nameseq_list: adjacent subscriptions in name sequence's subscription list - * @subscription_list: adjacent subscriptions in subscriber's subscription list + * @subscrp_list: adjacent subscriptions in subscriber's subscription list * @server_ref: object reference of server port associated with subscription * @swap: indicates if subscriber uses opposite endianness in its messages * @evt: template for events generated by subscription @@ -67,17 +67,17 @@ struct tipc_subscription { u32 filter; struct timer_list timer; struct list_head nameseq_list; - struct list_head subscription_list; + struct list_head subscrp_list; int swap; struct tipc_event evt; }; -int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper); -void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, - u32 found_upper, u32 event, u32 port_ref, - u32 node, int must); -int tipc_subscr_start(struct net *net); -void tipc_subscr_stop(struct net *net); +int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper); +void tipc_subscrp_report_overlap(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, u32 event, + u32 port_ref, u32 node, int must); +int tipc_topsrv_start(struct net *net); +void tipc_topsrv_stop(struct net *net); #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5266ea7b922b..941b3d26e3bf 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -620,7 +620,7 @@ static struct proto unix_proto = { */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock *unix_create1(struct net *net, struct socket *sock) +static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) { struct sock *sk = NULL; struct unix_sock *u; @@ -629,7 +629,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) goto out; - sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); if (!sk) goto out; @@ -688,7 +688,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - return unix_create1(net, sock) ? 0 : -ENOMEM; + return unix_create1(net, sock, kern) ? 0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -1088,7 +1088,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(sock_net(sk), NULL); + newsk = unix_create1(sock_net(sk), NULL, 0); if (newsk == NULL) goto out; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2ec86e652a19..df5fc6b340f1 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -581,13 +581,14 @@ struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, gfp_t priority, - unsigned short type) + unsigned short type, + int kern) { struct sock *sk; struct vsock_sock *psk; struct vsock_sock *vsk; - sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto); + sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto, kern); if (!sk) return NULL; @@ -1866,7 +1867,7 @@ static int vsock_create(struct net *net, struct socket *sock, sock->state = SS_UNCONNECTED; - return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM; + return __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern) ? 0 : -ENOMEM; } static const struct net_proto_family vsock_family_ops = { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index c294da095461..1f63daff3965 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1022,7 +1022,7 @@ static int vmci_transport_recv_listen(struct sock *sk, } pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, - sk->sk_type); + sk->sk_type, 0); if (!pending) { vmci_transport_send_reset(sk, pkt); return -ENOMEM; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 7aaf7415dc4c..915b328b9ac5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -698,19 +698,20 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_chandef_usable); /* - * For GO only, check if the channel can be used under permissive conditions - * mandated by the some regulatory bodies, i.e., the channel is marked with - * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface + * Check if the channel can be used under permissive conditions mandated by + * some regulatory bodies, i.e., the channel is marked with + * IEEE80211_CHAN_IR_CONCURRENT and there is an additional station interface * associated to an AP on the same channel or on the same UNII band * (assuming that the AP is an authorized master). - * In addition allow the GO to operate on a channel on which indoor operation is + * In addition allow operation on a channel on which indoor operation is * allowed, iff we are currently operating in an indoor environment. */ -static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, +static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, + enum nl80211_iftype iftype, struct ieee80211_channel *chan) { - struct wireless_dev *wdev_iter; - struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx); + struct wireless_dev *wdev; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_RTNL(); @@ -718,32 +719,48 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)) return false; + /* only valid for GO and TDLS off-channel (station/p2p-CL) */ + if (iftype != NL80211_IFTYPE_P2P_GO && + iftype != NL80211_IFTYPE_STATION && + iftype != NL80211_IFTYPE_P2P_CLIENT) + return false; + if (regulatory_indoor_allowed() && (chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) return true; - if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT)) + if (!(chan->flags & IEEE80211_CHAN_IR_CONCURRENT)) return false; /* * Generally, it is possible to rely on another device/driver to allow - * the GO concurrent relaxation, however, since the device can further + * the IR concurrent relaxation, however, since the device can further * enforce the relaxation (by doing a similar verifications as this), * and thus fail the GO instantiation, consider only the interfaces of * the current registered device. */ - list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { struct ieee80211_channel *other_chan = NULL; int r1, r2; - if (wdev_iter->iftype != NL80211_IFTYPE_STATION || - !netif_running(wdev_iter->netdev)) - continue; - - wdev_lock(wdev_iter); - if (wdev_iter->current_bss) - other_chan = wdev_iter->current_bss->pub.channel; - wdev_unlock(wdev_iter); + wdev_lock(wdev); + if (wdev->iftype == NL80211_IFTYPE_STATION && + wdev->current_bss) + other_chan = wdev->current_bss->pub.channel; + + /* + * If a GO already operates on the same GO_CONCURRENT channel, + * this one (maybe the same one) can beacon as well. We allow + * the operation even if the station we relied on with + * GO_CONCURRENT is disconnected now. But then we must make sure + * we're not outdoor on an indoor-only channel. + */ + if (iftype == NL80211_IFTYPE_P2P_GO && + wdev->iftype == NL80211_IFTYPE_P2P_GO && + wdev->beacon_interval && + !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) + other_chan = wdev->chandef.chan; + wdev_unlock(wdev); if (!other_chan) continue; @@ -784,7 +801,6 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype) { - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); bool res; u32 prohibited_flags = IEEE80211_CHAN_DISABLED | IEEE80211_CHAN_RADAR; @@ -792,13 +808,12 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); /* - * Under certain conditions suggested by the some regulatory bodies - * a GO can operate on channels marked with IEEE80211_NO_IR - * so set this flag only if such relaxations are not enabled and - * the conditions are not met. + * Under certain conditions suggested by some regulatory bodies a + * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag + * only if such relaxations are not enabled and the conditions are not + * met. */ - if (iftype != NL80211_IFTYPE_P2P_GO || - !cfg80211_go_permissive_chan(rdev, chandef->chan)) + if (!cfg80211_ir_permissive_chan(wiphy, iftype, chandef->chan)) prohibited_flags |= IEEE80211_CHAN_NO_IR; if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 && diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dd78445c7d50..c264effd00a6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -639,8 +639,8 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT)) + if ((chan->flags & IEEE80211_CHAN_IR_CONCURRENT) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_IR_CONCURRENT)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ)) @@ -4061,7 +4061,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy, return -EINVAL; break; case CFG80211_STA_MESH_PEER_USER: - if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) + if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION && + params->plink_action != NL80211_PLINK_ACTION_BLOCK) return -EINVAL; break; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0e347f888fe9..d359e0610198 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -989,8 +989,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_OFDM; if (rd_flags & NL80211_RRF_NO_OUTDOOR) channel_flags |= IEEE80211_CHAN_INDOOR_ONLY; - if (rd_flags & NL80211_RRF_GO_CONCURRENT) - channel_flags |= IEEE80211_CHAN_GO_CONCURRENT; + if (rd_flags & NL80211_RRF_IR_CONCURRENT) + channel_flags |= IEEE80211_CHAN_IR_CONCURRENT; if (rd_flags & NL80211_RRF_NO_HT40MINUS) channel_flags |= IEEE80211_CHAN_NO_HT40MINUS; if (rd_flags & NL80211_RRF_NO_HT40PLUS) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index c3ab230e4493..a750f330b8dd 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -515,10 +515,10 @@ static struct proto x25_proto = { .obj_size = sizeof(struct x25_sock), }; -static struct sock *x25_alloc_socket(struct net *net) +static struct sock *x25_alloc_socket(struct net *net, int kern) { struct x25_sock *x25; - struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto); + struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, kern); if (!sk) goto out; @@ -553,7 +553,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol, goto out; rc = -ENOBUFS; - if ((sk = x25_alloc_socket(net)) == NULL) + if ((sk = x25_alloc_socket(net, kern)) == NULL) goto out; x25 = x25_sk(sk); @@ -602,7 +602,7 @@ static struct sock *x25_make_new(struct sock *osk) if (osk->sk_type != SOCK_SEQPACKET) goto out; - if ((sk = x25_alloc_socket(sock_net(osk))) == NULL) + if ((sk = x25_alloc_socket(sock_net(osk), 0)) == NULL) goto out; x25 = x25_sk(sk); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index fbcedbe33190..68ada2ca4b60 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -38,6 +38,18 @@ static int xfrm_skb_check_space(struct sk_buff *skb) return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); } +/* Children define the path of the packet through the + * Linux networking. Thus, destinations are stackable. + */ + +static struct dst_entry *skb_dst_pop(struct sk_buff *skb) +{ + struct dst_entry *child = dst_clone(skb_dst(skb)->child); + + skb_dst_drop(skb); + return child; +} + static int xfrm_output_one(struct sk_buff *skb, int err) { struct dst_entry *dst = skb_dst(skb); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 76e3458a5419..8fdbd73429dd 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -44,7 +44,7 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt # point this to your LLVM backend with bpf support LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc -%.o: %.c +$(obj)/%.o: $(src)/%.c clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ |