summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-16 19:28:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-16 19:28:34 -0700
commit51835949dda3783d4639cfa74ce13a3c9829de00 (patch)
tree2b593de5eba6ecc73f7c58fc65fdaffae45c7323 /net/ipv6
parent0434dbe32053d07d658165be681505120c6b1abc (diff)
parent77ae5e5b00720372af2860efdc4bc652ac682696 (diff)
Merge tag 'net-next-6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextHEADmaster
Pull networking updates from Jakub Kicinski: "Not much excitement - a handful of large patchsets (devmem among them) did not make it in time. Core & protocols: - Use local_lock in addition to local_bh_disable() to protect per-CPU resources in networking, a step closer for local_bh_disable() not to act as a big lock on PREEMPT_RT - Use flex array for netdevice priv area, ensure its cache alignment - Add a sysctl knob to allow user to specify a default rto_min at socket init time. Bit of a big hammer but multiple companies were independently carrying such patch downstream so clearly it's useful - Support scheduling transmission of packets based on CLOCK_TAI - Un-pin TCP TIMEWAIT timer to avoid it firing on CPUs later cordoned off using cpusets - Support multiple L2TPv3 UDP tunnels using the same 5-tuple address - Allow configuration of multipath hash seed, to both allow synchronizing hashing of two routers, and preventing partial accidental sync - Improve TCP compliance with RFC 9293 for simultaneous connect() - Support sending NAT keepalives in IPsec ESP in UDP states. Userspace IKE daemon had to do this before, but the kernel can better keep track of it - Support sending supervision HSR frames with MAC addresses stored in ProxyNodeTable when RedBox (i.e. HSR-SAN) is enabled - Introduce IPPROTO_SMC for selecting SMC when socket is created - Allow UDP GSO transmit from devices with no checksum offload - openvswitch: add packet sampling via psample, separating the sampled traffic from "upcall" packets sent to user space for forwarding - nf_tables: shrink memory consumption for transaction objects Things we sprinkled into general kernel code: - Power Sequencing subsystem (used by Qualcomm Bluetooth driver for QCA6390) [ Already merged separately - Linus ] - Add IRQ information in sysfs for auxiliary bus - Introduce guard definition for local_lock - Add aligned flavor of __cacheline_group_{begin, end}() markings for grouping fields in structures BPF: - Notify user space (via epoll) when a struct_ops object is getting detached/unregistered - Add new kfuncs for a generic, open-coded bits iterator - Enable BPF programs to declare arrays of kptr, bpf_rb_root, and bpf_list_head - Support resilient split BTF which cuts down on duplication and makes BTF as compact as possible WRT BTF from modules - Add support for dumping kfunc prototypes from BTF which enables both detecting as well as dumping compilable prototypes for kfuncs - riscv64 BPF JIT improvements in particular to add 12-argument support for BPF trampolines and to utilize bpf_prog_pack for the latter - Add the capability to offload the netfilter flowtable in XDP layer through kfuncs Driver API: - Allow users to configure IRQ tresholds between which automatic IRQ moderation can choose - Expand Power Sourcing (PoE) status with power, class and failure reason. Support setting power limits - Track additional RSS contexts in the core, make sure configuration changes don't break them - Support IPsec crypto offload for IPv6 ESP and IPv4 UDP-encapsulated ESP data paths - Support updating firmware on SFP modules Tests and tooling: - mptcp: use net/lib.sh to manage netns - TCP-AO and TCP-MD5: replace debug prints used by tests with tracepoints - openvswitch: make test self-contained (don't depend on OvS CLI tools) Drivers: - Ethernet high-speed NICs: - Broadcom (bnxt): - increase the max total outstanding PTP TX packets to 4 - add timestamping statistics support - implement netdev_queue_mgmt_ops - support new RSS context API - Intel (100G, ice, idpf): - implement FEC statistics and dumping signal quality indicators - support E825C products (with 56Gbps PHYs) - nVidia/Mellanox: - support HW-GRO - mlx4/mlx5: support per-queue statistics via netlink - obey the max number of EQs setting in sub-functions - AMD/Solarflare: - support new RSS context API - AMD/Pensando: - ionic: rework fix for doorbell miss to lower overhead and skip it on new HW - Wangxun: - txgbe: support Flow Director perfect filters - Ethernet NICs consumer, embedded and virtual: - Add driver for Tehuti Networks TN40xx chips - Add driver for Meta's internal NIC chips - Add driver for Ethernet MAC on Airoha EN7581 SoCs - Add driver for Renesas Ethernet-TSN devices - Google cloud vNIC: - flow steering support - Microsoft vNIC: - support page sizes other than 4KB on ARM64 - vmware vNIC: - support latency measurement (update to version 9) - VirtIO net: - support for Byte Queue Limits - support configuring thresholds for automatic IRQ moderation - support for AF_XDP Rx zero-copy - Synopsys (stmmac): - support for STM32MP13 SoC - let platforms select the right PCS implementation - TI: - icssg-prueth: add multicast filtering support - icssg-prueth: enable PTP timestamping and PPS - Renesas: - ravb: improve Rx performance 30-400% by using page pool, theaded NAPI and timer-based IRQ coalescing - ravb: add MII support for R-Car V4M - Cadence (macb): - macb: add ARP support to Wake-On-LAN - Cortina: - use phylib for RX and TX pause configuration - Ethernet switches: - nVidia/Mellanox: - support configuration of multipath hash seed - report more accurate max MTU - use page_pool to improve Rx performance - MediaTek: - mt7530: add support for bridge port isolation - Qualcomm: - qca8k: add support for bridge port isolation - Microchip: - lan9371/2: add 100BaseTX PHY support - NXP: - vsc73xx: implement VLAN operations - Ethernet PHYs: - aquantia: enable support for aqr115c - aquantia: add support for PHY LEDs - realtek: add support for rtl8224 2.5Gbps PHY - xpcs: add memory-mapped device support - add BroadR-Reach link mode and support in Broadcom's PHY driver - CAN: - add document for ISO 15765-2 protocol support - mcp251xfd: workaround for erratum DS80000789E, use timestamps to catch when device returns incorrect FIFO status - WiFi: - mac80211/cfg80211: - parse Transmit Power Envelope (TPE) data in mac80211 instead of in drivers - improvements for 6 GHz regulatory flexibility - multi-link improvements - support multiple radios per wiphy - remove DEAUTH_NEED_MGD_TX_PREP flag - Intel (iwlwifi): - bump FW API to 91 for BZ/SC devices - report 64-bit radiotap timestamp - enable P2P low latency by default - handle Transmit Power Envelope (TPE) advertised by AP - remove support for older FW for new devices - fast resume (keeping the device configured) - mvm: re-enable Multi-Link Operation (MLO) - aggregation (A-MSDU) optimizations - MediaTek (mt76): - mt7925 Multi-Link Operation (MLO) support - Qualcomm (ath10k): - LED support for various chipsets - Qualcomm (ath12k): - remove unsupported Tx monitor handling - support channel 2 in 6 GHz band - support Spatial Multiplexing Power Save (SMPS) in 6 GHz band - supprt multiple BSSID (MBSSID) and Enhanced Multi-BSSID Advertisements (EMA) - support dynamic VLAN - add panic handler for resetting the firmware state - DebugFS support for datapath statistics - WCN7850: support for Wake on WLAN - Microchip (wilc1000): - read MAC address during probe to make it visible to user space - suspend/resume improvements - TI (wl18xx): - support newer firmware versions - RealTek (rtw89): - preparation for RTL8852BE-VT support - Wake on WLAN support for WiFi 6 chips - 36-bit PCI DMA support - RealTek (rtlwifi): - RTL8192DU support - Broadcom (brcmfmac): - Management Frame Protection support (to enable WPA3) - Bluetooth: - qualcomm: use the power sequencer for QCA6390 - btusb: mediatek: add ISO data transmission functions - hci_bcm4377: add BCM4388 support - btintel: add support for BlazarU core - btintel: add support for Whale Peak2 - btnxpuart: add support for AW693 A1 chipset - btnxpuart: add support for IW615 chipset - btusb: add Realtek RTL8852BE support ID 0x13d3:0x3591" * tag 'net-next-6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1589 commits) eth: fbnic: Fix spelling mistake "tiggerring" -> "triggering" tcp: Replace strncpy() with strscpy() wifi: ath12k: fix build vs old compiler tcp: Don't access uninit tcp_rsk(req)->ao_keyid in tcp_create_openreq_child(). eth: fbnic: Write the TCAM tables used for RSS control and Rx to host eth: fbnic: Add L2 address programming eth: fbnic: Add basic Rx handling eth: fbnic: Add basic Tx handling eth: fbnic: Add link detection eth: fbnic: Add initial messaging to notify FW of our presence eth: fbnic: Implement Rx queue alloc/start/stop/free eth: fbnic: Implement Tx queue alloc/start/stop/free eth: fbnic: Allocate a netdevice and napi vectors with queues eth: fbnic: Add FW communication mechanism eth: fbnic: Add message parsing for FW messages eth: fbnic: Add register init to set PCIe/Ethernet device config eth: fbnic: Allocate core device specific structures and devlink interface eth: fbnic: Add scaffolding for Meta's NIC driver PCI: Add Meta Platforms vendor ID net/sched: cls_flower: propagate tca[TCA_OPTIONS] to NL_REQ_ATTR_CHECK ...
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c11
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/esp6.c3
-rw-r--r--net/ipv6/esp6_offload.c7
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6_output.c12
-rw-r--r--net/ipv6/ipv6_sockglue.c3
-rw-r--r--net/ipv6/ndisc.c2
-rw-r--r--net/ipv6/netfilter.c6
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/raw.c10
-rw-r--r--net/ipv6/reassembly.c2
-rw-r--r--net/ipv6/route.c27
-rw-r--r--net/ipv6/seg6.c33
-rw-r--r--net/ipv6/seg6_local.c22
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c34
-rw-r--r--net/ipv6/udp.c14
-rw-r--r--net/ipv6/xfrm6_policy.c7
19 files changed, 100 insertions, 102 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5c424a0e7232..55a0fd589fc8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -863,7 +863,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
}
}
-static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
+static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int newf)
{
struct net *net;
int old;
@@ -931,7 +931,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
}
}
-static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
+static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int newf)
{
struct net *net;
int old;
@@ -1873,7 +1873,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
master, &dst,
scores, hiscore_idx);
- if (scores[hiscore_idx].ifa)
+ if (scores[hiscore_idx].ifa &&
+ scores[hiscore_idx].scopedist >= 0)
goto out;
}
@@ -6378,7 +6379,7 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
}
}
-static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
+static int addrconf_disable_ipv6(const struct ctl_table *table, int *p, int newf)
{
struct net *net = (struct net *)table->extra2;
int old;
@@ -6669,7 +6670,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
}
static
-int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
+int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val)
{
struct net *net = (struct net *)ctl->extra2;
struct inet6_dev *idev;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8041dc181bd4..90d2c7e3f5e9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -509,7 +509,7 @@ void inet6_cleanup_sock(struct sock *sk)
/* Free tx options */
- opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+ opt = unrcu_pointer(xchg(&np->opt, NULL));
if (opt) {
atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
txopt_put(opt);
@@ -1060,6 +1060,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
.ipv6_fragment = ip6_fragment,
.ipv6_dev_find = ipv6_dev_find,
+ .ip6_xmit = ip6_xmit,
};
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 34a9a5b9ed00..3920e8aa1031 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -256,8 +256,7 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
#else
static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
{
- kfree_skb(skb);
-
+ WARN_ON(1);
return -EOPNOTSUPP;
}
#endif
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 527b7caddbc6..919ebfabbe4e 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -83,6 +83,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ipv6_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET6);
+
+ if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ /* non-offload path will record the error and audit log */
+ xfrm_state_put(x);
+ x = NULL;
+ }
+
if (!x)
goto out_reset;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 83e4f9855ae1..eb111d20615c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -987,7 +987,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
struct fib6_info *from;
- from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+ from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
fib6_info_release(from);
}
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 27d8725445e3..ab504d31f0cd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -859,7 +859,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
struct ip6_frag_state state;
unsigned int mtu, hlen, nexthdr_offset;
ktime_t tstamp = skb->tstamp;
@@ -955,7 +955,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -1016,7 +1016,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- skb_set_delivery_time(frag, tstamp, mono_delivery_time);
+ skb_set_delivery_time(frag, tstamp, tstamp_type);
err = output(net, sk, frag);
if (err)
goto fail;
@@ -1124,6 +1124,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
from = rt ? rcu_dereference(rt->from) : NULL;
err = ip6_route_get_saddr(net, from, &fl6->daddr,
sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
+ fl6->flowi6_l3mdev,
&fl6->saddr);
rcu_read_unlock();
@@ -1924,7 +1925,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = cork->base.mark;
- skb->tstamp = cork->base.transmit_time;
+ if (sk_is_tcp(sk))
+ skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
+ else
+ skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);
ip6_cork_steal_dst(skb, cork);
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d4c28ec1bc51..cd342d5015c6 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -111,8 +111,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
}
- opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
- opt);
+ opt = unrcu_pointer(xchg(&inet6_sk(sk)->opt, RCU_INITIALIZER(opt)));
sk_dst_reset(sk);
return opt;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d914b23256ce..254b192c5705 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1936,7 +1936,7 @@ static struct notifier_block ndisc_netdev_notifier = {
};
#ifdef CONFIG_SYSCTL
-static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
+static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl,
const char *func, const char *dev_name)
{
static char warncomm[TASK_COMM_LEN];
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 5d989d803009..581ce055bf52 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -127,7 +127,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
ktime_t tstamp = skb->tstamp;
struct ip6_frag_state state;
u8 *prevhdr, nexthdr = 0;
@@ -193,7 +193,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -226,7 +226,7 @@ slow_path:
goto blackhole;
}
- skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb2, tstamp, tstamp_type);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 5e1b50c6a44d..6f0844c9315d 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -263,7 +263,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
- fq->q.mono_delivery_time = skb->mono_delivery_time;
+ fq->q.tstamp_type = skb->tstamp_type;
fq->q.meat += skb->len;
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 2eedf255600b..608fa9d05b55 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -362,14 +362,14 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
+ sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
/* Charge it to the socket. */
skb_dst_drop(skb);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return NET_RX_DROP;
}
@@ -390,7 +390,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
+ sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
nf_reset_ct(skb);
@@ -415,7 +415,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (inet_test_bit(HDRINCL, sk)) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
+ sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
}
@@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->protocol = htons(ETH_P_IPV6);
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
- skb->tstamp = sockc->transmit_time;
+ skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
skb_put(skb, length);
skb_reset_network_header(skb);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 327caca64257..a48be617a8ab 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -198,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
- fq->q.mono_delivery_time = skb->mono_delivery_time;
+ fq->q.tstamp_type = skb->tstamp_type;
fq->q.meat += skb->len;
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8d72ca0b086d..c752e9ed20e6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -131,7 +131,6 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
struct uncached_list {
spinlock_t lock;
struct list_head head;
- struct list_head quarantine;
};
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
@@ -189,8 +188,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
handled = true;
}
if (handled)
- list_move(&rt->dst.rt_uncached,
- &ul->quarantine);
+ list_del_init(&rt->dst.rt_uncached);
}
spin_unlock_bh(&ul->lock);
}
@@ -368,7 +366,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
in6_dev_put(idev);
}
- from = xchg((__force struct fib6_info **)&rt->from, NULL);
+ from = unrcu_pointer(xchg(&rt->from, NULL));
fib6_info_release(from);
}
@@ -1440,7 +1438,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
if (res->f6i->fib6_destroying) {
struct fib6_info *from;
- from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+ from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
fib6_info_release(from);
}
@@ -1469,7 +1467,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
/* purge completely the exception to allow releasing the held resources:
* some [sk] cache may keep the dst around for unlimited time
*/
- from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
+ from = unrcu_pointer(xchg(&rt6_ex->rt6i->from, NULL));
fib6_info_release(from);
dst_dev_put(&rt6_ex->rt6i->dst);
@@ -2376,7 +2374,7 @@ static u32 rt6_multipath_custom_hash_outer(const struct net *net,
hash_keys.ports.dst = keys.ports.dst;
*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
static u32 rt6_multipath_custom_hash_inner(const struct net *net,
@@ -2425,7 +2423,7 @@ static u32 rt6_multipath_custom_hash_inner(const struct net *net,
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
hash_keys.ports.dst = keys.ports.dst;
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
static u32 rt6_multipath_custom_hash_skb(const struct net *net,
@@ -2464,7 +2462,7 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
hash_keys.ports.dst = fl6->fl6_dport;
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
/* if skb is set it will be used and fl6 can be NULL */
@@ -2486,7 +2484,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 1:
if (skb) {
@@ -2518,7 +2516,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.ports.dst = fl6->fl6_dport;
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 2:
memset(&hash_keys, 0, sizeof(hash_keys));
@@ -2555,7 +2553,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 3:
if (skb)
@@ -3764,7 +3762,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!rt)
goto out;
- rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
+ rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len,
extack);
if (IS_ERR(rt->fib6_metrics)) {
err = PTR_ERR(rt->fib6_metrics);
@@ -5689,7 +5687,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
} else if (dest) {
struct in6_addr saddr_buf;
- if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
+ if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 &&
nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
goto nla_put_failure;
}
@@ -6758,7 +6756,6 @@ int __init ip6_route_init(void)
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
INIT_LIST_HEAD(&ul->head);
- INIT_LIST_HEAD(&ul->quarantine);
spin_lock_init(&ul->lock);
}
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index a31521e270f7..180da19c148c 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -21,9 +21,7 @@
#include <net/genetlink.h>
#include <linux/seg6.h>
#include <linux/seg6_genl.h>
-#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
-#endif
bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced)
{
@@ -437,13 +435,11 @@ static int __net_init seg6_net_init(struct net *net)
net->ipv6.seg6_data = sdata;
-#ifdef CONFIG_IPV6_SEG6_HMAC
if (seg6_hmac_net_init(net)) {
kfree(rcu_dereference_raw(sdata->tun_src));
kfree(sdata);
return -ENOMEM;
}
-#endif
return 0;
}
@@ -452,9 +448,7 @@ static void __net_exit seg6_net_exit(struct net *net)
{
struct seg6_pernet_data *sdata = seg6_pernet(net);
-#ifdef CONFIG_IPV6_SEG6_HMAC
seg6_hmac_net_exit(net);
-#endif
kfree(rcu_dereference_raw(sdata->tun_src));
kfree(sdata);
@@ -520,41 +514,28 @@ int __init seg6_init(void)
if (err)
goto out_unregister_pernet;
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
err = seg6_iptunnel_init();
if (err)
goto out_unregister_genl;
err = seg6_local_init();
- if (err) {
- seg6_iptunnel_exit();
- goto out_unregister_genl;
- }
-#endif
+ if (err)
+ goto out_unregister_iptun;
-#ifdef CONFIG_IPV6_SEG6_HMAC
err = seg6_hmac_init();
if (err)
- goto out_unregister_iptun;
-#endif
+ goto out_unregister_seg6;
pr_info("Segment Routing with IPv6\n");
out:
return err;
-#ifdef CONFIG_IPV6_SEG6_HMAC
-out_unregister_iptun:
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+out_unregister_seg6:
seg6_local_exit();
+out_unregister_iptun:
seg6_iptunnel_exit();
-#endif
-#endif
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
out_unregister_genl:
-#endif
-#if IS_ENABLED(CONFIG_IPV6_SEG6_LWTUNNEL) || IS_ENABLED(CONFIG_IPV6_SEG6_HMAC)
genl_unregister_family(&seg6_genl_family);
-#endif
out_unregister_pernet:
unregister_pernet_subsys(&ip6_segments_ops);
goto out;
@@ -562,13 +543,9 @@ out_unregister_pernet:
void seg6_exit(void)
{
-#ifdef CONFIG_IPV6_SEG6_HMAC
seg6_hmac_exit();
-#endif
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
seg6_local_exit();
seg6_iptunnel_exit();
-#endif
genl_unregister_family(&seg6_genl_family);
unregister_pernet_subsys(&ip6_segments_ops);
}
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index c434940131b1..c74705ead984 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -1380,7 +1380,9 @@ drop:
return err;
}
-DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
{
@@ -1388,6 +1390,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh = srh_state->srh;
+ lockdep_assert_held(&srh_state->bh_lock);
if (unlikely(srh == NULL))
return false;
@@ -1408,8 +1411,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
static int input_action_end_bpf(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
- struct seg6_bpf_srh_state *srh_state =
- this_cpu_ptr(&seg6_bpf_srh_states);
+ struct seg6_bpf_srh_state *srh_state;
struct ipv6_sr_hdr *srh;
int ret;
@@ -1420,10 +1422,14 @@ static int input_action_end_bpf(struct sk_buff *skb,
}
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
- /* preempt_disable is needed to protect the per-CPU buffer srh_state,
- * which is also accessed by the bpf_lwt_seg6_* helpers
+ /* The access to the per-CPU buffer srh_state is protected by running
+ * always in softirq context (with disabled BH). On PREEMPT_RT the
+ * required locking is provided by the following local_lock_nested_bh()
+ * statement. It is also accessed by the bpf_lwt_seg6_* helpers via
+ * bpf_prog_run_save_cb().
*/
- preempt_disable();
+ local_lock_nested_bh(&seg6_bpf_srh_states.bh_lock);
+ srh_state = this_cpu_ptr(&seg6_bpf_srh_states);
srh_state->srh = srh;
srh_state->hdrlen = srh->hdrlen << 3;
srh_state->valid = true;
@@ -1446,15 +1452,15 @@ static int input_action_end_bpf(struct sk_buff *skb,
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
goto drop;
+ local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
- preempt_enable();
if (ret != BPF_REDIRECT)
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
drop:
- preempt_enable();
+ local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
kfree_skb(skb);
return -EINVAL;
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bfad1e89b6a6..9d83eadd308b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -275,6 +275,6 @@ out:
out_free:
reqsk_free(req);
out_drop:
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return NULL;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 729faf8bd366..200fea92f12f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -490,14 +490,10 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
- if (!sock_owned_by_user(sk)) {
- WRITE_ONCE(sk->sk_err, err);
- sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
-
- tcp_done(sk);
- } else {
+ if (!sock_owned_by_user(sk))
+ tcp_done_with_error(sk, err);
+ else
WRITE_ONCE(sk->sk_err_soft, err);
- }
goto out;
case TCP_LISTEN:
break;
@@ -975,7 +971,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
mark = inet_twsk(sk)->tw_mark;
else
mark = READ_ONCE(sk->sk_mark);
- skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
+ skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
}
if (txhash) {
/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
@@ -1200,9 +1196,9 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
- tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key,
- tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
- tw->tw_txhash);
+ READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
+ &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel),
+ tw->tw_priority, tw->tw_txhash);
#ifdef CONFIG_TCP_AO
out:
@@ -1678,7 +1674,7 @@ reset:
discard:
if (opt_skb)
__kfree_skb(opt_skb);
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return 0;
csum_err:
reason = SKB_DROP_REASON_TCP_CSUM;
@@ -1751,8 +1747,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
int dif = inet6_iif(skb);
const struct tcphdr *th;
const struct ipv6hdr *hdr;
+ struct sock *sk = NULL;
bool refcounted;
- struct sock *sk;
int ret;
u32 isn;
struct net *net = dev_net(skb->dev);
@@ -1944,7 +1940,7 @@ bad_packet:
discard_it:
SKB_DR_OR(drop_reason, NOT_SPECIFIED);
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return 0;
discard_and_relse:
@@ -2383,8 +2379,14 @@ static struct inet_protosw tcpv6_protosw = {
static int __net_init tcpv6_net_init(struct net *net)
{
- return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
- SOCK_RAW, IPPROTO_TCP, net);
+ int res;
+
+ res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
+ SOCK_RAW, IPPROTO_TCP, net);
+ if (!res)
+ net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
+
+ return res;
}
static void __net_exit tcpv6_net_exit(struct net *net)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c81a07ac0463..6602a2e9cdb5 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -46,7 +46,6 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
-#include <trace/events/udp.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
@@ -673,7 +672,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
trace_udp_fail_queue_rcv_skb(rc, sk, skb);
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return -1;
}
@@ -776,7 +775,7 @@ csum_error:
drop:
__UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return -1;
}
@@ -940,8 +939,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
const struct in6_addr *saddr, *daddr;
struct net *net = dev_net(skb->dev);
+ struct sock *sk = NULL;
struct udphdr *uh;
- struct sock *sk;
bool refcounted;
u32 ulen = 0;
@@ -1033,7 +1032,7 @@ no_sk:
__UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return 0;
short_packet:
@@ -1054,7 +1053,7 @@ csum_error:
__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
__UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return 0;
}
@@ -1257,8 +1256,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
- dst_xfrm(skb_dst(skb))) {
+ if (is_udplite || dst_xfrm(skb_dst(skb))) {
kfree_skb(skb);
return -EIO;
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 2f1ea5f999a2..b1d81c4270ab 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -290,8 +290,14 @@ int __init xfrm6_init(void)
ret = register_pernet_subsys(&xfrm6_net_ops);
if (ret)
goto out_protocol;
+
+ ret = xfrm_nat_keepalive_init(AF_INET6);
+ if (ret)
+ goto out_nat_keepalive;
out:
return ret;
+out_nat_keepalive:
+ unregister_pernet_subsys(&xfrm6_net_ops);
out_protocol:
xfrm6_protocol_fini();
out_state:
@@ -303,6 +309,7 @@ out_policy:
void xfrm6_fini(void)
{
+ xfrm_nat_keepalive_fini(AF_INET6);
unregister_pernet_subsys(&xfrm6_net_ops);
xfrm6_protocol_fini();
xfrm6_policy_fini();