summaryrefslogtreecommitdiff
path: root/net/l2tp/l2tp_core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-16 19:28:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-16 19:28:34 -0700
commit51835949dda3783d4639cfa74ce13a3c9829de00 (patch)
tree2b593de5eba6ecc73f7c58fc65fdaffae45c7323 /net/l2tp/l2tp_core.c
parent0434dbe32053d07d658165be681505120c6b1abc (diff)
parent77ae5e5b00720372af2860efdc4bc652ac682696 (diff)
Merge tag 'net-next-6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextHEADmaster
Pull networking updates from Jakub Kicinski: "Not much excitement - a handful of large patchsets (devmem among them) did not make it in time. Core & protocols: - Use local_lock in addition to local_bh_disable() to protect per-CPU resources in networking, a step closer for local_bh_disable() not to act as a big lock on PREEMPT_RT - Use flex array for netdevice priv area, ensure its cache alignment - Add a sysctl knob to allow user to specify a default rto_min at socket init time. Bit of a big hammer but multiple companies were independently carrying such patch downstream so clearly it's useful - Support scheduling transmission of packets based on CLOCK_TAI - Un-pin TCP TIMEWAIT timer to avoid it firing on CPUs later cordoned off using cpusets - Support multiple L2TPv3 UDP tunnels using the same 5-tuple address - Allow configuration of multipath hash seed, to both allow synchronizing hashing of two routers, and preventing partial accidental sync - Improve TCP compliance with RFC 9293 for simultaneous connect() - Support sending NAT keepalives in IPsec ESP in UDP states. Userspace IKE daemon had to do this before, but the kernel can better keep track of it - Support sending supervision HSR frames with MAC addresses stored in ProxyNodeTable when RedBox (i.e. HSR-SAN) is enabled - Introduce IPPROTO_SMC for selecting SMC when socket is created - Allow UDP GSO transmit from devices with no checksum offload - openvswitch: add packet sampling via psample, separating the sampled traffic from "upcall" packets sent to user space for forwarding - nf_tables: shrink memory consumption for transaction objects Things we sprinkled into general kernel code: - Power Sequencing subsystem (used by Qualcomm Bluetooth driver for QCA6390) [ Already merged separately - Linus ] - Add IRQ information in sysfs for auxiliary bus - Introduce guard definition for local_lock - Add aligned flavor of __cacheline_group_{begin, end}() markings for grouping fields in structures BPF: - Notify user space (via epoll) when a struct_ops object is getting detached/unregistered - Add new kfuncs for a generic, open-coded bits iterator - Enable BPF programs to declare arrays of kptr, bpf_rb_root, and bpf_list_head - Support resilient split BTF which cuts down on duplication and makes BTF as compact as possible WRT BTF from modules - Add support for dumping kfunc prototypes from BTF which enables both detecting as well as dumping compilable prototypes for kfuncs - riscv64 BPF JIT improvements in particular to add 12-argument support for BPF trampolines and to utilize bpf_prog_pack for the latter - Add the capability to offload the netfilter flowtable in XDP layer through kfuncs Driver API: - Allow users to configure IRQ tresholds between which automatic IRQ moderation can choose - Expand Power Sourcing (PoE) status with power, class and failure reason. Support setting power limits - Track additional RSS contexts in the core, make sure configuration changes don't break them - Support IPsec crypto offload for IPv6 ESP and IPv4 UDP-encapsulated ESP data paths - Support updating firmware on SFP modules Tests and tooling: - mptcp: use net/lib.sh to manage netns - TCP-AO and TCP-MD5: replace debug prints used by tests with tracepoints - openvswitch: make test self-contained (don't depend on OvS CLI tools) Drivers: - Ethernet high-speed NICs: - Broadcom (bnxt): - increase the max total outstanding PTP TX packets to 4 - add timestamping statistics support - implement netdev_queue_mgmt_ops - support new RSS context API - Intel (100G, ice, idpf): - implement FEC statistics and dumping signal quality indicators - support E825C products (with 56Gbps PHYs) - nVidia/Mellanox: - support HW-GRO - mlx4/mlx5: support per-queue statistics via netlink - obey the max number of EQs setting in sub-functions - AMD/Solarflare: - support new RSS context API - AMD/Pensando: - ionic: rework fix for doorbell miss to lower overhead and skip it on new HW - Wangxun: - txgbe: support Flow Director perfect filters - Ethernet NICs consumer, embedded and virtual: - Add driver for Tehuti Networks TN40xx chips - Add driver for Meta's internal NIC chips - Add driver for Ethernet MAC on Airoha EN7581 SoCs - Add driver for Renesas Ethernet-TSN devices - Google cloud vNIC: - flow steering support - Microsoft vNIC: - support page sizes other than 4KB on ARM64 - vmware vNIC: - support latency measurement (update to version 9) - VirtIO net: - support for Byte Queue Limits - support configuring thresholds for automatic IRQ moderation - support for AF_XDP Rx zero-copy - Synopsys (stmmac): - support for STM32MP13 SoC - let platforms select the right PCS implementation - TI: - icssg-prueth: add multicast filtering support - icssg-prueth: enable PTP timestamping and PPS - Renesas: - ravb: improve Rx performance 30-400% by using page pool, theaded NAPI and timer-based IRQ coalescing - ravb: add MII support for R-Car V4M - Cadence (macb): - macb: add ARP support to Wake-On-LAN - Cortina: - use phylib for RX and TX pause configuration - Ethernet switches: - nVidia/Mellanox: - support configuration of multipath hash seed - report more accurate max MTU - use page_pool to improve Rx performance - MediaTek: - mt7530: add support for bridge port isolation - Qualcomm: - qca8k: add support for bridge port isolation - Microchip: - lan9371/2: add 100BaseTX PHY support - NXP: - vsc73xx: implement VLAN operations - Ethernet PHYs: - aquantia: enable support for aqr115c - aquantia: add support for PHY LEDs - realtek: add support for rtl8224 2.5Gbps PHY - xpcs: add memory-mapped device support - add BroadR-Reach link mode and support in Broadcom's PHY driver - CAN: - add document for ISO 15765-2 protocol support - mcp251xfd: workaround for erratum DS80000789E, use timestamps to catch when device returns incorrect FIFO status - WiFi: - mac80211/cfg80211: - parse Transmit Power Envelope (TPE) data in mac80211 instead of in drivers - improvements for 6 GHz regulatory flexibility - multi-link improvements - support multiple radios per wiphy - remove DEAUTH_NEED_MGD_TX_PREP flag - Intel (iwlwifi): - bump FW API to 91 for BZ/SC devices - report 64-bit radiotap timestamp - enable P2P low latency by default - handle Transmit Power Envelope (TPE) advertised by AP - remove support for older FW for new devices - fast resume (keeping the device configured) - mvm: re-enable Multi-Link Operation (MLO) - aggregation (A-MSDU) optimizations - MediaTek (mt76): - mt7925 Multi-Link Operation (MLO) support - Qualcomm (ath10k): - LED support for various chipsets - Qualcomm (ath12k): - remove unsupported Tx monitor handling - support channel 2 in 6 GHz band - support Spatial Multiplexing Power Save (SMPS) in 6 GHz band - supprt multiple BSSID (MBSSID) and Enhanced Multi-BSSID Advertisements (EMA) - support dynamic VLAN - add panic handler for resetting the firmware state - DebugFS support for datapath statistics - WCN7850: support for Wake on WLAN - Microchip (wilc1000): - read MAC address during probe to make it visible to user space - suspend/resume improvements - TI (wl18xx): - support newer firmware versions - RealTek (rtw89): - preparation for RTL8852BE-VT support - Wake on WLAN support for WiFi 6 chips - 36-bit PCI DMA support - RealTek (rtlwifi): - RTL8192DU support - Broadcom (brcmfmac): - Management Frame Protection support (to enable WPA3) - Bluetooth: - qualcomm: use the power sequencer for QCA6390 - btusb: mediatek: add ISO data transmission functions - hci_bcm4377: add BCM4388 support - btintel: add support for BlazarU core - btintel: add support for Whale Peak2 - btnxpuart: add support for AW693 A1 chipset - btnxpuart: add support for IW615 chipset - btusb: add Realtek RTL8852BE support ID 0x13d3:0x3591" * tag 'net-next-6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1589 commits) eth: fbnic: Fix spelling mistake "tiggerring" -> "triggering" tcp: Replace strncpy() with strscpy() wifi: ath12k: fix build vs old compiler tcp: Don't access uninit tcp_rsk(req)->ao_keyid in tcp_create_openreq_child(). eth: fbnic: Write the TCAM tables used for RSS control and Rx to host eth: fbnic: Add L2 address programming eth: fbnic: Add basic Rx handling eth: fbnic: Add basic Tx handling eth: fbnic: Add link detection eth: fbnic: Add initial messaging to notify FW of our presence eth: fbnic: Implement Rx queue alloc/start/stop/free eth: fbnic: Implement Tx queue alloc/start/stop/free eth: fbnic: Allocate a netdevice and napi vectors with queues eth: fbnic: Add FW communication mechanism eth: fbnic: Add message parsing for FW messages eth: fbnic: Add register init to set PCIe/Ethernet device config eth: fbnic: Allocate core device specific structures and devlink interface eth: fbnic: Add scaffolding for Meta's NIC driver PCI: Add Meta Platforms vendor ID net/sched: cls_flower: propagate tca[TCA_OPTIONS] to NL_REQ_ATTR_CHECK ...
Diffstat (limited to 'net/l2tp/l2tp_core.c')
-rw-r--r--net/l2tp/l2tp_core.c507
1 files changed, 277 insertions, 230 deletions
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 88a34db265d8..1c1decce7f06 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -39,7 +39,6 @@
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/l2tp.h>
-#include <linux/hash.h>
#include <linux/sort.h>
#include <linux/file.h>
#include <linux/nsproxy.h>
@@ -61,7 +60,6 @@
#include <linux/atomic.h>
#include "l2tp_core.h"
-#include "trace.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -107,11 +105,23 @@ struct l2tp_net {
/* Lock for write access to l2tp_tunnel_idr */
spinlock_t l2tp_tunnel_idr_lock;
struct idr l2tp_tunnel_idr;
- struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
- /* Lock for write access to l2tp_session_hlist */
- spinlock_t l2tp_session_hlist_lock;
+ /* Lock for write access to l2tp_v[23]_session_idr/htable */
+ spinlock_t l2tp_session_idr_lock;
+ struct idr l2tp_v2_session_idr;
+ struct idr l2tp_v3_session_idr;
+ struct hlist_head l2tp_v3_session_htable[16];
};
+static inline u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id)
+{
+ return ((u32)tunnel_id) << 16 | session_id;
+}
+
+static inline unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id)
+{
+ return ((unsigned long)sk) + session_id;
+}
+
#if IS_ENABLED(CONFIG_IPV6)
static bool l2tp_sk_is_v6(struct sock *sk)
{
@@ -125,29 +135,6 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net)
return net_generic(net, l2tp_net_id);
}
-/* Session hash global list for L2TPv3.
- * The session_id SHOULD be random according to RFC3931, but several
- * L2TP implementations use incrementing session_ids. So we do a real
- * hash on the session_id, rather than a simple bitmask.
- */
-static inline struct hlist_head *
-l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
-{
- return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)];
-}
-
-/* Session hash list.
- * The session_id SHOULD be random according to RFC2661, but several
- * L2TP implementations (Cisco and Microsoft) use incrementing
- * session_ids. So we do a real hash on the session_id, rather than a
- * simple bitmask.
- */
-static inline struct hlist_head *
-l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
-{
- return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
-}
-
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
{
trace_free_tunnel(tunnel);
@@ -240,66 +227,82 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_get_nth);
-struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel,
- u32 session_id)
+struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id)
{
- struct hlist_head *session_list;
+ const struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_session *session;
- session_list = l2tp_session_id_hash(tunnel, session_id);
-
rcu_read_lock_bh();
- hlist_for_each_entry_rcu(session, session_list, hlist)
- if (session->session_id == session_id) {
- l2tp_session_inc_refcount(session);
- rcu_read_unlock_bh();
+ session = idr_find(&pn->l2tp_v3_session_idr, session_id);
+ if (session && !hash_hashed(&session->hlist) &&
+ refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
- return session;
+ /* If we get here and session is non-NULL, the session_id
+ * collides with one in another tunnel. If sk is non-NULL,
+ * find the session matching sk.
+ */
+ if (session && sk) {
+ unsigned long key = l2tp_v3_session_hashkey(sk, session->session_id);
+
+ hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session,
+ hlist, key) {
+ if (session->tunnel->sock == sk &&
+ refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
}
+ }
rcu_read_unlock_bh();
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_get_session);
+EXPORT_SYMBOL_GPL(l2tp_v3_session_get);
-struct l2tp_session *l2tp_session_get(const struct net *net, u32 session_id)
+struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id)
{
- struct hlist_head *session_list;
+ u32 session_key = l2tp_v2_session_key(tunnel_id, session_id);
+ const struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_session *session;
- session_list = l2tp_session_id_hash_2(l2tp_pernet(net), session_id);
-
rcu_read_lock_bh();
- hlist_for_each_entry_rcu(session, session_list, global_hlist)
- if (session->session_id == session_id) {
- l2tp_session_inc_refcount(session);
- rcu_read_unlock_bh();
-
- return session;
- }
+ session = idr_find(&pn->l2tp_v2_session_idr, session_key);
+ if (session && refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
rcu_read_unlock_bh();
return NULL;
}
+EXPORT_SYMBOL_GPL(l2tp_v2_session_get);
+
+struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver,
+ u32 tunnel_id, u32 session_id)
+{
+ if (pver == L2TP_HDR_VER_2)
+ return l2tp_v2_session_get(net, tunnel_id, session_id);
+ else
+ return l2tp_v3_session_get(net, sk, session_id);
+}
EXPORT_SYMBOL_GPL(l2tp_session_get);
struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
{
- int hash;
struct l2tp_session *session;
int count = 0;
rcu_read_lock_bh();
- for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
- hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) {
- if (++count > nth) {
- l2tp_session_inc_refcount(session);
- rcu_read_unlock_bh();
- return session;
- }
+ list_for_each_entry_rcu(session, &tunnel->session_list, list) {
+ if (++count > nth) {
+ l2tp_session_inc_refcount(session);
+ rcu_read_unlock_bh();
+ return session;
}
}
-
rcu_read_unlock_bh();
return NULL;
@@ -313,86 +316,188 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
const char *ifname)
{
struct l2tp_net *pn = l2tp_pernet(net);
- int hash;
+ unsigned long tunnel_id, tmp;
struct l2tp_session *session;
+ struct l2tp_tunnel *tunnel;
rcu_read_lock_bh();
- for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
- hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
- if (!strcmp(session->ifname, ifname)) {
- l2tp_session_inc_refcount(session);
- rcu_read_unlock_bh();
-
- return session;
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel) {
+ list_for_each_entry_rcu(session, &tunnel->session_list, list) {
+ if (!strcmp(session->ifname, ifname)) {
+ l2tp_session_inc_refcount(session);
+ rcu_read_unlock_bh();
+
+ return session;
+ }
}
}
}
-
rcu_read_unlock_bh();
return NULL;
}
EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
+static void l2tp_session_coll_list_add(struct l2tp_session_coll_list *clist,
+ struct l2tp_session *session)
+{
+ l2tp_session_inc_refcount(session);
+ WARN_ON_ONCE(session->coll_list);
+ session->coll_list = clist;
+ spin_lock(&clist->lock);
+ list_add(&session->clist, &clist->list);
+ spin_unlock(&clist->lock);
+}
+
+static int l2tp_session_collision_add(struct l2tp_net *pn,
+ struct l2tp_session *session1,
+ struct l2tp_session *session2)
+{
+ struct l2tp_session_coll_list *clist;
+
+ lockdep_assert_held(&pn->l2tp_session_idr_lock);
+
+ if (!session2)
+ return -EEXIST;
+
+ /* If existing session is in IP-encap tunnel, refuse new session */
+ if (session2->tunnel->encap == L2TP_ENCAPTYPE_IP)
+ return -EEXIST;
+
+ clist = session2->coll_list;
+ if (!clist) {
+ /* First collision. Allocate list to manage the collided sessions
+ * and add the existing session to the list.
+ */
+ clist = kmalloc(sizeof(*clist), GFP_ATOMIC);
+ if (!clist)
+ return -ENOMEM;
+
+ spin_lock_init(&clist->lock);
+ INIT_LIST_HEAD(&clist->list);
+ refcount_set(&clist->ref_count, 1);
+ l2tp_session_coll_list_add(clist, session2);
+ }
+
+ /* If existing session isn't already in the session hlist, add it. */
+ if (!hash_hashed(&session2->hlist))
+ hash_add(pn->l2tp_v3_session_htable, &session2->hlist,
+ session2->hlist_key);
+
+ /* Add new session to the hlist and collision list */
+ hash_add(pn->l2tp_v3_session_htable, &session1->hlist,
+ session1->hlist_key);
+ refcount_inc(&clist->ref_count);
+ l2tp_session_coll_list_add(clist, session1);
+
+ return 0;
+}
+
+static void l2tp_session_collision_del(struct l2tp_net *pn,
+ struct l2tp_session *session)
+{
+ struct l2tp_session_coll_list *clist = session->coll_list;
+ unsigned long session_key = session->session_id;
+ struct l2tp_session *session2;
+
+ lockdep_assert_held(&pn->l2tp_session_idr_lock);
+
+ hash_del(&session->hlist);
+
+ if (clist) {
+ /* Remove session from its collision list. If there
+ * are other sessions with the same ID, replace this
+ * session's IDR entry with that session, otherwise
+ * remove the IDR entry. If this is the last session,
+ * the collision list data is freed.
+ */
+ spin_lock(&clist->lock);
+ list_del_init(&session->clist);
+ session2 = list_first_entry_or_null(&clist->list, struct l2tp_session, clist);
+ if (session2) {
+ void *old = idr_replace(&pn->l2tp_v3_session_idr, session2, session_key);
+
+ WARN_ON_ONCE(IS_ERR_VALUE(old));
+ } else {
+ void *removed = idr_remove(&pn->l2tp_v3_session_idr, session_key);
+
+ WARN_ON_ONCE(removed != session);
+ }
+ session->coll_list = NULL;
+ spin_unlock(&clist->lock);
+ if (refcount_dec_and_test(&clist->ref_count))
+ kfree(clist);
+ l2tp_session_dec_refcount(session);
+ }
+}
+
int l2tp_session_register(struct l2tp_session *session,
struct l2tp_tunnel *tunnel)
{
- struct l2tp_session *session_walk;
- struct hlist_head *g_head;
- struct hlist_head *head;
- struct l2tp_net *pn;
+ struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+ struct l2tp_session *other_session = NULL;
+ u32 session_key;
int err;
- head = l2tp_session_id_hash(tunnel, session->session_id);
-
- spin_lock_bh(&tunnel->hlist_lock);
+ spin_lock_bh(&tunnel->list_lock);
if (!tunnel->acpt_newsess) {
err = -ENODEV;
goto err_tlock;
}
- hlist_for_each_entry(session_walk, head, hlist)
- if (session_walk->session_id == session->session_id) {
- err = -EEXIST;
- goto err_tlock;
- }
-
if (tunnel->version == L2TP_HDR_VER_3) {
- pn = l2tp_pernet(tunnel->l2tp_net);
- g_head = l2tp_session_id_hash_2(pn, session->session_id);
-
- spin_lock_bh(&pn->l2tp_session_hlist_lock);
-
+ session_key = session->session_id;
+ spin_lock_bh(&pn->l2tp_session_idr_lock);
+ err = idr_alloc_u32(&pn->l2tp_v3_session_idr, NULL,
+ &session_key, session_key, GFP_ATOMIC);
/* IP encap expects session IDs to be globally unique, while
- * UDP encap doesn't.
+ * UDP encap doesn't. This isn't per the RFC, which says that
+ * sessions are identified only by the session ID, but is to
+ * support existing userspace which depends on it.
*/
- hlist_for_each_entry(session_walk, g_head, global_hlist)
- if (session_walk->session_id == session->session_id &&
- (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP ||
- tunnel->encap == L2TP_ENCAPTYPE_IP)) {
- err = -EEXIST;
- goto err_tlock_pnlock;
- }
+ if (err == -ENOSPC && tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+ other_session = idr_find(&pn->l2tp_v3_session_idr,
+ session_key);
+ err = l2tp_session_collision_add(pn, session,
+ other_session);
+ }
+ spin_unlock_bh(&pn->l2tp_session_idr_lock);
+ } else {
+ session_key = l2tp_v2_session_key(tunnel->tunnel_id,
+ session->session_id);
+ spin_lock_bh(&pn->l2tp_session_idr_lock);
+ err = idr_alloc_u32(&pn->l2tp_v2_session_idr, NULL,
+ &session_key, session_key, GFP_ATOMIC);
+ spin_unlock_bh(&pn->l2tp_session_idr_lock);
+ }
- l2tp_tunnel_inc_refcount(tunnel);
- hlist_add_head_rcu(&session->global_hlist, g_head);
+ if (err) {
+ if (err == -ENOSPC)
+ err = -EEXIST;
+ goto err_tlock;
+ }
+
+ l2tp_tunnel_inc_refcount(tunnel);
- spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ list_add(&session->list, &tunnel->session_list);
+ spin_unlock_bh(&tunnel->list_lock);
+
+ spin_lock_bh(&pn->l2tp_session_idr_lock);
+ if (tunnel->version == L2TP_HDR_VER_3) {
+ if (!other_session)
+ idr_replace(&pn->l2tp_v3_session_idr, session, session_key);
} else {
- l2tp_tunnel_inc_refcount(tunnel);
+ idr_replace(&pn->l2tp_v2_session_idr, session, session_key);
}
-
- hlist_add_head_rcu(&session->hlist, head);
- spin_unlock_bh(&tunnel->hlist_lock);
+ spin_unlock_bh(&pn->l2tp_session_idr_lock);
trace_register_session(session);
return 0;
-err_tlock_pnlock:
- spin_unlock_bh(&pn->l2tp_session_hlist_lock);
err_tlock:
- spin_unlock_bh(&tunnel->hlist_lock);
+ spin_unlock_bh(&tunnel->list_lock);
return err;
}
@@ -785,19 +890,14 @@ static void l2tp_session_queue_purge(struct l2tp_session *session)
}
}
-/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
- * here. The skb is not on a list when we get here.
- * Returns 0 if the packet was a data packet and was successfully passed on.
- * Returns 1 if the packet was not a good data packet and could not be
- * forwarded. All such packets are passed up to userspace to deal with.
- */
-static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
+/* UDP encapsulation receive handler. See net/ipv4/udp.c for details. */
+int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct l2tp_session *session = NULL;
- struct l2tp_tunnel *orig_tunnel = tunnel;
+ struct l2tp_tunnel *tunnel = NULL;
+ struct net *net = sock_net(sk);
unsigned char *ptr, *optr;
u16 hdrflags;
- u32 tunnel_id, session_id;
u16 version;
int length;
@@ -807,11 +907,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
__skb_pull(skb, sizeof(struct udphdr));
/* Short packet? */
- if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
- pr_debug_ratelimited("%s: recv short packet (len=%d)\n",
- tunnel->name, skb->len);
- goto invalid;
- }
+ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX))
+ goto pass;
/* Point to L2TP header */
optr = skb->data;
@@ -834,6 +931,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
ptr += 2;
if (version == L2TP_HDR_VER_2) {
+ u16 tunnel_id, session_id;
+
/* If length is present, skip it */
if (hdrflags & L2TP_HDRFLAG_L)
ptr += 2;
@@ -841,49 +940,35 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Extract tunnel and session ID */
tunnel_id = ntohs(*(__be16 *)ptr);
ptr += 2;
-
- if (tunnel_id != tunnel->tunnel_id) {
- /* We are receiving trafic for another tunnel, probably
- * because we have several tunnels between the same
- * IP/port quadruple, look it up.
- */
- struct l2tp_tunnel *alt_tunnel;
-
- alt_tunnel = l2tp_tunnel_get(tunnel->l2tp_net, tunnel_id);
- if (!alt_tunnel)
- goto pass;
- tunnel = alt_tunnel;
- }
-
session_id = ntohs(*(__be16 *)ptr);
ptr += 2;
+
+ session = l2tp_v2_session_get(net, tunnel_id, session_id);
} else {
+ u32 session_id;
+
ptr += 2; /* skip reserved bits */
- tunnel_id = tunnel->tunnel_id;
session_id = ntohl(*(__be32 *)ptr);
ptr += 4;
- }
- /* Check protocol version */
- if (version != tunnel->version) {
- pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
- tunnel->name, version, tunnel->version);
- goto invalid;
+ session = l2tp_v3_session_get(net, sk, session_id);
}
- /* Find the session context */
- session = l2tp_tunnel_get_session(tunnel, session_id);
if (!session || !session->recv_skb) {
if (session)
l2tp_session_dec_refcount(session);
/* Not found? Pass to userspace to deal with */
- pr_debug_ratelimited("%s: no session found (%u/%u). Passing up.\n",
- tunnel->name, tunnel_id, session_id);
goto pass;
}
- if (tunnel->version == L2TP_HDR_VER_3 &&
+ tunnel = session->tunnel;
+
+ /* Check protocol version */
+ if (version != tunnel->version)
+ goto invalid;
+
+ if (version == L2TP_HDR_VER_3 &&
l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
l2tp_session_dec_refcount(session);
goto invalid;
@@ -892,9 +977,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
l2tp_session_dec_refcount(session);
- if (tunnel != orig_tunnel)
- l2tp_tunnel_dec_refcount(tunnel);
-
return 0;
invalid:
@@ -904,51 +986,14 @@ pass:
/* Put UDP header back */
__skb_push(skb, sizeof(struct udphdr));
- if (tunnel != orig_tunnel)
- l2tp_tunnel_dec_refcount(tunnel);
-
- return 1;
-}
-
-/* UDP encapsulation receive and error receive handlers.
- * See net/ipv4/udp.c for details.
- *
- * Note that these functions are called from inside an
- * RCU-protected region, but without the socket being locked.
- *
- * Hence we use rcu_dereference_sk_user_data to access the
- * tunnel data structure rather the usual l2tp_sk_to_tunnel
- * accessor function.
- */
-int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
-{
- struct l2tp_tunnel *tunnel;
-
- tunnel = rcu_dereference_sk_user_data(sk);
- if (!tunnel)
- goto pass_up;
- if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC))
- goto pass_up;
-
- if (l2tp_udp_recv_core(tunnel, skb))
- goto pass_up;
-
- return 0;
-
-pass_up:
return 1;
}
EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
+/* UDP encapsulation receive error handler. See net/ipv4/udp.c for details. */
static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
- struct l2tp_tunnel *tunnel;
-
- tunnel = rcu_dereference_sk_user_data(sk);
- if (!tunnel || tunnel->fd < 0)
- return;
-
sk->sk_err = err;
sk_error_report(sk);
@@ -1206,26 +1251,36 @@ end:
return;
}
-/* Remove an l2tp session from l2tp_core's hash lists. */
+/* Remove an l2tp session from l2tp_core's lists. */
static void l2tp_session_unhash(struct l2tp_session *session)
{
struct l2tp_tunnel *tunnel = session->tunnel;
- /* Remove the session from core hashes */
if (tunnel) {
- /* Remove from the per-tunnel hash */
- spin_lock_bh(&tunnel->hlist_lock);
- hlist_del_init_rcu(&session->hlist);
- spin_unlock_bh(&tunnel->hlist_lock);
-
- /* For L2TPv3 we have a per-net hash: remove from there, too */
- if (tunnel->version != L2TP_HDR_VER_2) {
- struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
-
- spin_lock_bh(&pn->l2tp_session_hlist_lock);
- hlist_del_init_rcu(&session->global_hlist);
- spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+ struct l2tp_session *removed = session;
+
+ /* Remove from the per-tunnel list */
+ spin_lock_bh(&tunnel->list_lock);
+ list_del_init(&session->list);
+ spin_unlock_bh(&tunnel->list_lock);
+
+ /* Remove from per-net IDR */
+ spin_lock_bh(&pn->l2tp_session_idr_lock);
+ if (tunnel->version == L2TP_HDR_VER_3) {
+ if (hash_hashed(&session->hlist))
+ l2tp_session_collision_del(pn, session);
+ else
+ removed = idr_remove(&pn->l2tp_v3_session_idr,
+ session->session_id);
+ } else {
+ u32 session_key = l2tp_v2_session_key(tunnel->tunnel_id,
+ session->session_id);
+ removed = idr_remove(&pn->l2tp_v2_session_idr,
+ session_key);
}
+ WARN_ON_ONCE(removed && removed != session);
+ spin_unlock_bh(&pn->l2tp_session_idr_lock);
synchronize_rcu();
}
@@ -1236,28 +1291,22 @@ static void l2tp_session_unhash(struct l2tp_session *session)
static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
{
struct l2tp_session *session;
- int hash;
- spin_lock_bh(&tunnel->hlist_lock);
+ spin_lock_bh(&tunnel->list_lock);
tunnel->acpt_newsess = false;
- for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
-again:
- hlist_for_each_entry_rcu(session, &tunnel->session_hlist[hash], hlist) {
- hlist_del_init_rcu(&session->hlist);
-
- spin_unlock_bh(&tunnel->hlist_lock);
- l2tp_session_delete(session);
- spin_lock_bh(&tunnel->hlist_lock);
-
- /* Now restart from the beginning of this hash
- * chain. We always remove a session from the
- * list so we are guaranteed to make forward
- * progress.
- */
- goto again;
- }
+ for (;;) {
+ session = list_first_entry_or_null(&tunnel->session_list,
+ struct l2tp_session, list);
+ if (!session)
+ break;
+ l2tp_session_inc_refcount(session);
+ list_del_init(&session->list);
+ spin_unlock_bh(&tunnel->list_lock);
+ l2tp_session_delete(session);
+ spin_lock_bh(&tunnel->list_lock);
+ l2tp_session_dec_refcount(session);
}
- spin_unlock_bh(&tunnel->hlist_lock);
+ spin_unlock_bh(&tunnel->list_lock);
}
/* Tunnel socket destroy hook for UDP encapsulation */
@@ -1451,8 +1500,9 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
tunnel->magic = L2TP_TUNNEL_MAGIC;
sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
- spin_lock_init(&tunnel->hlist_lock);
+ spin_lock_init(&tunnel->list_lock);
tunnel->acpt_newsess = true;
+ INIT_LIST_HEAD(&tunnel->session_list);
tunnel->encap = encap;
@@ -1462,8 +1512,6 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
/* Init delete workqueue struct */
INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work);
- INIT_LIST_HEAD(&tunnel->list);
-
err = 0;
err:
if (tunnelp)
@@ -1651,8 +1699,10 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
skb_queue_head_init(&session->reorder_q);
+ session->hlist_key = l2tp_v3_session_hashkey(tunnel->sock, session->session_id);
INIT_HLIST_NODE(&session->hlist);
- INIT_HLIST_NODE(&session->global_hlist);
+ INIT_LIST_HEAD(&session->clist);
+ INIT_LIST_HEAD(&session->list);
if (cfg) {
session->pwtype = cfg->pw_type;
@@ -1685,15 +1735,13 @@ EXPORT_SYMBOL_GPL(l2tp_session_create);
static __net_init int l2tp_init_net(struct net *net)
{
struct l2tp_net *pn = net_generic(net, l2tp_net_id);
- int hash;
idr_init(&pn->l2tp_tunnel_idr);
spin_lock_init(&pn->l2tp_tunnel_idr_lock);
- for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
- INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
-
- spin_lock_init(&pn->l2tp_session_hlist_lock);
+ idr_init(&pn->l2tp_v2_session_idr);
+ idr_init(&pn->l2tp_v3_session_idr);
+ spin_lock_init(&pn->l2tp_session_idr_lock);
return 0;
}
@@ -1703,7 +1751,6 @@ static __net_exit void l2tp_exit_net(struct net *net)
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel = NULL;
unsigned long tunnel_id, tmp;
- int hash;
rcu_read_lock_bh();
idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
@@ -1716,8 +1763,8 @@ static __net_exit void l2tp_exit_net(struct net *net)
flush_workqueue(l2tp_wq);
rcu_barrier();
- for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
- WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
+ idr_destroy(&pn->l2tp_v2_session_idr);
+ idr_destroy(&pn->l2tp_v3_session_idr);
idr_destroy(&pn->l2tp_tunnel_idr);
}