summaryrefslogtreecommitdiff
path: root/net/xfrm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 16:27:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 16:27:18 -0700
commitcb8e59cc87201af93dfbb6c3dccc8fcad72a09c2 (patch)
treea334db9022f89654b777bbce8c4c6632e65b9031 /net/xfrm
parent2e63f6ce7ed2c4ff83ba30ad9ccad422289a6c63 (diff)
parent065fcfd49763ec71ae345bb5c5a74f961031e70e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: 1) Allow setting bluetooth L2CAP modes via socket option, from Luiz Augusto von Dentz. 2) Add GSO partial support to igc, from Sasha Neftin. 3) Several cleanups and improvements to r8169 from Heiner Kallweit. 4) Add IF_OPER_TESTING link state and use it when ethtool triggers a device self-test. From Andrew Lunn. 5) Start moving away from custom driver versions, use the globally defined kernel version instead, from Leon Romanovsky. 6) Support GRO vis gro_cells in DSA layer, from Alexander Lobakin. 7) Allow hard IRQ deferral during NAPI, from Eric Dumazet. 8) Add sriov and vf support to hinic, from Luo bin. 9) Support Media Redundancy Protocol (MRP) in the bridging code, from Horatiu Vultur. 10) Support netmap in the nft_nat code, from Pablo Neira Ayuso. 11) Allow UDPv6 encapsulation of ESP in the ipsec code, from Sabrina Dubroca. Also add ipv6 support for espintcp. 12) Lots of ReST conversions of the networking documentation, from Mauro Carvalho Chehab. 13) Support configuration of ethtool rxnfc flows in bcmgenet driver, from Doug Berger. 14) Allow to dump cgroup id and filter by it in inet_diag code, from Dmitry Yakunin. 15) Add infrastructure to export netlink attribute policies to userspace, from Johannes Berg. 16) Several optimizations to sch_fq scheduler, from Eric Dumazet. 17) Fallback to the default qdisc if qdisc init fails because otherwise a packet scheduler init failure will make a device inoperative. From Jesper Dangaard Brouer. 18) Several RISCV bpf jit optimizations, from Luke Nelson. 19) Correct the return type of the ->ndo_start_xmit() method in several drivers, it's netdev_tx_t but many drivers were using 'int'. From Yunjian Wang. 20) Add an ethtool interface for PHY master/slave config, from Oleksij Rempel. 21) Add BPF iterators, from Yonghang Song. 22) Add cable test infrastructure, including ethool interfaces, from Andrew Lunn. Marvell PHY driver is the first to support this facility. 23) Remove zero-length arrays all over, from Gustavo A. R. Silva. 24) Calculate and maintain an explicit frame size in XDP, from Jesper Dangaard Brouer. 25) Add CAP_BPF, from Alexei Starovoitov. 26) Support terse dumps in the packet scheduler, from Vlad Buslov. 27) Support XDP_TX bulking in dpaa2 driver, from Ioana Ciornei. 28) Add devm_register_netdev(), from Bartosz Golaszewski. 29) Minimize qdisc resets, from Cong Wang. 30) Get rid of kernel_getsockopt and kernel_setsockopt in order to eliminate set_fs/get_fs calls. From Christoph Hellwig. * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2517 commits) selftests: net: ip_defrag: ignore EPERM net_failover: fixed rollback in net_failover_open() Revert "tipc: Fix potential tipc_aead refcnt leak in tipc_crypto_rcv" Revert "tipc: Fix potential tipc_node refcnt leak in tipc_rcv" vmxnet3: allow rx flow hash ops only when rss is enabled hinic: add set_channels ethtool_ops support selftests/bpf: Add a default $(CXX) value tools/bpf: Don't use $(COMPILE.c) bpf, selftests: Use bpf_probe_read_kernel s390/bpf: Use bcr 0,%0 as tail call nop filler s390/bpf: Maintain 8-byte stack alignment selftests/bpf: Fix verifier test selftests/bpf: Fix sample_cnt shared between two threads bpf, selftests: Adapt cls_redirect to call csum_level helper bpf: Add csum_level helper for fixing up csum levels bpf: Fix up bpf_skb_adjust_room helper's skb csum setting sfc: add missing annotation for efx_ef10_try_update_nic_stats_vf() crypto/chtls: IPv6 support for inline TLS Crypto/chcr: Fixes a coccinile check error Crypto/chcr: Fixes compilations warnings ...
Diffstat (limited to 'net/xfrm')
-rw-r--r--net/xfrm/Kconfig3
-rw-r--r--net/xfrm/Makefile2
-rw-r--r--net/xfrm/espintcp.c56
-rw-r--r--net/xfrm/xfrm_inout.h32
-rw-r--r--net/xfrm/xfrm_input.c21
-rw-r--r--net/xfrm/xfrm_interface.c5
-rw-r--r--net/xfrm/xfrm_output.c129
7 files changed, 216 insertions, 32 deletions
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 6921a18201a0..b7fd9c838416 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -99,4 +99,7 @@ config NET_KEY_MIGRATE
If unsure, say N.
+config XFRM_ESPINTCP
+ bool
+
endif # INET
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 212a4fcb4a88..2d4bb4b9f75e 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -11,4 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
-obj-$(CONFIG_INET_ESPINTCP) += espintcp.o
+obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 5a0ff665b71a..100e29682b48 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -6,6 +6,9 @@
#include <net/espintcp.h>
#include <linux/skmsg.h>
#include <net/inet_common.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6_stubs.h>
+#endif
static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
struct sock *sk)
@@ -31,7 +34,12 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk)
rcu_read_lock();
skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
local_bh_disable();
- xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
+ else
+#endif
+ xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
local_bh_enable();
rcu_read_unlock();
}
@@ -347,6 +355,9 @@ unlock:
static struct proto espintcp_prot __ro_after_init;
static struct proto_ops espintcp_ops __ro_after_init;
+static struct proto espintcp6_prot;
+static struct proto_ops espintcp6_ops;
+static DEFINE_MUTEX(tcpv6_prot_mutex);
static void espintcp_data_ready(struct sock *sk)
{
@@ -385,10 +396,14 @@ static void espintcp_destruct(struct sock *sk)
bool tcp_is_ulp_esp(struct sock *sk)
{
- return sk->sk_prot == &espintcp_prot;
+ return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot;
}
EXPORT_SYMBOL_GPL(tcp_is_ulp_esp);
+static void build_protos(struct proto *espintcp_prot,
+ struct proto_ops *espintcp_ops,
+ const struct proto *orig_prot,
+ const struct proto_ops *orig_ops);
static int espintcp_init_sk(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -416,8 +431,19 @@ static int espintcp_init_sk(struct sock *sk)
strp_check_rcv(&ctx->strp);
skb_queue_head_init(&ctx->ike_queue);
skb_queue_head_init(&ctx->out_queue);
- sk->sk_prot = &espintcp_prot;
- sk->sk_socket->ops = &espintcp_ops;
+
+ if (sk->sk_family == AF_INET) {
+ sk->sk_prot = &espintcp_prot;
+ sk->sk_socket->ops = &espintcp_ops;
+ } else {
+ mutex_lock(&tcpv6_prot_mutex);
+ if (!espintcp6_prot.recvmsg)
+ build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops);
+ mutex_unlock(&tcpv6_prot_mutex);
+
+ sk->sk_prot = &espintcp6_prot;
+ sk->sk_socket->ops = &espintcp6_ops;
+ }
ctx->saved_data_ready = sk->sk_data_ready;
ctx->saved_write_space = sk->sk_write_space;
ctx->saved_destruct = sk->sk_destruct;
@@ -491,6 +517,20 @@ static __poll_t espintcp_poll(struct file *file, struct socket *sock,
return mask;
}
+static void build_protos(struct proto *espintcp_prot,
+ struct proto_ops *espintcp_ops,
+ const struct proto *orig_prot,
+ const struct proto_ops *orig_ops)
+{
+ memcpy(espintcp_prot, orig_prot, sizeof(struct proto));
+ memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops));
+ espintcp_prot->sendmsg = espintcp_sendmsg;
+ espintcp_prot->recvmsg = espintcp_recvmsg;
+ espintcp_prot->close = espintcp_close;
+ espintcp_prot->release_cb = espintcp_release;
+ espintcp_ops->poll = espintcp_poll;
+}
+
static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
.name = "espintcp",
.owner = THIS_MODULE,
@@ -499,13 +539,7 @@ static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
void __init espintcp_init(void)
{
- memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot));
- memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops));
- espintcp_prot.sendmsg = espintcp_sendmsg;
- espintcp_prot.recvmsg = espintcp_recvmsg;
- espintcp_prot.close = espintcp_close;
- espintcp_prot.release_cb = espintcp_release;
- espintcp_ops.poll = espintcp_poll;
+ build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops);
tcp_register_ulp(&espintcp_ulp);
}
diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h
index c7b0318938e2..efc5e6b2e87b 100644
--- a/net/xfrm/xfrm_inout.h
+++ b/net/xfrm/xfrm_inout.h
@@ -6,6 +6,38 @@
#ifndef XFRM_INOUT_H
#define XFRM_INOUT_H 1
+static inline void xfrm4_extract_header(struct sk_buff *skb)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+
+ XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
+ XFRM_MODE_SKB_CB(skb)->id = iph->id;
+ XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off;
+ XFRM_MODE_SKB_CB(skb)->tos = iph->tos;
+ XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl;
+ XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph);
+ memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0,
+ sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+}
+
+static inline void xfrm6_extract_header(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
+ XFRM_MODE_SKB_CB(skb)->id = 0;
+ XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
+ XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
+ XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
+ XFRM_MODE_SKB_CB(skb)->optlen = 0;
+ memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
+ sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+#else
+ WARN_ON_ONCE(1);
+#endif
+}
+
static inline void xfrm6_beet_make_header(struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 8a202c44f89a..bd984ff17c2d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -353,17 +353,18 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
{
const struct xfrm_mode *inner_mode = &x->inner_mode;
- const struct xfrm_state_afinfo *afinfo;
- int err = -EAFNOSUPPORT;
-
- rcu_read_lock();
- afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
- if (likely(afinfo))
- err = afinfo->extract_input(x, skb);
- rcu_read_unlock();
- if (err)
- return err;
+ switch (x->outer_mode.family) {
+ case AF_INET:
+ xfrm4_extract_header(skb);
+ break;
+ case AF_INET6:
+ xfrm6_extract_header(skb);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EAFNOSUPPORT;
+ }
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 1e115cbf21d3..c407ecbc5d46 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -145,7 +145,6 @@ static int xfrmi_create(struct net_device *dev)
if (err < 0)
goto out;
- dev_hold(dev);
xfrmi_link(xfrmn, xi);
return 0;
@@ -175,7 +174,6 @@ static void xfrmi_dev_uninit(struct net_device *dev)
struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
xfrmi_unlink(xfrmn, xi);
- dev_put(dev);
}
static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
@@ -778,6 +776,7 @@ static struct pernet_operations xfrmi_net_ops = {
static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
.handler = xfrm6_rcv,
+ .input_handler = xfrm_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
@@ -785,6 +784,7 @@ static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
.handler = xfrm6_rcv,
+ .input_handler = xfrm_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
@@ -792,6 +792,7 @@ static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
.handler = xfrm6_rcv,
+ .input_handler = xfrm_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 69c4900db817..e4c23f69f69f 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -13,9 +13,15 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
+#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_route.h>
+#include <net/ipv6_stubs.h>
+#endif
+
#include "xfrm_inout.h"
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -565,6 +571,22 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
struct xfrm_state *x = skb_dst(skb)->xfrm;
int err;
+ switch (x->outer_mode.family) {
+ case AF_INET:
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+#ifdef CONFIG_NETFILTER
+ IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+#endif
+ break;
+ case AF_INET6:
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+
+#ifdef CONFIG_NETFILTER
+ IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+#endif
+ break;
+ }
+
secpath_reset(skb);
if (xfrm_dev_offload_ok(skb, x)) {
@@ -611,11 +633,101 @@ out:
}
EXPORT_SYMBOL_GPL(xfrm_output);
+static int xfrm4_tunnel_check_size(struct sk_buff *skb)
+{
+ int mtu, ret = 0;
+
+ if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
+ goto out;
+
+ if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
+ goto out;
+
+ mtu = dst_mtu(skb_dst(skb));
+ if ((!skb_is_gso(skb) && skb->len > mtu) ||
+ (skb_is_gso(skb) &&
+ !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
+ skb->protocol = htons(ETH_P_IP);
+
+ if (skb->sk)
+ xfrm_local_error(skb, mtu);
+ else
+ icmp_send(skb, ICMP_DEST_UNREACH,
+ ICMP_FRAG_NEEDED, htonl(mtu));
+ ret = -EMSGSIZE;
+ }
+out:
+ return ret;
+}
+
+static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+
+ err = xfrm4_tunnel_check_size(skb);
+ if (err)
+ return err;
+
+ XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;
+
+ xfrm4_extract_header(skb);
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int xfrm6_tunnel_check_size(struct sk_buff *skb)
+{
+ int mtu, ret = 0;
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (skb->ignore_df)
+ goto out;
+
+ mtu = dst_mtu(dst);
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
+ if ((!skb_is_gso(skb) && skb->len > mtu) ||
+ (skb_is_gso(skb) &&
+ !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
+ skb->dev = dst->dev;
+ skb->protocol = htons(ETH_P_IPV6);
+
+ if (xfrm6_local_dontfrag(skb->sk))
+ ipv6_stub->xfrm6_local_rxpmtu(skb, mtu);
+ else if (skb->sk)
+ xfrm_local_error(skb, mtu);
+ else
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ ret = -EMSGSIZE;
+ }
+out:
+ return ret;
+}
+#endif
+
+static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ int err;
+
+ err = xfrm6_tunnel_check_size(skb);
+ if (err)
+ return err;
+
+ XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
+
+ xfrm6_extract_header(skb);
+ return 0;
+#else
+ WARN_ON_ONCE(1);
+ return -EAFNOSUPPORT;
+#endif
+}
+
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
- const struct xfrm_state_afinfo *afinfo;
const struct xfrm_mode *inner_mode;
- int err = -EAFNOSUPPORT;
if (x->sel.family == AF_UNSPEC)
inner_mode = xfrm_ip2inner_mode(x,
@@ -626,13 +738,14 @@ static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
if (inner_mode == NULL)
return -EAFNOSUPPORT;
- rcu_read_lock();
- afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
- if (likely(afinfo))
- err = afinfo->extract_output(x, skb);
- rcu_read_unlock();
+ switch (inner_mode->family) {
+ case AF_INET:
+ return xfrm4_extract_output(x, skb);
+ case AF_INET6:
+ return xfrm6_extract_output(x, skb);
+ }
- return err;
+ return -EAFNOSUPPORT;
}
void xfrm_local_error(struct sk_buff *skb, int mtu)