diff options
author | David S. Miller <davem@davemloft.net> | 2020-01-21 12:18:20 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-01-21 12:18:20 +0100 |
commit | 4f2c17e0f3324b3b82a3e0985245aefd6dcc5495 (patch) | |
tree | 997228d030d9f3c168f3e2d4d55246cbbc55026e /net/ipv4 | |
parent | d84b99ff69c14a03d76e7fdf9c04895848e6aece (diff) | |
parent | e27cca96cd68fa2c6814c90f9a1cfd36bb68c593 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next
Steffen Klassert says:
====================
pull request (net-next): ipsec-next 2020-01-21
1) Add support for TCP encapsulation of IKE and ESP messages,
as defined by RFC 8229. Patchset from Sabrina Dubroca.
Please note that there is a merge conflict in:
net/unix/af_unix.c
between commit:
3c32da19a858 ("unix: Show number of pending scm files of receive queue in fdinfo")
from the net-next tree and commit:
b50b0580d27b ("net: add queue argument to __skb_wait_for_more_packets and __skb_{,try_}recv_datagram")
from the ipsec-next tree.
The conflict can be solved as done in linux-next.
Please pull or let me know if there are problems.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 11 | ||||
-rw-r--r-- | net/ipv4/esp4.c | 264 | ||||
-rw-r--r-- | net/ipv4/udp.c | 3 | ||||
-rw-r--r-- | net/ipv4/xfrm4_protocol.c | 9 |
4 files changed, 261 insertions, 26 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index fc816b187170..f96bd489b362 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -378,6 +378,17 @@ config INET_ESP_OFFLOAD If unsure, say N. +config INET_ESPINTCP + bool "IP: ESP in TCP encapsulation (RFC 8229)" + depends on XFRM && INET_ESP + select STREAM_PARSER + select NET_SOCK_MSG + help + Support for RFC 8229 encapsulation of ESP and IKE over + TCP/IPv4 sockets. + + If unsure, say N. + config INET_IPCOMP tristate "IP: IPComp transformation" select INET_XFRM_TUNNEL diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 5c967764041f..103c7d599a3c 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -18,6 +18,8 @@ #include <net/icmp.h> #include <net/protocol.h> #include <net/udp.h> +#include <net/tcp.h> +#include <net/espintcp.h> #include <linux/highmem.h> @@ -117,6 +119,132 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) put_page(sg_page(sg)); } +#ifdef CONFIG_INET_ESPINTCP +struct esp_tcp_sk { + struct sock *sk; + struct rcu_head rcu; +}; + +static void esp_free_tcp_sk(struct rcu_head *head) +{ + struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); + + sock_put(esk->sk); + kfree(esk); +} + +static struct sock *esp_find_tcp_sk(struct xfrm_state *x) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct esp_tcp_sk *esk; + __be16 sport, dport; + struct sock *nsk; + struct sock *sk; + + sk = rcu_dereference(x->encap_sk); + if (sk && sk->sk_state == TCP_ESTABLISHED) + return sk; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (sk && sk == nsk) { + esk = kmalloc(sizeof(*esk), GFP_ATOMIC); + if (!esk) { + spin_unlock_bh(&x->lock); + return ERR_PTR(-ENOMEM); + } + RCU_INIT_POINTER(x->encap_sk, NULL); + esk->sk = sk; + call_rcu(&esk->rcu, esp_free_tcp_sk); + } + spin_unlock_bh(&x->lock); + + sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4, + dport, x->props.saddr.a4, sport, 0); + if (!sk) + return ERR_PTR(-ENOENT); + + if (!tcp_is_ulp_esp(sk)) { + sock_put(sk); + return ERR_PTR(-EINVAL); + } + + spin_lock_bh(&x->lock); + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (encap->encap_sport != sport || + encap->encap_dport != dport) { + sock_put(sk); + sk = nsk ?: ERR_PTR(-EREMCHG); + } else if (sk == nsk) { + sock_put(sk); + } else { + rcu_assign_pointer(x->encap_sk, sk); + } + spin_unlock_bh(&x->lock); + + return sk; +} + +static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) +{ + struct sock *sk; + int err; + + rcu_read_lock(); + + sk = esp_find_tcp_sk(x); + err = PTR_ERR_OR_ZERO(sk); + if (err) + goto out; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) + err = espintcp_queue_out(sk, skb); + else + err = espintcp_push_skb(sk, skb); + bh_unlock_sock(sk); + +out: + rcu_read_unlock(); + return err; +} + +static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + + return esp_output_tcp_finish(x, skb); +} + +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + local_bh_disable(); + err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb); + local_bh_enable(); + + /* EINPROGRESS just happens to do the right thing. It + * actually means that the skb has been consumed and + * isn't coming back. + */ + return err ?: -EINPROGRESS; +} +#else +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + kfree_skb(skb); + + return -EOPNOTSUPP; +} +#endif + static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -147,7 +275,11 @@ static void esp_output_done(struct crypto_async_request *base, int err) secpath_reset(skb); xfrm_dev_resume(skb); } else { - xfrm_output_resume(skb, err); + if (!err && + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + esp_output_tail_tcp(x, skb); + else + xfrm_output_resume(skb, err); } } @@ -225,45 +357,100 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) tail[plen - 1] = proto; } -static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) +static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, + int encap_type, + struct esp_info *esp, + __be16 sport, + __be16 dport) { - int encap_type; struct udphdr *uh; __be32 *udpdata32; - __be16 sport, dport; - struct xfrm_encap_tmpl *encap = x->encap; - struct ip_esp_hdr *esph = esp->esph; unsigned int len; - spin_lock_bh(&x->lock); - sport = encap->encap_sport; - dport = encap->encap_dport; - encap_type = encap->encap_type; - spin_unlock_bh(&x->lock); - len = skb->len + esp->tailen - skb_transport_offset(skb); - if (len + sizeof(struct iphdr) >= IP_MAX_MTU) - return -EMSGSIZE; + if (len + sizeof(struct iphdr) > IP_MAX_MTU) + return ERR_PTR(-EMSGSIZE); - uh = (struct udphdr *)esph; + uh = (struct udphdr *)esp->esph; uh->source = sport; uh->dest = dport; uh->len = htons(len); uh->check = 0; + *skb_mac_header(skb) = IPPROTO_UDP; + + if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) { + udpdata32 = (__be32 *)(uh + 1); + udpdata32[0] = udpdata32[1] = 0; + return (struct ip_esp_hdr *)(udpdata32 + 2); + } + + return (struct ip_esp_hdr *)(uh + 1); +} + +#ifdef CONFIG_INET_ESPINTCP +static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + __be16 *lenp = (void *)esp->esph; + struct ip_esp_hdr *esph; + unsigned int len; + struct sock *sk; + + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len > IP_MAX_MTU) + return ERR_PTR(-EMSGSIZE); + + rcu_read_lock(); + sk = esp_find_tcp_sk(x); + rcu_read_unlock(); + + if (IS_ERR(sk)) + return ERR_CAST(sk); + + *lenp = htons(len); + esph = (struct ip_esp_hdr *)(lenp + 1); + + return esph; +} +#else +static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif + +static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb, + struct esp_info *esp) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct ip_esp_hdr *esph; + __be16 sport, dport; + int encap_type; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + encap_type = encap->encap_type; + spin_unlock_bh(&x->lock); + switch (encap_type) { default: case UDP_ENCAP_ESPINUDP: - esph = (struct ip_esp_hdr *)(uh + 1); - break; case UDP_ENCAP_ESPINUDP_NON_IKE: - udpdata32 = (__be32 *)(uh + 1); - udpdata32[0] = udpdata32[1] = 0; - esph = (struct ip_esp_hdr *)(udpdata32 + 2); + esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport); + break; + case TCP_ENCAP_ESPINTCP: + esph = esp_output_tcp_encap(x, skb, esp); break; } - *skb_mac_header(skb) = IPPROTO_UDP; + if (IS_ERR(esph)) + return PTR_ERR(esph); + esp->esph = esph; return 0; @@ -279,9 +466,9 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * struct sk_buff *trailer; int tailen = esp->tailen; - /* this is non-NULL only with UDP Encapsulation */ + /* this is non-NULL only with TCP/UDP Encapsulation */ if (x->encap) { - int err = esp_output_udp_encap(x, skb, esp); + int err = esp_output_encap(x, skb, esp); if (err < 0) return err; @@ -474,6 +661,9 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * if (sg != dsg) esp_ssg_unref(x, tmp); + if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + err = esp_output_tail_tcp(x, skb); + error_free: kfree(tmp); error: @@ -600,7 +790,23 @@ int esp_input_done2(struct sk_buff *skb, int err) if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; + struct tcphdr *th = (void *)(skb_network_header(skb) + ihl); struct udphdr *uh = (void *)(skb_network_header(skb) + ihl); + __be16 source; + + switch (x->encap->encap_type) { + case TCP_ENCAP_ESPINTCP: + source = th->source; + break; + case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: + source = uh->source; + break; + default: + WARN_ON_ONCE(1); + err = -EINVAL; + goto out; + } /* * 1) if the NAT-T peer's IP or port changed then @@ -609,11 +815,11 @@ int esp_input_done2(struct sk_buff *skb, int err) * SRC ports. */ if (iph->saddr != x->props.saddr.a4 || - uh->source != encap->encap_sport) { + source != encap->encap_sport) { xfrm_address_t ipaddr; ipaddr.a4 = iph->saddr; - km_new_mapping(x, &ipaddr, uh->source); + km_new_mapping(x, &ipaddr, source); /* XXX: perhaps add an extra * policy check here, to see @@ -988,6 +1194,14 @@ static int esp_init_state(struct xfrm_state *x) case UDP_ENCAP_ESPINUDP_NON_IKE: x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); break; +#ifdef CONFIG_INET_ESPINTCP + case TCP_ENCAP_ESPINTCP: + /* only the length field, TCP encap is done by + * the socket + */ + x->props.header_len += 2; + break; +#endif } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 208da0917469..e4fd4408b775 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1708,7 +1708,8 @@ busy_check: /* sk_queue is empty, reader_queue may contain peeked packets */ } while (timeo && - !__skb_wait_for_more_packets(sk, &error, &timeo, + !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, + &error, &timeo, (struct sk_buff *)sk_queue)); *err = error; diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index 8a4285712808..ea595c8549c7 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -72,6 +72,14 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, if (!head) goto out; + if (!skb_dst(skb)) { + const struct iphdr *iph = ip_hdr(skb); + + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) + goto drop; + } + for_each_protocol_rcu(*head, handler) if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) return ret; @@ -79,6 +87,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +drop: kfree_skb(skb); return 0; } |