From a36e185e8c85523413c1ae3e03a0bdde5501f403 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 8 Nov 2018 12:19:14 +0100 Subject: udp: Handle ICMP errors for tunnels with same destination port on both endpoints For both IPv4 and IPv6, if we can't match errors to a socket, try tunnels before ignoring them. Look up a socket with the original source and destination ports as found in the UDP packet inside the ICMP payload, this will work for tunnels that force the same destination port for both endpoints, i.e. VXLAN and GENEVE. Actually, lwtunnels could break this assumption if they are configured by an external control plane to have different destination ports on the endpoints: in this case, we won't be able to trace ICMP messages back to them. For IPv6 redirect messages, call ip6_redirect() directly with the output interface argument set to the interface we received the packet from (as it's the very interface we should build the exception on), otherwise the new nexthop will be rejected. There's no such need for IPv4. Tunnels can now export an encap_err_lookup() operation that indicates a match. Pass the packet to the lookup function, and if the tunnel driver reports a matching association, continue with regular ICMP error handling. v2: - Added newline between network and transport header sets in __udp{4,6}_lib_err_encap() (David Miller) - Removed redundant skb_reset_network_header(skb); in __udp4_lib_err_encap() - Removed redundant reassignment of iph in __udp4_lib_err_encap() (Sabrina Dubroca) - Edited comment to __udp{4,6}_lib_err_encap() to reflect the fact this won't work with lwtunnels configured to use asymmetric ports. By the way, it's VXLAN, not VxLAN (Jiri Benc) Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv6/udp.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 11 deletions(-) (limited to 'net/ipv6/udp.c') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c55698d19d68..1216c920f945 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -462,6 +462,61 @@ csum_copy_err: goto try_again; } +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +void udpv6_encap_enable(void) +{ + static_branch_enable(&udpv6_encap_needed_key); +} +EXPORT_SYMBOL(udpv6_encap_enable); + +/* Try to match ICMP errors to UDP tunnels by looking up a socket without + * reversing source and destination port: this will match tunnels that force the + * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that + * lwtunnels might actually break this assumption by being configured with + * different destination ports on endpoints, in this case we won't be able to + * trace ICMP messages back to them. + * + * Then ask the tunnel implementation to match the error against a valid + * association. + * + * Return the socket if we have a match. + */ +static struct sock *__udp6_lib_err_encap(struct net *net, + const struct ipv6hdr *hdr, int offset, + struct udphdr *uh, + struct udp_table *udptable, + struct sk_buff *skb) +{ + int (*lookup)(struct sock *sk, struct sk_buff *skb); + int network_offset, transport_offset; + struct udp_sock *up; + struct sock *sk; + + sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, + &hdr->saddr, uh->dest, + inet6_iif(skb), 0, udptable, skb); + if (!sk) + return NULL; + + network_offset = skb_network_offset(skb); + transport_offset = skb_transport_offset(skb); + + /* Network header needs to point to the outer IPv6 header inside ICMP */ + skb_reset_network_header(skb); + + /* Transport header needs to point to the UDP header */ + skb_set_transport_header(skb, offset); + + up = udp_sk(sk); + lookup = READ_ONCE(up->encap_err_lookup); + if (!lookup || lookup(sk, skb)) + sk = NULL; + + skb_set_transport_header(skb, transport_offset); + skb_set_network_header(skb, network_offset); + return sk; +} + void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info, struct udp_table *udptable) @@ -471,6 +526,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, const struct in6_addr *saddr = &hdr->saddr; const struct in6_addr *daddr = &hdr->daddr; struct udphdr *uh = (struct udphdr *)(skb->data+offset); + bool tunnel = false; struct sock *sk; int harderr; int err; @@ -479,9 +535,18 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, skb); if (!sk) { - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); - return; + /* No socket for error: try tunnels before discarding */ + if (static_branch_unlikely(&udpv6_encap_needed_key)) { + sk = __udp6_lib_err_encap(net, hdr, offset, uh, + udptable, skb); + } + + if (!sk) { + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); + return; + } + tunnel = true; } harderr = icmpv6_err_convert(type, code, &err); @@ -495,10 +560,19 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, harderr = 1; } if (type == NDISC_REDIRECT) { - ip6_sk_redirect(skb, sk); + if (tunnel) { + ip6_redirect(skb, sock_net(sk), inet6_iif(skb), + sk->sk_mark, sk->sk_uid); + } else { + ip6_sk_redirect(skb, sk); + } goto out; } + /* Tunnels don't have an application socket: don't pass errors back */ + if (tunnel) + goto out; + if (!np->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; @@ -547,13 +621,6 @@ static __inline__ void udpv6_err(struct sk_buff *skb, __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); -void udpv6_encap_enable(void) -{ - static_branch_enable(&udpv6_encap_needed_key); -} -EXPORT_SYMBOL(udpv6_encap_enable); - static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); -- cgit v1.2.3-58-ga151