diff options
Diffstat (limited to 'net/ipv6/udp.c')
-rw-r--r-- | net/ipv6/udp.c | 320 |
1 files changed, 200 insertions, 120 deletions
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d2d97d07ef27..9cbf363172bd 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -45,6 +45,7 @@ #include <net/raw.h> #include <net/tcp_states.h> #include <net/ip6_checksum.h> +#include <net/ip6_tunnel.h> #include <net/xfrm.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> @@ -117,12 +118,16 @@ static int compute_score(struct sock *sk, struct net *net, { int score; struct inet_sock *inet; + bool dev_match; if (!net_eq(sock_net(sk), net) || udp_sk(sk)->udp_port_hash != hnum || sk->sk_family != PF_INET6) return -1; + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + score = 0; inet = inet_sk(sk); @@ -132,27 +137,16 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } - if (sk->sk_bound_dev_if || exact_dif) { - bool dev_match = (sk->sk_bound_dev_if == dif || - sk->sk_bound_dev_if == sdif); - - if (!dev_match) - return -1; - if (sk->sk_bound_dev_if) - score++; - } + dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif); + if (!dev_match) + return -1; + score++; if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; @@ -200,66 +194,32 @@ struct sock *__udp6_lib_lookup(struct net *net, int dif, int sdif, struct udp_table *udptable, struct sk_buff *skb) { - struct sock *sk, *result; unsigned short hnum = ntohs(dport); - unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); - struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; + unsigned int hash2, slot2; + struct udp_hslot *hslot2; + struct sock *result; bool exact_dif = udp6_lib_exact_dif_match(net, skb); - int score, badness; - u32 hash = 0; - if (hslot->count > 10) { - hash2 = ipv6_portaddr_hash(net, daddr, hnum); + hash2 = ipv6_portaddr_hash(net, daddr, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + + result = udp6_lib_lookup2(net, saddr, sport, + daddr, hnum, dif, sdif, exact_dif, + hslot2, skb); + if (!result) { + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; - if (hslot->count < hslot2->count) - goto begin; result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, sdif, exact_dif, - hslot2, skb); - if (!result) { - unsigned int old_slot2 = slot2; - hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); - slot2 = hash2 & udptable->mask; - /* avoid searching the same slot again. */ - if (unlikely(slot2 == old_slot2)) - return result; - - hslot2 = &udptable->hash2[slot2]; - if (hslot->count < hslot2->count) - goto begin; - - result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, sdif, - exact_dif, hslot2, - skb); - } - if (unlikely(IS_ERR(result))) - return NULL; - return result; - } -begin: - result = NULL; - badness = -1; - sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, - sdif, exact_dif); - if (score > badness) { - if (sk->sk_reuseport) { - hash = udp6_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (unlikely(IS_ERR(result))) - return NULL; - if (result) - return result; - } - result = sk; - badness = score; - } + &in6addr_any, hnum, dif, sdif, + exact_dif, hslot2, + skb); } + if (unlikely(IS_ERR(result))) + return NULL; return result; } EXPORT_SYMBOL_GPL(__udp6_lib_lookup); @@ -329,6 +289,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int err; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; + struct udp_mib *mib; int is_udp4; if (flags & MSG_ERRQUEUE) @@ -352,6 +313,7 @@ try_again: msg->msg_flags |= MSG_TRUNC; is_udp4 = (skb->protocol == htons(ETH_P_IP)); + mib = __UDPX_MIB(sk, is_udp4); /* * If checksum is needed at all, try to do it while copying the @@ -380,24 +342,13 @@ try_again: if (unlikely(err)) { if (!peeked) { atomic_inc(&sk->sk_drops); - if (is_udp4) - UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, - is_udplite); - else - UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, - is_udplite); + SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } kfree_skb(skb); return err; } - if (!peeked) { - if (is_udp4) - UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, - is_udplite); - else - UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, - is_udplite); - } + if (!peeked) + SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS); sock_recv_ts_and_drops(msg, sk, skb); @@ -421,6 +372,9 @@ try_again: *addr_len = sizeof(*sin6); } + if (udp_sk(sk)->gro_enabled) + udp_cmsg_recv(msg, sk, skb); + if (np->rxopt.all) ip6_datagram_recv_common_ctl(sk, msg, skb); @@ -443,17 +397,8 @@ try_again: csum_copy_err: if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, udp_skb_destructor)) { - if (is_udp4) { - UDP_INC_STATS(sock_net(sk), - UDP_MIB_CSUMERRORS, is_udplite); - UDP_INC_STATS(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); - } else { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_CSUMERRORS, is_udplite); - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); - } + SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS); + SNMP_INC_STATS(mib, UDP_MIB_INERRORS); } kfree_skb(skb); @@ -463,15 +408,106 @@ csum_copy_err: goto try_again; } -void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info, - struct udp_table *udptable) +DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +void udpv6_encap_enable(void) +{ + static_branch_inc(&udpv6_encap_needed_key); +} +EXPORT_SYMBOL(udpv6_encap_enable); + +/* Handler for tunnels with arbitrary destination ports: no socket lookup, go + * through error handlers in encapsulations looking for a match. + */ +static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb, + struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, u32 info) +{ + int i; + + for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) { + int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, u32 info); + + if (!ip6tun_encaps[i]) + continue; + handler = rcu_dereference(ip6tun_encaps[i]->err_handler); + if (handler && !handler(skb, opt, type, code, offset, info)) + return 0; + } + + return -ENOENT; +} + +/* Try to match ICMP errors to UDP tunnels by looking up a socket without + * reversing source and destination port: this will match tunnels that force the + * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that + * lwtunnels might actually break this assumption by being configured with + * different destination ports on endpoints, in this case we won't be able to + * trace ICMP messages back to them. + * + * If this doesn't match any socket, probe tunnels with arbitrary destination + * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port + * we've sent packets to won't necessarily match the local destination port. + * + * Then ask the tunnel implementation to match the error against a valid + * association. + * + * Return an error if we can't find a match, the socket if we need further + * processing, zero otherwise. + */ +static struct sock *__udp6_lib_err_encap(struct net *net, + const struct ipv6hdr *hdr, int offset, + struct udphdr *uh, + struct udp_table *udptable, + struct sk_buff *skb, + struct inet6_skb_parm *opt, + u8 type, u8 code, __be32 info) +{ + int network_offset, transport_offset; + struct sock *sk; + + network_offset = skb_network_offset(skb); + transport_offset = skb_transport_offset(skb); + + /* Network header needs to point to the outer IPv6 header inside ICMP */ + skb_reset_network_header(skb); + + /* Transport header needs to point to the UDP header */ + skb_set_transport_header(skb, offset); + + sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, + &hdr->saddr, uh->dest, + inet6_iif(skb), 0, udptable, skb); + if (sk) { + int (*lookup)(struct sock *sk, struct sk_buff *skb); + struct udp_sock *up = udp_sk(sk); + + lookup = READ_ONCE(up->encap_err_lookup); + if (!lookup || lookup(sk, skb)) + sk = NULL; + } + + if (!sk) { + sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code, + offset, info)); + } + + skb_set_transport_header(skb, transport_offset); + skb_set_network_header(skb, network_offset); + + return sk; +} + +int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info, + struct udp_table *udptable) { struct ipv6_pinfo *np; const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct in6_addr *saddr = &hdr->saddr; const struct in6_addr *daddr = &hdr->daddr; struct udphdr *uh = (struct udphdr *)(skb->data+offset); + bool tunnel = false; struct sock *sk; int harderr; int err; @@ -480,9 +516,23 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), inet6_sdif(skb), udptable, skb); if (!sk) { - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); - return; + /* No socket for error: try tunnels before discarding */ + sk = ERR_PTR(-ENOENT); + if (static_branch_unlikely(&udpv6_encap_needed_key)) { + sk = __udp6_lib_err_encap(net, hdr, offset, uh, + udptable, skb, + opt, type, code, info); + if (!sk) + return 0; + } + + if (IS_ERR(sk)) { + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); + return PTR_ERR(sk); + } + + tunnel = true; } harderr = icmpv6_err_convert(type, code, &err); @@ -496,10 +546,19 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, harderr = 1; } if (type == NDISC_REDIRECT) { - ip6_sk_redirect(skb, sk); + if (tunnel) { + ip6_redirect(skb, sock_net(sk), inet6_iif(skb), + sk->sk_mark, sk->sk_uid); + } else { + ip6_sk_redirect(skb, sk); + } goto out; } + /* Tunnels don't have an application socket: don't pass errors back */ + if (tunnel) + goto out; + if (!np->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; @@ -510,7 +569,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk->sk_err = err; sk->sk_error_report(sk); out: - return; + return 0; } static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -541,21 +600,14 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -static __inline__ void udpv6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, u8 type, - u8 code, int offset, __be32 info) +static __inline__ int udpv6_err(struct sk_buff *skb, + struct inet6_skb_parm *opt, u8 type, + u8 code, int offset, __be32 info) { - __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); + return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } -DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); -void udpv6_encap_enable(void) -{ - static_branch_enable(&udpv6_encap_needed_key); -} -EXPORT_SYMBOL(udpv6_encap_enable); - -static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); @@ -638,10 +690,32 @@ drop: return -1; } +static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *next, *segs; + int ret; + + if (likely(!udp_unexpected_gso(sk, skb))) + return udpv6_queue_rcv_one_skb(sk, skb); + + __skb_push(skb, -skb_mac_offset(skb)); + segs = udp_rcv_segment(sk, skb, false); + for (skb = segs; skb; skb = next) { + next = skb->next; + __skb_pull(skb, skb_transport_offset(skb)); + + ret = udpv6_queue_rcv_one_skb(sk, skb); + if (ret > 0) + ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret, + true); + } + return 0; +} + static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, - int dif, unsigned short hnum) + int dif, int sdif, unsigned short hnum) { struct inet_sock *inet = inet_sk(sk); @@ -653,7 +727,7 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport && inet->inet_dport != rmt_port) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || - (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) || + !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif) || (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))) return false; @@ -687,6 +761,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, unsigned int offset = offsetof(typeof(*sk), sk_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); int dif = inet6_iif(skb); + int sdif = inet6_sdif(skb); struct hlist_node *node; struct sk_buff *nskb; @@ -701,7 +776,8 @@ start_lookup: sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr, - uh->source, saddr, dif, hnum)) + uh->source, saddr, dif, sdif, + hnum)) continue; /* If zero checksum and no_check is not on for * the socket then skip it. @@ -1458,11 +1534,15 @@ void udpv6_destroy_sock(struct sock *sk) udp_v6_flush_pending_frames(sk); release_sock(sk); - if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { - void (*encap_destroy)(struct sock *sk); - encap_destroy = READ_ONCE(up->encap_destroy); - if (encap_destroy) - encap_destroy(sk); + if (static_branch_unlikely(&udpv6_encap_needed_key)) { + if (up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = READ_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + if (up->encap_enabled) + static_branch_dec(&udpv6_encap_needed_key); } inet6_destroy_sock(sk); |