diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-08 21:40:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-08 21:40:54 -0400 |
commit | 35a9ad8af0bb0fa3525e6d0d20e32551d226f38e (patch) | |
tree | 15b4b33206818886d9cff371fd2163e073b70568 /net/ipv6 | |
parent | d5935b07da53f74726e2a65dd4281d0f2c70e5d4 (diff) | |
parent | 64b1f00a0830e1c53874067273a096b228d83d36 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Most notable changes in here:
1) By far the biggest accomplishment, thanks to a large range of
contributors, is the addition of multi-send for transmit. This is
the result of discussions back in Chicago, and the hard work of
several individuals.
Now, when the ->ndo_start_xmit() method of a driver sees
skb->xmit_more as true, it can choose to defer the doorbell
telling the driver to start processing the new TX queue entires.
skb->xmit_more means that the generic networking is guaranteed to
call the driver immediately with another SKB to send.
There is logic added to the qdisc layer to dequeue multiple
packets at a time, and the handling mis-predicted offloads in
software is now done with no locks held.
Finally, pktgen is extended to have a "burst" parameter that can
be used to test a multi-send implementation.
Several drivers have xmit_more support: i40e, igb, ixgbe, mlx4,
virtio_net
Adding support is almost trivial, so export more drivers to
support this optimization soon.
I want to thank, in no particular or implied order, Jesper
Dangaard Brouer, Eric Dumazet, Alexander Duyck, Tom Herbert, Jamal
Hadi Salim, John Fastabend, Florian Westphal, Daniel Borkmann,
David Tat, Hannes Frederic Sowa, and Rusty Russell.
2) PTP and timestamping support in bnx2x, from Michal Kalderon.
3) Allow adjusting the rx_copybreak threshold for a driver via
ethtool, and add rx_copybreak support to enic driver. From
Govindarajulu Varadarajan.
4) Significant enhancements to the generic PHY layer and the bcm7xxx
driver in particular (EEE support, auto power down, etc.) from
Florian Fainelli.
5) Allow raw buffers to be used for flow dissection, allowing drivers
to determine the optimal "linear pull" size for devices that DMA
into pools of pages. The objective is to get exactly the
necessary amount of headers into the linear SKB area pre-pulled,
but no more. The new interface drivers use is eth_get_headlen().
From WANG Cong, with driver conversions (several had their own
by-hand duplicated implementations) by Alexander Duyck and Eric
Dumazet.
6) Support checksumming more smoothly and efficiently for
encapsulations, and add "foo over UDP" facility. From Tom
Herbert.
7) Add Broadcom SF2 switch driver to DSA layer, from Florian
Fainelli.
8) eBPF now can load programs via a system call and has an extensive
testsuite. Alexei Starovoitov and Daniel Borkmann.
9) Major overhaul of the packet scheduler to use RCU in several major
areas such as the classifiers and rate estimators. From John
Fastabend.
10) Add driver for Intel FM10000 Ethernet Switch, from Alexander
Duyck.
11) Rearrange TCP_SKB_CB() to reduce cache line misses, from Eric
Dumazet.
12) Add Datacenter TCP congestion control algorithm support, From
Florian Westphal.
13) Reorganize sk_buff so that __copy_skb_header() is significantly
faster. From Eric Dumazet"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1558 commits)
netlabel: directly return netlbl_unlabel_genl_init()
net: add netdev_txq_bql_{enqueue, complete}_prefetchw() helpers
net: description of dma_cookie cause make xmldocs warning
cxgb4: clean up a type issue
cxgb4: potential shift wrapping bug
i40e: skb->xmit_more support
net: fs_enet: Add NAPI TX
net: fs_enet: Remove non NAPI RX
r8169:add support for RTL8168EP
net_sched: copy exts->type in tcf_exts_change()
wimax: convert printk to pr_foo()
af_unix: remove 0 assignment on static
ipv6: Do not warn for informational ICMP messages, regardless of type.
Update Intel Ethernet Driver maintainers list
bridge: Save frag_max_size between PRE_ROUTING and POST_ROUTING
tipc: fix bug in multicast congestion handling
net: better IFF_XMIT_DST_RELEASE support
net/mlx4_en: remove NETDEV_TX_BUSY
3c59x: fix bad split of cpu_to_le32(pci_map_single())
net: bcmgenet: fix Tx ring priority programming
...
Diffstat (limited to 'net/ipv6')
56 files changed, 1506 insertions, 1042 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2fe68364bb20..2e8c06108ab9 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -45,3 +45,7 @@ obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o + +ifneq ($(CONFIG_IPV6),) +obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o +endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3e118dfddd02..725c763270a0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -180,7 +180,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, - .use_tempaddr = 0, + .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, .regen_max_retry = REGEN_MAX_RETRY, @@ -1105,8 +1105,8 @@ retry: spin_unlock_bh(&ifp->lock); regen_advance = idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; + idev->cnf.dad_transmits * + NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; write_unlock_bh(&idev->lock); /* A temporary address is created only if this calculated Preferred @@ -1725,7 +1725,7 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) return; - ipv6_dev_ac_inc(ifp->idev->dev, &addr); + __ipv6_dev_ac_inc(ifp->idev, &addr); } /* caller must hold RTNL */ @@ -2844,6 +2844,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (dev->flags & IFF_SLAVE) break; + if (idev && idev->cnf.disable_ipv6) + break; + if (event == NETDEV_UP) { if (!addrconf_qdisc_ok(dev)) { /* device is not ready yet. */ @@ -3030,7 +3033,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct hlist_head *h = &inet6_addr_lst[i]; spin_lock_bh(&addrconf_hash_lock); - restart: +restart: hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); @@ -3544,8 +3547,8 @@ static void __net_exit if6_proc_net_exit(struct net *net) } static struct pernet_operations if6_proc_net_ops = { - .init = if6_proc_net_init, - .exit = if6_proc_net_exit, + .init = if6_proc_net_init, + .exit = if6_proc_net_exit, }; int __init if6_proc_init(void) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2daa3a133e49..e8c4400f23e9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -7,15 +7,15 @@ * * Adapted from linux/net/ipv4/af_inet.c * - * Fixes: + * Fixes: * piggy, Karl Knutson : Socket protocol table - * Hideaki YOSHIFUJI : sin6_scope_id support - * Arnaldo Melo : check proc_net_create return, cleanups + * Hideaki YOSHIFUJI : sin6_scope_id support + * Arnaldo Melo : check proc_net_create return, cleanups * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "IPv6: " fmt @@ -302,7 +302,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; chk_addr_ret = inet_addr_type(net, v4addr); - if (!sysctl_ip_nonlocal_bind && + if (!net->ipv4.sysctl_ip_nonlocal_bind && !(inet->freebind || inet->transparent) && v4addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && @@ -672,10 +672,10 @@ int inet6_sk_rebuild_header(struct sock *sk) } EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); -bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb) +bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, + const struct inet6_skb_parm *opt) { const struct ipv6_pinfo *np = inet6_sk(sk); - const struct inet6_skb_parm *opt = IP6CB(skb); if (np->rxopt.all) { if ((opt->hop && (np->rxopt.bits.hopopts || @@ -766,7 +766,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = 0; - atomic_set(&net->ipv6.rt_genid, 0); + atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); if (err) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 72a4930bdc0a..6d16eb0e0c7f 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -17,10 +17,10 @@ * Authors * * Mitsuru KANDA @USAGI : IPv6 Support - * Kazunori MIYAZAWA @USAGI : - * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * Kazunori MIYAZAWA @USAGI : + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * - * This file is derived from net/ipv4/ah.c. + * This file is derived from net/ipv4/ah.c. */ #define pr_fmt(fmt) "IPv6: " fmt @@ -284,7 +284,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) ipv6_rearrange_rthdr(iph, exthdr.rth); break; - default : + default: return 0; } @@ -478,7 +478,7 @@ static void ah6_input_done(struct crypto_async_request *base, int err) auth_data = ah_tmp_auth(work_iph, hdr_len); icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); - err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0; + err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) goto out; @@ -622,7 +622,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) goto out_free; } - err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0; + err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) goto out_free; @@ -647,8 +647,8 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); - struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; - struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset); + struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; + struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+offset); struct xfrm_state *x; if (type != ICMPV6_PKT_TOOBIG && @@ -713,8 +713,6 @@ static int ah6_init_state(struct xfrm_state *x) ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; ahp->icv_trunc_len = x->aalg->alg_trunc_len/8; - BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); - x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); switch (x->props.mode) { @@ -755,11 +753,10 @@ static int ah6_rcv_cb(struct sk_buff *skb, int err) return 0; } -static const struct xfrm_type ah6_type = -{ +static const struct xfrm_type ah6_type = { .description = "AH6", .owner = THIS_MODULE, - .proto = IPPROTO_AH, + .proto = IPPROTO_AH, .flags = XFRM_TYPE_REPLAY_PROT, .init_state = ah6_init_state, .destructor = ah6_destroy, diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 9a386842fd62..f5e319a8d4e2 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -46,10 +46,6 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); -/* Big ac list lock for all the sockets */ -static DEFINE_SPINLOCK(ipv6_sk_ac_lock); - - /* * socket join an anycast group */ @@ -78,7 +74,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) pac->acl_addr = *addr; rtnl_lock(); - rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; @@ -91,11 +86,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) goto error; } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags_rcu(net, IFF_UP, - IFF_UP | IFF_LOOPBACK); + dev = __dev_get_by_flags(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } } else - dev = dev_get_by_index_rcu(net, ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev == NULL) { err = -ENODEV; @@ -127,17 +122,14 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) goto error; } - err = ipv6_dev_ac_inc(dev, addr); + err = __ipv6_dev_ac_inc(idev, addr); if (!err) { - spin_lock_bh(&ipv6_sk_ac_lock); pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; - spin_unlock_bh(&ipv6_sk_ac_lock); pac = NULL; } error: - rcu_read_unlock(); rtnl_unlock(); if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); @@ -154,7 +146,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); - spin_lock_bh(&ipv6_sk_ac_lock); + rtnl_lock(); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && @@ -163,7 +155,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) prev_pac = pac; } if (!pac) { - spin_unlock_bh(&ipv6_sk_ac_lock); + rtnl_unlock(); return -ENOENT; } if (prev_pac) @@ -171,14 +163,9 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) else np->ipv6_ac_list = pac->acl_next; - spin_unlock_bh(&ipv6_sk_ac_lock); - - rtnl_lock(); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + dev = __dev_get_by_index(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - rcu_read_unlock(); rtnl_unlock(); sock_kfree_s(sk, pac, sizeof(*pac)); @@ -196,19 +183,16 @@ void ipv6_sock_ac_close(struct sock *sk) if (!np->ipv6_ac_list) return; - spin_lock_bh(&ipv6_sk_ac_lock); + rtnl_lock(); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; - spin_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; - rtnl_lock(); - rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + dev = __dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -216,10 +200,14 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } - rcu_read_unlock(); rtnl_unlock(); } +static void aca_get(struct ifacaddr6 *aca) +{ + atomic_inc(&aca->aca_refcnt); +} + static void aca_put(struct ifacaddr6 *ac) { if (atomic_dec_and_test(&ac->aca_refcnt)) { @@ -229,23 +217,40 @@ static void aca_put(struct ifacaddr6 *ac) } } +static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, + const struct in6_addr *addr) +{ + struct inet6_dev *idev = rt->rt6i_idev; + struct ifacaddr6 *aca; + + aca = kzalloc(sizeof(*aca), GFP_ATOMIC); + if (aca == NULL) + return NULL; + + aca->aca_addr = *addr; + in6_dev_hold(idev); + aca->aca_idev = idev; + aca->aca_rt = rt; + aca->aca_users = 1; + /* aca_tstamp should be updated upon changes */ + aca->aca_cstamp = aca->aca_tstamp = jiffies; + atomic_set(&aca->aca_refcnt, 1); + spin_lock_init(&aca->aca_lock); + + return aca; +} + /* * device anycast group inc (add if not found) */ -int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr) +int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca; - struct inet6_dev *idev; struct rt6_info *rt; int err; ASSERT_RTNL(); - idev = in6_dev_get(dev); - - if (idev == NULL) - return -EINVAL; - write_lock_bh(&idev->lock); if (idev->dead) { err = -ENODEV; @@ -260,46 +265,35 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr) } } - /* - * not found: create a new one. - */ - - aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC); - - if (aca == NULL) { - err = -ENOMEM; - goto out; - } - rt = addrconf_dst_alloc(idev, addr, true); if (IS_ERR(rt)) { - kfree(aca); err = PTR_ERR(rt); goto out; } - - aca->aca_addr = *addr; - aca->aca_idev = idev; - aca->aca_rt = rt; - aca->aca_users = 1; - /* aca_tstamp should be updated upon changes */ - aca->aca_cstamp = aca->aca_tstamp = jiffies; - atomic_set(&aca->aca_refcnt, 2); - spin_lock_init(&aca->aca_lock); + aca = aca_alloc(rt, addr); + if (aca == NULL) { + ip6_rt_put(rt); + err = -ENOMEM; + goto out; + } aca->aca_next = idev->ac_list; idev->ac_list = aca; + + /* Hold this for addrconf_join_solict() below before we unlock, + * it is already exposed via idev->ac_list. + */ + aca_get(aca); write_unlock_bh(&idev->lock); ip6_ins_rt(rt); - addrconf_join_solict(dev, &aca->aca_addr); + addrconf_join_solict(idev->dev, &aca->aca_addr); aca_put(aca); return 0; out: write_unlock_bh(&idev->lock); - in6_dev_put(idev); return err; } @@ -341,7 +335,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) return 0; } -/* called with rcu_read_lock() */ +/* called with rtnl_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev = __in6_dev_get(dev); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 2753319524f1..2cdc38338be3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -43,13 +43,13 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr, *final_p, final; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct in6_addr *daddr, *final_p, final; struct dst_entry *dst; struct flowi6 fl6; struct ip6_flowlabel *flowlabel = NULL; - struct ipv6_txoptions *opt; + struct ipv6_txoptions *opt; int addr_type; int err; @@ -332,7 +332,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; - struct sk_buff *skb, *skb2; + struct sk_buff *skb; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name); struct { struct sock_extended_err ee; @@ -342,7 +342,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) int copied; err = -EAGAIN; - skb = skb_dequeue(&sk->sk_error_queue); + skb = sock_dequeue_err_skb(sk); if (skb == NULL) goto out; @@ -415,17 +415,6 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_ERRQUEUE; err = copied; - /* Reset and regenerate socket error */ - spin_lock_bh(&sk->sk_error_queue.lock); - sk->sk_err = 0; - if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { - sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; - spin_unlock_bh(&sk->sk_error_queue.lock); - sk->sk_error_report(sk); - } else { - spin_unlock_bh(&sk->sk_error_queue.lock); - } - out_free_skb: kfree_skb(skb); out: diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index d15da1377149..83fc3a385a26 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -17,10 +17,10 @@ * Authors * * Mitsuru KANDA @USAGI : IPv6 Support - * Kazunori MIYAZAWA @USAGI : - * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * Kazunori MIYAZAWA @USAGI : + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * - * This file is derived from net/ipv4/esp.c + * This file is derived from net/ipv4/esp.c */ #define pr_fmt(fmt) "IPv6: " fmt @@ -598,7 +598,7 @@ static int esp6_init_state(struct xfrm_state *x) case XFRM_MODE_BEET: if (x->sel.family != AF_INET6) x->props.header_len += IPV4_BEET_PHMAXLEN + - (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); + (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); break; case XFRM_MODE_TRANSPORT: break; @@ -621,11 +621,10 @@ static int esp6_rcv_cb(struct sk_buff *skb, int err) return 0; } -static const struct xfrm_type esp6_type = -{ +static const struct xfrm_type esp6_type = { .description = "ESP6", - .owner = THIS_MODULE, - .proto = IPPROTO_ESP, + .owner = THIS_MODULE, + .proto = IPPROTO_ESP, .flags = XFRM_TYPE_REPLAY_PROT, .init_state = esp6_init_state, .destructor = esp6_destroy, diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 8d67900aa003..bfde361b6134 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -142,7 +142,7 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) default: /* Other TLV code so scan list */ if (optlen > len) goto bad; - for (curr=procs; curr->type >= 0; curr++) { + for (curr = procs; curr->type >= 0; curr++) { if (curr->type == nh[off]) { /* type specific length/alignment checks will be performed in the diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 06ba3e58320b..97ae70077a4f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -170,11 +170,11 @@ static bool is_ineligible(const struct sk_buff *skb) /* * Check the ICMP output rate limit */ -static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, - struct flowi6 *fl6) +static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, + struct flowi6 *fl6) { - struct dst_entry *dst; struct net *net = sock_net(sk); + struct dst_entry *dst; bool res = false; /* Informational messages are not limited. */ @@ -199,16 +199,20 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; - struct inet_peer *peer; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - res = inet_peer_xrlim_allow(peer, tmo); - if (peer) - inet_putpeer(peer); + if (icmp_global_allow()) { + struct inet_peer *peer; + + peer = inet_getpeer_v6(net->ipv6.peers, + &rt->rt6i_dst.addr, 1); + res = inet_peer_xrlim_allow(peer, tmo); + if (peer) + inet_putpeer(peer); + } } dst_release(dst); return res; @@ -503,7 +507,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) msg.type = type; len = skb->len - msg.offset; - len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr)); + len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n"); goto out_dst_release; @@ -636,7 +640,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) /* now skip over extension headers */ inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if (inner_offset<0) + if (inner_offset < 0) goto out; } else { inner_offset = sizeof(struct ipv6hdr); @@ -773,12 +777,12 @@ static int icmpv6_rcv(struct sk_buff *skb) break; default: - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n"); - /* informational */ if (type & ICMPV6_INFOMSG_MASK) break; + LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n"); + /* * error of unknown type. * must pass to upper level @@ -808,7 +812,7 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, memset(fl6, 0, sizeof(*fl6)); fl6->saddr = *saddr; fl6->daddr = *daddr; - fl6->flowi6_proto = IPPROTO_ICMPV6; + fl6->flowi6_proto = IPPROTO_ICMPV6; fl6->fl6_icmp_type = type; fl6->fl6_icmp_code = 0; fl6->flowi6_oif = oif; @@ -875,8 +879,8 @@ static void __net_exit icmpv6_sk_exit(struct net *net) } static struct pernet_operations icmpv6_sk_ops = { - .init = icmpv6_sk_init, - .exit = icmpv6_sk_exit, + .init = icmpv6_sk_init, + .exit = icmpv6_sk_exit, }; int __init icmpv6_init(void) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index a245e5ddffbd..29b32206e494 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -63,7 +63,6 @@ int inet6_csk_bind_conflict(const struct sock *sk, return sk2 != NULL; } - EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); struct dst_entry *inet6_csk_route_req(struct sock *sk, @@ -144,7 +143,6 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, return NULL; } - EXPORT_SYMBOL_GPL(inet6_csk_search_req); void inet6_csk_reqsk_queue_hash_add(struct sock *sk, @@ -160,10 +158,9 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); inet_csk_reqsk_queue_added(sk, timeout); } - EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); -void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) +void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; @@ -175,7 +172,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, sk->sk_bound_dev_if); } - EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); static inline diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 262e13c02ec2..051dffb49c90 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -6,7 +6,7 @@ * Generic INET6 transport hashtables * * Authors: Lotsa people, from code originally in tcp, generalised here - * by Arnaldo Carvalho de Melo <acme@mandriva.com> + * by Arnaldo Carvalho de Melo <acme@mandriva.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -198,7 +198,7 @@ begin: } } else if (score == hiscore && reuseport) { matches++; - if (((u64)phash * matches) >> 32 == 0) + if (reciprocal_scale(phash, matches) == 0) result = sk; phash = next_pseudo_random32(phash); } @@ -222,7 +222,6 @@ begin: rcu_read_unlock(); return result; } - EXPORT_SYMBOL_GPL(inet6_lookup_listener); struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, @@ -238,7 +237,6 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, return sk; } - EXPORT_SYMBOL_GPL(inet6_lookup); static int __inet6_check_established(struct inet_timewait_death_row *death_row, @@ -324,5 +322,4 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk), __inet6_check_established, __inet6_hash); } - EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 97b9fa8de377..b2d1838897c9 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -46,20 +46,11 @@ static struct kmem_cache *fib6_node_kmem __read_mostly; -enum fib_walk_state_t { -#ifdef CONFIG_IPV6_SUBTREES - FWS_S, -#endif - FWS_L, - FWS_R, - FWS_C, - FWS_U -}; - -struct fib6_cleaner_t { - struct fib6_walker_t w; +struct fib6_cleaner { + struct fib6_walker w; struct net *net; int (*func)(struct rt6_info *, void *arg); + int sernum; void *arg; }; @@ -74,8 +65,8 @@ static DEFINE_RWLOCK(fib6_walker_lock); static void fib6_prune_clones(struct net *net, struct fib6_node *fn); static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); -static int fib6_walk(struct fib6_walker_t *w); -static int fib6_walk_continue(struct fib6_walker_t *w); +static int fib6_walk(struct fib6_walker *w); +static int fib6_walk_continue(struct fib6_walker *w); /* * A routing update causes an increase of the serial number on the @@ -84,34 +75,41 @@ static int fib6_walk_continue(struct fib6_walker_t *w); * result of redirects, path MTU changes, etc. */ -static __u32 rt_sernum; - static void fib6_gc_timer_cb(unsigned long arg); static LIST_HEAD(fib6_walkers); #define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) -static inline void fib6_walker_link(struct fib6_walker_t *w) +static void fib6_walker_link(struct fib6_walker *w) { write_lock_bh(&fib6_walker_lock); list_add(&w->lh, &fib6_walkers); write_unlock_bh(&fib6_walker_lock); } -static inline void fib6_walker_unlink(struct fib6_walker_t *w) +static void fib6_walker_unlink(struct fib6_walker *w) { write_lock_bh(&fib6_walker_lock); list_del(&w->lh); write_unlock_bh(&fib6_walker_lock); } -static __inline__ u32 fib6_new_sernum(void) + +static int fib6_new_sernum(struct net *net) { - u32 n = ++rt_sernum; - if ((__s32)n <= 0) - rt_sernum = n = 1; - return n; + int new, old; + + do { + old = atomic_read(&net->ipv6.fib6_sernum); + new = old < INT_MAX ? old + 1 : 1; + } while (atomic_cmpxchg(&net->ipv6.fib6_sernum, + old, new) != old); + return new; } +enum { + FIB6_NO_SERNUM_CHANGE = 0, +}; + /* * Auxiliary address test functions for the radix tree. * @@ -128,7 +126,7 @@ static __inline__ u32 fib6_new_sernum(void) # define BITOP_BE32_SWIZZLE 0 #endif -static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) +static __be32 addr_bit_set(const void *token, int fn_bit) { const __be32 *addr = token; /* @@ -142,7 +140,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) addr[fn_bit >> 5]; } -static __inline__ struct fib6_node *node_alloc(void) +static struct fib6_node *node_alloc(void) { struct fib6_node *fn; @@ -151,12 +149,12 @@ static __inline__ struct fib6_node *node_alloc(void) return fn; } -static __inline__ void node_free(struct fib6_node *fn) +static void node_free(struct fib6_node *fn) { kmem_cache_free(fib6_node_kmem, fn); } -static __inline__ void rt6_release(struct rt6_info *rt) +static void rt6_release(struct rt6_info *rt) { if (atomic_dec_and_test(&rt->rt6i_ref)) dst_free(&rt->dst); @@ -267,7 +265,7 @@ static void __net_init fib6_tables_init(struct net *net) #endif -static int fib6_dump_node(struct fib6_walker_t *w) +static int fib6_dump_node(struct fib6_walker *w) { int res; struct rt6_info *rt; @@ -287,7 +285,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) static void fib6_dump_end(struct netlink_callback *cb) { - struct fib6_walker_t *w = (void *)cb->args[2]; + struct fib6_walker *w = (void *)cb->args[2]; if (w) { if (cb->args[4]) { @@ -310,7 +308,7 @@ static int fib6_dump_done(struct netlink_callback *cb) static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, struct netlink_callback *cb) { - struct fib6_walker_t *w; + struct fib6_walker *w; int res; w = (void *)cb->args[2]; @@ -355,7 +353,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int h, s_h; unsigned int e = 0, s_e; struct rt6_rtnl_dump_arg arg; - struct fib6_walker_t *w; + struct fib6_walker *w; struct fib6_table *tb; struct hlist_head *head; int res = 0; @@ -423,14 +421,13 @@ out: static struct fib6_node *fib6_add_1(struct fib6_node *root, struct in6_addr *addr, int plen, int offset, int allow_create, - int replace_required) + int replace_required, int sernum) { struct fib6_node *fn, *in, *ln; struct fib6_node *pn = NULL; struct rt6key *key; int bit; __be32 dir = 0; - __u32 sernum = fib6_new_sernum(); RT6_TRACE("fib6_add_1\n"); @@ -627,7 +624,7 @@ insert_above: return ln; } -static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) +static bool rt6_qualify_for_ecmp(struct rt6_info *rt) { return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == RTF_GATEWAY; @@ -820,7 +817,7 @@ add: return 0; } -static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) +static void fib6_start_gc(struct net *net, struct rt6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) @@ -848,6 +845,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, int err = -ENOMEM; int allow_create = 1; int replace_required = 0; + int sernum = fib6_new_sernum(info->nl_net); if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -860,7 +858,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), allow_create, - replace_required); + replace_required, sernum); if (IS_ERR(fn)) { err = PTR_ERR(fn); fn = NULL; @@ -894,14 +892,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, sfn->leaf = info->nl_net->ipv6.ip6_null_entry; atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); sfn->fn_flags = RTN_ROOT; - sfn->fn_sernum = fib6_new_sernum(); + sfn->fn_sernum = sernum; /* Now add the first leaf node to new subtree */ sn = fib6_add_1(sfn, &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), - allow_create, replace_required); + allow_create, replace_required, sernum); if (IS_ERR(sn)) { /* If it is failed, discard just allocated @@ -920,7 +918,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), - allow_create, replace_required); + allow_create, replace_required, sernum); if (IS_ERR(sn)) { err = PTR_ERR(sn); @@ -1174,7 +1172,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, int children; int nstate; struct fib6_node *child, *pn; - struct fib6_walker_t *w; + struct fib6_walker *w; int iter = 0; for (;;) { @@ -1276,7 +1274,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, struct nl_info *info) { - struct fib6_walker_t *w; + struct fib6_walker *w; struct rt6_info *rt = *rtp; struct net *net = info->nl_net; @@ -1414,7 +1412,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) * <0 -> walk is terminated by an error. */ -static int fib6_walk_continue(struct fib6_walker_t *w) +static int fib6_walk_continue(struct fib6_walker *w) { struct fib6_node *fn, *pn; @@ -1498,7 +1496,7 @@ skip: } } -static int fib6_walk(struct fib6_walker_t *w) +static int fib6_walk(struct fib6_walker *w) { int res; @@ -1512,15 +1510,25 @@ static int fib6_walk(struct fib6_walker_t *w) return res; } -static int fib6_clean_node(struct fib6_walker_t *w) +static int fib6_clean_node(struct fib6_walker *w) { int res; struct rt6_info *rt; - struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); + struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w); struct nl_info info = { .nl_net = c->net, }; + if (c->sernum != FIB6_NO_SERNUM_CHANGE && + w->node->fn_sernum != c->sernum) + w->node->fn_sernum = c->sernum; + + if (!c->func) { + WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); + w->leaf = NULL; + return 0; + } + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { res = c->func(rt, c->arg); if (res < 0) { @@ -1554,9 +1562,9 @@ static int fib6_clean_node(struct fib6_walker_t *w) static void fib6_clean_tree(struct net *net, struct fib6_node *root, int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) + bool prune, int sernum, void *arg) { - struct fib6_cleaner_t c; + struct fib6_cleaner c; c.w.root = root; c.w.func = fib6_clean_node; @@ -1564,14 +1572,16 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, c.w.count = 0; c.w.skip = 0; c.func = func; + c.sernum = sernum; c.arg = arg; c.net = net; fib6_walk(&c.w); } -void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), - void *arg) +static void __fib6_clean_all(struct net *net, + int (*func)(struct rt6_info *, void *), + int sernum, void *arg) { struct fib6_table *table; struct hlist_head *head; @@ -1583,13 +1593,19 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, - func, 0, arg); + func, false, sernum, arg); write_unlock_bh(&table->tb6_lock); } } rcu_read_unlock(); } +void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *), + void *arg) +{ + __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); +} + static int fib6_prune_clone(struct rt6_info *rt, void *arg) { if (rt->rt6i_flags & RTF_CACHE) { @@ -1602,25 +1618,15 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg) static void fib6_prune_clones(struct net *net, struct fib6_node *fn) { - fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL); -} - -static int fib6_update_sernum(struct rt6_info *rt, void *arg) -{ - __u32 sernum = *(__u32 *)arg; - - if (rt->rt6i_node && - rt->rt6i_node->fn_sernum != sernum) - rt->rt6i_node->fn_sernum = sernum; - - return 0; + fib6_clean_tree(net, fn, fib6_prune_clone, true, + FIB6_NO_SERNUM_CHANGE, NULL); } static void fib6_flush_trees(struct net *net) { - __u32 new_sernum = fib6_new_sernum(); + int new_sernum = fib6_new_sernum(net); - fib6_clean_all(net, fib6_update_sernum, &new_sernum); + __fib6_clean_all(net, NULL, new_sernum, NULL); } /* @@ -1828,10 +1834,10 @@ void fib6_gc_cleanup(void) struct ipv6_route_iter { struct seq_net_private p; - struct fib6_walker_t w; + struct fib6_walker w; loff_t skip; struct fib6_table *tbl; - __u32 sernum; + int sernum; }; static int ipv6_route_seq_show(struct seq_file *seq, void *v) @@ -1859,7 +1865,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) return 0; } -static int ipv6_route_yield(struct fib6_walker_t *w) +static int ipv6_route_yield(struct fib6_walker *w) { struct ipv6_route_iter *iter = w->args; @@ -1980,7 +1986,7 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos) static bool ipv6_route_iter_active(struct ipv6_route_iter *iter) { - struct fib6_walker_t *w = &iter->w; + struct fib6_walker *w = &iter->w; return w->node && !(w->state == FWS_U && w->node == w->root); } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 4052694c6f2c..3dd7d4ebd7cd 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -136,7 +136,7 @@ static void ip6_fl_gc(unsigned long dummy) spin_lock(&ip6_fl_lock); - for (i=0; i<=FL_HASH_MASK; i++) { + for (i = 0; i <= FL_HASH_MASK; i++) { struct ip6_flowlabel *fl; struct ip6_flowlabel __rcu **flp; @@ -239,7 +239,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, /* Socket flowlabel lists */ -struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) +struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) { struct ipv6_fl_socklist *sfl; struct ipv6_pinfo *np = inet6_sk(sk); @@ -259,7 +259,6 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label) rcu_read_unlock_bh(); return NULL; } - EXPORT_SYMBOL_GPL(fl6_sock_lookup); void fl6_free_socklist(struct sock *sk) @@ -293,11 +292,11 @@ void fl6_free_socklist(struct sock *sk) following rthdr. */ -struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space, - struct ip6_flowlabel * fl, - struct ipv6_txoptions * fopt) +struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, + struct ip6_flowlabel *fl, + struct ipv6_txoptions *fopt) { - struct ipv6_txoptions * fl_opt = fl->opt; + struct ipv6_txoptions *fl_opt = fl->opt; if (fopt == NULL || fopt->opt_flen == 0) return fl_opt; @@ -388,7 +387,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, goto done; msg.msg_controllen = olen; - msg.msg_control = (void*)(fl->opt+1); + msg.msg_control = (void *)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, @@ -517,7 +516,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; - struct ipv6_fl_socklist *sfl1=NULL; + struct ipv6_fl_socklist *sfl1 = NULL; struct ipv6_fl_socklist *sfl; struct ipv6_fl_socklist __rcu **sflp; struct ip6_flowlabel *fl, *fl1 = NULL; @@ -542,7 +541,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } spin_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; - (sfl = rcu_dereference(*sflp))!=NULL; + (sfl = rcu_dereference(*sflp)) != NULL; sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f304471477dc..12c3c8ef3849 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -618,6 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, int err = -1; u8 proto; struct sk_buff *new_skb; + __be16 protocol; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -734,8 +735,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, ipv6h->daddr = fl6->daddr; ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags; - ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : skb->protocol; + protocol = (dev->type == ARPHRD_ETHER) ? + htons(ETH_P_TEB) : skb->protocol; + ((__be16 *)(ipv6h + 1))[1] = protocol; if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4); @@ -756,6 +758,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } } + skb_set_inner_protocol(skb, protocol); + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); @@ -782,7 +786,7 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; + fl6.flowi6_proto = IPPROTO_GRE; dsfield = ipv4_get_dsfield(iph); @@ -832,7 +836,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; + fl6.flowi6_proto = IPPROTO_GRE; dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) @@ -1238,7 +1242,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->iflink = 0; dev->addr_len = sizeof(struct in6_addr); - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + netif_keep_dst(dev); } static int ip6gre_tunnel_init(struct net_device *dev) diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 4578e23834f7..14dacc544c3e 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -13,7 +13,7 @@ static ip6_icmp_send_t __rcu *ip6_icmp_send; int inet6_register_icmp_sender(ip6_icmp_send_t *fn) { return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ? - 0 : -EBUSY; + 0 : -EBUSY; } EXPORT_SYMBOL(inet6_register_icmp_sender); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 51d54dc376f3..a3084ab5df6c 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -15,8 +15,8 @@ */ /* Changes * - * Mitsuru KANDA @USAGI and - * YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs(). + * Mitsuru KANDA @USAGI and + * YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs(). */ #include <linux/errno.h> @@ -65,7 +65,7 @@ int ip6_rcv_finish(struct sk_buff *skb) int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { const struct ipv6hdr *hdr; - u32 pkt_len; + u32 pkt_len; struct inet6_dev *idev; struct net *net = dev_net(skb->dev); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 65eda2a8af48..9034f76ae013 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -53,31 +53,6 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) return proto; } -static int ipv6_gso_send_check(struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - const struct net_offload *ops; - int err = -EINVAL; - - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - goto out; - - ipv6h = ipv6_hdr(skb); - __skb_pull(skb, sizeof(*ipv6h)); - err = -EPROTONOSUPPORT; - - ops = rcu_dereference(inet6_offloads[ - ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); - - if (likely(ops && ops->callbacks.gso_send_check)) { - skb_reset_transport_header(skb); - err = ops->callbacks.gso_send_check(skb); - } - -out: - return err; -} - static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -244,7 +219,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, continue; iph2 = (struct ipv6hdr *)(p->data + off); - first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; + first_word = *(__be32 *)iph ^ *(__be32 *)iph2; /* All fields must match except length and Traffic Class. * XXX skbs on the gro_list have all been parsed and pulled @@ -261,6 +236,9 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, /* flush if Traffic Class fields are different */ NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); NAPI_GRO_CB(p)->flush |= flush; + + /* Clear flush_id, there's really no concept of ID in IPv6. */ + NAPI_GRO_CB(p)->flush_id = 0; } NAPI_GRO_CB(skb)->flush |= flush; @@ -303,7 +281,6 @@ out_unlock: static struct packet_offload ipv6_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .callbacks = { - .gso_send_check = ipv6_gso_send_check, .gso_segment = ipv6_gso_segment, .gro_receive = ipv6_gro_receive, .gro_complete = ipv6_gro_complete, @@ -312,8 +289,9 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { - .gso_send_check = ipv6_gso_send_check, .gso_segment = ipv6_gso_segment, + .gro_receive = ipv6_gro_receive, + .gro_complete = ipv6_gro_complete, }, }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0a3448b2888f..8e950c250ada 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -20,7 +20,7 @@ * etc. * * H. von Brand : Added missing #include <linux/string.h> - * Imran Patel : frag id should be in NBO + * Imran Patel : frag id should be in NBO * Kazunori MIYAZAWA @USAGI * : add ip6_append_data and related functions * for datagram xmit @@ -233,7 +233,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EMSGSIZE; } - EXPORT_SYMBOL(ip6_xmit); static int ip6_call_ra_chain(struct sk_buff *skb, int sel) @@ -555,14 +554,14 @@ static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; - struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; int hroom, troom; __be32 frag_id = 0; - int ptr, offset = 0, err=0; + int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; struct net *net = dev_net(skb_dst(skb)->dev); @@ -637,7 +636,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } __skb_pull(skb, hlen); - fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr)); + fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr)); __skb_push(skb, hlen); skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); @@ -662,7 +661,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag) { frag->ip_summed = CHECKSUM_NONE; skb_reset_transport_header(frag); - fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr)); + fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr)); __skb_push(frag, hlen); skb_reset_network_header(frag); memcpy(skb_network_header(frag), tmp_hdr, @@ -681,7 +680,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } err = output(skb); - if(!err) + if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); @@ -702,11 +701,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) return 0; } - while (frag) { - skb = frag->next; - kfree_skb(frag); - frag = skb; - } + kfree_skb_list(frag); IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); @@ -742,7 +737,7 @@ slow_path: /* * Keep copying data until we run out. */ - while(left > 0) { + while (left > 0) { len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) @@ -865,7 +860,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, /* Yes, checking route validity in not connected * case is not very simple. Take into account, * that we do not support routing by source, TOS, - * and MSG_DONTROUTE --ANK (980726) + * and MSG_DONTROUTE --ANK (980726) * * 1. ip6_rt_check(): If route was host route, * check that cached destination is current. @@ -1049,7 +1044,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu,unsigned int flags, + int transhdrlen, int mtu, unsigned int flags, struct rt6_info *rt) { @@ -1072,7 +1067,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_reserve(skb, hh_len); /* create space for UDP/IP header */ - skb_put(skb,fragheaderlen + transhdrlen); + skb_put(skb, fragheaderlen + transhdrlen); /* initialize network header pointer */ skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 69a84b464009..9409887fb664 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -412,12 +412,12 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; __u8 nexthdr = ipv6h->nexthdr; - __u16 off = sizeof (*ipv6h); + __u16 off = sizeof(*ipv6h); while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { __u16 optlen = 0; struct ipv6_opt_hdr *hdr; - if (raw + off + sizeof (*hdr) > skb->data && + if (raw + off + sizeof(*hdr) > skb->data && !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr))) break; @@ -534,7 +534,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; - if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { + if ((len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { rel_type = ICMPV6_PKT_TOOBIG; rel_code = 0; rel_info = mtu; @@ -995,7 +995,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - sizeof (*ipv6h); + mtu = dst_mtu(dst) - sizeof(*ipv6h); if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1087,7 +1087,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); @@ -1139,7 +1139,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); @@ -1233,11 +1233,11 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (rt->dst.dev) { dev->hard_header_len = rt->dst.dev->hard_header_len + - sizeof (struct ipv6hdr); + sizeof(struct ipv6hdr); - dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr); + dev->mtu = rt->dst.dev->mtu - sizeof(struct ipv6hdr); if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu-=8; + dev->mtu -= 8; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1354,7 +1354,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; break; } @@ -1366,7 +1366,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) memset(&p, 0, sizeof(p)); } ip6_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) { err = -EFAULT; } break; @@ -1376,7 +1376,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && @@ -1411,7 +1411,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) break; err = -ENOENT; ip6_tnl_parm_from_user(&p1, &p); @@ -1486,14 +1486,14 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; - dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); - dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); + dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr); t = netdev_priv(dev); if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu-=8; + dev->mtu -= 8; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + netif_keep_dst(dev); /* This perm addr will be used as interface identifier by IPv6 */ dev->addr_assign_type = NET_ADDR_RANDOM; eth_random_addr(dev->perm_addr); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c new file mode 100644 index 000000000000..b04ed72c4542 --- /dev/null +++ b/net/ipv6/ip6_udp_tunnel.c @@ -0,0 +1,107 @@ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/socket.h> +#include <linux/udp.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/in6.h> +#include <net/udp.h> +#include <net/udp_tunnel.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/ip6_tunnel.h> +#include <net/ip6_checksum.h> + +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, + struct socket **sockp) +{ + struct sockaddr_in6 udp6_addr; + int err; + struct socket *sock = NULL; + + err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); + if (err < 0) + goto error; + + sk_change_net(sock->sk, net); + + udp6_addr.sin6_family = AF_INET6; + memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, + sizeof(udp6_addr.sin6_addr)); + udp6_addr.sin6_port = cfg->local_udp_port; + err = kernel_bind(sock, (struct sockaddr *)&udp6_addr, + sizeof(udp6_addr)); + if (err < 0) + goto error; + + if (cfg->peer_udp_port) { + udp6_addr.sin6_family = AF_INET6; + memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6, + sizeof(udp6_addr.sin6_addr)); + udp6_addr.sin6_port = cfg->peer_udp_port; + err = kernel_connect(sock, + (struct sockaddr *)&udp6_addr, + sizeof(udp6_addr), 0); + } + if (err < 0) + goto error; + + udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums); + udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums); + + *sockp = sock; + return 0; + +error: + if (sock) { + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); + } + *sockp = NULL; + return err; +} +EXPORT_SYMBOL_GPL(udp_sock_create6); + +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, + struct sk_buff *skb, struct net_device *dev, + struct in6_addr *saddr, struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port) +{ + struct udphdr *uh; + struct ipv6hdr *ip6h; + struct sock *sk = sock->sk; + + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + + uh->dest = dst_port; + uh->source = src_port; + + uh->len = htons(skb->len); + uh->check = 0; + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED + | IPSKB_REROUTED); + skb_dst_set(skb, dst); + + udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr, + &sk->sk_v6_daddr, skb->len); + + __skb_push(skb, sizeof(*ip6h)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6_flow_hdr(ip6h, prio, htonl(0)); + ip6h->payload_len = htons(skb->len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + + ip6tunnel_xmit(skb, dev); + return 0; +} +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 5833a2244467..d440bb585524 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -807,7 +807,7 @@ static void vti6_dev_setup(struct net_device *dev) dev->mtu = ETH_DATA_LEN; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + netif_keep_dst(dev); } /** diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f9a3fd320d1d..0171f08325c3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -845,7 +845,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) atomic_dec(&mrt->cache_resolve_queue_len); - while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { + while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); nlh->nlmsg_type = NLMSG_ERROR; @@ -1103,7 +1103,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, * Play the pending entries through our router */ - while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { + while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index d1c793cffcb5..1b9316e1386a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -181,8 +181,7 @@ static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) return 0; } -static const struct xfrm_type ipcomp6_type = -{ +static const struct xfrm_type ipcomp6_type = { .description = "IPCOMP6", .owner = THIS_MODULE, .proto = IPPROTO_COMP, @@ -193,8 +192,7 @@ static const struct xfrm_type ipcomp6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static struct xfrm6_protocol ipcomp6_protocol = -{ +static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0c289982796d..e1a9583bb419 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -66,12 +66,12 @@ int ip6_ra_control(struct sock *sk, int sel) if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_RAW) return -ENOPROTOOPT; - new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; + new_ra = (sel >= 0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; write_lock_bh(&ip6_ra_lock); - for (rap = &ip6_ra_chain; (ra=*rap) != NULL; rap = &ra->next) { + for (rap = &ip6_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { if (ra->sk == sk) { - if (sel>=0) { + if (sel >= 0) { write_unlock_bh(&ip6_ra_lock); kfree(new_ra); return -EADDRINUSE; @@ -130,7 +130,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, int retv = -ENOPROTOOPT; if (optval == NULL) - val=0; + val = 0; else { if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) @@ -139,7 +139,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, val = 0; } - valbool = (val!=0); + valbool = (val != 0); if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); @@ -474,7 +474,7 @@ sticky_done: goto done; msg.msg_controllen = optlen; - msg.msg_control = (void*)(opt+1); + msg.msg_control = (void *)(opt+1); retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk, &junk); @@ -687,7 +687,7 @@ done: retv = -ENOBUFS; break; } - gsf = kmalloc(optlen,GFP_KERNEL); + gsf = kmalloc(optlen, GFP_KERNEL); if (!gsf) { retv = -ENOBUFS; break; @@ -873,7 +873,6 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, #endif return err; } - EXPORT_SYMBOL(ipv6_setsockopt); #ifdef CONFIG_COMPAT @@ -909,7 +908,6 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, #endif return err; } - EXPORT_SYMBOL(compat_ipv6_setsockopt); #endif @@ -921,7 +919,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, if (!opt) return 0; - switch(optname) { + switch (optname) { case IPV6_HOPOPTS: hdr = opt->hopopt; break; @@ -1284,9 +1282,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -ENOPROTOOPT; } len = min_t(unsigned int, sizeof(int), len); - if(put_user(len, optlen)) + if (put_user(len, optlen)) return -EFAULT; - if(copy_to_user(optval,&val,len)) + if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } @@ -1299,7 +1297,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if (level == SOL_IP && sk->sk_type != SOCK_RAW) return udp_prot.getsockopt(sk, level, optname, optval, optlen); - if(level != SOL_IPV6) + if (level != SOL_IPV6) return -ENOPROTOOPT; err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0); @@ -1321,7 +1319,6 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, #endif return err; } - EXPORT_SYMBOL(ipv6_getsockopt); #ifdef CONFIG_COMPAT @@ -1364,7 +1361,6 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, #endif return err; } - EXPORT_SYMBOL(compat_ipv6_getsockopt); #endif diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a23b655a7627..9648de2b6745 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -64,15 +64,6 @@ #include <net/ip6_checksum.h> -/* Set to 3 to get tracing... */ -#define MCAST_DEBUG 2 - -#if MCAST_DEBUG >= 3 -#define MDBG(x) printk x -#else -#define MDBG(x) -#endif - /* Ensure that we have struct in6_addr aligned on 32bit word. */ static void *__mld2_query_bugs[] __attribute__((__unused__)) = { BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4), @@ -82,9 +73,6 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = { static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; -/* Big mc list lock for all the sockets */ -static DEFINE_SPINLOCK(ipv6_sk_mc_lock); - static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); static void igmp6_timer_handler(unsigned long data); @@ -121,6 +109,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, #define IPV6_MLD_MAX_MSF 64 int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; +int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT; /* * socket join on multicast group @@ -173,7 +162,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst->addr = *addr; rtnl_lock(); - rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); @@ -182,10 +170,9 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) ip6_rt_put(rt); } } else - dev = dev_get_by_index_rcu(net, ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev == NULL) { - rcu_read_unlock(); rtnl_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; @@ -203,18 +190,14 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_mc_inc(dev, addr); if (err) { - rcu_read_unlock(); rtnl_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return err; } - spin_lock(&ipv6_sk_mc_lock); mc_lst->next = np->ipv6_mc_list; rcu_assign_pointer(np->ipv6_mc_list, mc_lst); - spin_unlock(&ipv6_sk_mc_lock); - rcu_read_unlock(); rtnl_unlock(); return 0; @@ -234,20 +217,16 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EINVAL; rtnl_lock(); - spin_lock(&ipv6_sk_mc_lock); for (lnk = &np->ipv6_mc_list; - (mc_lst = rcu_dereference_protected(*lnk, - lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ; + (mc_lst = rtnl_dereference(*lnk)) != NULL; lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { struct net_device *dev; *lnk = mc_lst->next; - spin_unlock(&ipv6_sk_mc_lock); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, mc_lst->ifindex); + dev = __dev_get_by_index(net, mc_lst->ifindex); if (dev != NULL) { struct inet6_dev *idev = __in6_dev_get(dev); @@ -256,7 +235,6 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) __ipv6_dev_mc_dec(idev, &mc_lst->addr); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); - rcu_read_unlock(); rtnl_unlock(); atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); @@ -264,7 +242,6 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) return 0; } } - spin_unlock(&ipv6_sk_mc_lock); rtnl_unlock(); return -EADDRNOTAVAIL; @@ -311,16 +288,12 @@ void ipv6_sock_mc_close(struct sock *sk) return; rtnl_lock(); - spin_lock(&ipv6_sk_mc_lock); - while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, - lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { + while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) { struct net_device *dev; np->ipv6_mc_list = mc_lst->next; - spin_unlock(&ipv6_sk_mc_lock); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, mc_lst->ifindex); + dev = __dev_get_by_index(net, mc_lst->ifindex); if (dev) { struct inet6_dev *idev = __in6_dev_get(dev); @@ -329,14 +302,11 @@ void ipv6_sock_mc_close(struct sock *sk) __ipv6_dev_mc_dec(idev, &mc_lst->addr); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); - rcu_read_unlock(); atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); - spin_lock(&ipv6_sk_mc_lock); } - spin_unlock(&ipv6_sk_mc_lock); rtnl_unlock(); } @@ -400,7 +370,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!psl) goto done; /* err = -EADDRNOTAVAIL */ rv = !0; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) break; @@ -417,7 +387,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, /* update the interface filter */ ip6_mc_del_src(idev, group, omode, 1, source, 1); - for (j=i+1; j<psl->sl_count; j++) + for (j = i+1; j < psl->sl_count; j++) psl->sl_addr[j-1] = psl->sl_addr[j]; psl->sl_count--; err = 0; @@ -443,19 +413,19 @@ int ip6_mc_source(int add, int omode, struct sock *sk, newpsl->sl_max = count; newpsl->sl_count = count - IP6_SFBLOCK; if (psl) { - for (i=0; i<psl->sl_count; i++) + for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max)); } pmc->sflist = psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { rv = !ipv6_addr_equal(&psl->sl_addr[i], source); if (rv == 0) /* There is an error in the address. */ goto done; } - for (j=psl->sl_count-1; j>=i; j--) + for (j = psl->sl_count-1; j >= i; j--) psl->sl_addr[j+1] = psl->sl_addr[j]; psl->sl_addr[i] = *source; psl->sl_count++; @@ -524,7 +494,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) goto done; } newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc; - for (i=0; i<newpsl->sl_count; ++i) { + for (i = 0; i < newpsl->sl_count; ++i) { struct sockaddr_in6 *psin6; psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i]; @@ -586,9 +556,8 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, } err = -EADDRNOTAVAIL; - /* - * changes to the ipv6_mc_list require the socket lock and - * a read lock on ip6_sk_mc_lock. We have the socket lock, + /* changes to the ipv6_mc_list require the socket lock and + * rtnl lock. We have the socket lock and rcu read lock, * so reading the list is safe. */ @@ -612,11 +581,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { return -EFAULT; } - /* changes to psl require the socket lock, a read lock on - * on ipv6_sk_mc_lock and a write lock on pmc->sflock. We - * have the socket lock, so reading here is safe. + /* changes to psl require the socket lock, and a write lock + * on pmc->sflock. We have the socket lock so reading here is safe. */ - for (i=0; i<copycount; i++) { + for (i = 0; i < copycount; i++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; @@ -658,7 +626,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, } else { int i; - for (i=0; i<psl->sl_count; i++) { + for (i = 0; i < psl->sl_count; i++) { if (ipv6_addr_equal(&psl->sl_addr[i], src_addr)) break; } @@ -673,14 +641,6 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, return rv; } -static void ma_put(struct ifmcaddr6 *mc) -{ - if (atomic_dec_and_test(&mc->mca_refcnt)) { - in6_dev_put(mc->idev); - kfree(mc); - } -} - static void igmp6_group_added(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; @@ -772,7 +732,7 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) pmc->mca_tomb = im->mca_tomb; pmc->mca_sources = im->mca_sources; im->mca_tomb = im->mca_sources = NULL; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) psf->sf_crcount = pmc->mca_crcount; } spin_unlock_bh(&im->mca_lock); @@ -790,7 +750,7 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca) spin_lock_bh(&idev->mc_lock); pmc_prev = NULL; - for (pmc=idev->mc_tomb; pmc; pmc=pmc->next) { + for (pmc = idev->mc_tomb; pmc; pmc = pmc->next) { if (ipv6_addr_equal(&pmc->mca_addr, pmca)) break; pmc_prev = pmc; @@ -804,7 +764,7 @@ static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *pmca) spin_unlock_bh(&idev->mc_lock); if (pmc) { - for (psf=pmc->mca_tomb; psf; psf=psf_next) { + for (psf = pmc->mca_tomb; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } @@ -831,14 +791,14 @@ static void mld_clear_delrec(struct inet6_dev *idev) /* clear dead sources, too */ read_lock_bh(&idev->lock); - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { struct ip6_sf_list *psf, *psf_next; spin_lock_bh(&pmc->mca_lock); psf = pmc->mca_tomb; pmc->mca_tomb = NULL; spin_unlock_bh(&pmc->mca_lock); - for (; psf; psf=psf_next) { + for (; psf; psf = psf_next) { psf_next = psf->sf_next; kfree(psf); } @@ -846,6 +806,48 @@ static void mld_clear_delrec(struct inet6_dev *idev) read_unlock_bh(&idev->lock); } +static void mca_get(struct ifmcaddr6 *mc) +{ + atomic_inc(&mc->mca_refcnt); +} + +static void ma_put(struct ifmcaddr6 *mc) +{ + if (atomic_dec_and_test(&mc->mca_refcnt)) { + in6_dev_put(mc->idev); + kfree(mc); + } +} + +static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, + const struct in6_addr *addr) +{ + struct ifmcaddr6 *mc; + + mc = kzalloc(sizeof(*mc), GFP_ATOMIC); + if (mc == NULL) + return NULL; + + setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); + + mc->mca_addr = *addr; + mc->idev = idev; /* reference taken by caller */ + mc->mca_users = 1; + /* mca_stamp should be updated upon changes */ + mc->mca_cstamp = mc->mca_tstamp = jiffies; + atomic_set(&mc->mca_refcnt, 1); + spin_lock_init(&mc->mca_lock); + + /* initial mode is (EX, empty) */ + mc->mca_sfmode = MCAST_EXCLUDE; + mc->mca_sfcount[MCAST_EXCLUDE] = 1; + + if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) || + IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) + mc->mca_flags |= MAF_NOREPORT; + + return mc; +} /* * device multicast group inc (add if not found) @@ -881,38 +883,20 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) } } - /* - * not found: create a new one. - */ - - mc = kzalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC); - - if (mc == NULL) { + mc = mca_alloc(idev, addr); + if (!mc) { write_unlock_bh(&idev->lock); in6_dev_put(idev); return -ENOMEM; } - setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); - - mc->mca_addr = *addr; - mc->idev = idev; /* (reference taken) */ - mc->mca_users = 1; - /* mca_stamp should be updated upon changes */ - mc->mca_cstamp = mc->mca_tstamp = jiffies; - atomic_set(&mc->mca_refcnt, 2); - spin_lock_init(&mc->mca_lock); - - /* initial mode is (EX, empty) */ - mc->mca_sfmode = MCAST_EXCLUDE; - mc->mca_sfcount[MCAST_EXCLUDE] = 1; - - if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) || - IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) - mc->mca_flags |= MAF_NOREPORT; - mc->next = idev->mc_list; idev->mc_list = mc; + + /* Hold this for the code below before we unlock, + * it is already exposed via idev->mc_list. + */ + mca_get(mc); write_unlock_bh(&idev->lock); mld_del_delrec(idev, &mc->mca_addr); @@ -931,7 +915,7 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) ASSERT_RTNL(); write_lock_bh(&idev->lock); - for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) { + for (map = &idev->mc_list; (ma = *map) != NULL; map = &ma->next) { if (ipv6_addr_equal(&ma->mca_addr, addr)) { if (--ma->mca_users == 0) { *map = ma->next; @@ -956,7 +940,7 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) struct inet6_dev *idev; int err; - rcu_read_lock(); + ASSERT_RTNL(); idev = __in6_dev_get(dev); if (!idev) @@ -964,7 +948,6 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) else err = __ipv6_dev_mc_dec(idev, addr); - rcu_read_unlock(); return err; } @@ -982,7 +965,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); - for (mc = idev->mc_list; mc; mc=mc->next) { + for (mc = idev->mc_list; mc; mc = mc->next) { if (ipv6_addr_equal(&mc->mca_addr, group)) break; } @@ -991,7 +974,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, struct ip6_sf_list *psf; spin_lock_bh(&mc->mca_lock); - for (psf=mc->mca_sources;psf;psf=psf->sf_next) { + for (psf = mc->mca_sources; psf; psf = psf->sf_next) { if (ipv6_addr_equal(&psf->sf_addr, src_addr)) break; } @@ -1000,7 +983,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, psf->sf_count[MCAST_EXCLUDE] != mc->mca_sfcount[MCAST_EXCLUDE]; else - rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0; + rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0; spin_unlock_bh(&mc->mca_lock); } else rv = true; /* don't filter unspecified source */ @@ -1091,10 +1074,10 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, int i, scount; scount = 0; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) { + for (i = 0; i < nsrcs; i++) { /* skip inactive filters */ if (psf->sf_count[MCAST_INCLUDE] || pmc->mca_sfcount[MCAST_EXCLUDE] != @@ -1124,10 +1107,10 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, /* mark INCLUDE-mode sources */ scount = 0; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (scount == nsrcs) break; - for (i=0; i<nsrcs; i++) { + for (i = 0; i < nsrcs; i++) { if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) { psf->sf_gsresp = 1; scount++; @@ -1205,15 +1188,16 @@ static void mld_update_qrv(struct inet6_dev *idev, * and SHOULD NOT be one. Catch this here if we ever run * into such a case in future. */ + const int min_qrv = min(MLD_QRV_DEFAULT, sysctl_mld_qrv); WARN_ON(idev->mc_qrv == 0); if (mlh2->mld2q_qrv > 0) idev->mc_qrv = mlh2->mld2q_qrv; - if (unlikely(idev->mc_qrv < 2)) { + if (unlikely(idev->mc_qrv < min_qrv)) { net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n", - idev->mc_qrv, MLD_QRV_DEFAULT); - idev->mc_qrv = MLD_QRV_DEFAULT; + idev->mc_qrv, min_qrv); + idev->mc_qrv = min_qrv; } } @@ -1253,7 +1237,7 @@ static void mld_update_qri(struct inet6_dev *idev, } static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld, - unsigned long *max_delay) + unsigned long *max_delay, bool v1_query) { unsigned long mldv1_md; @@ -1261,11 +1245,32 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld, if (mld_in_v2_mode_only(idev)) return -EINVAL; - /* MLDv1 router present */ mldv1_md = ntohs(mld->mld_maxdelay); + + /* When in MLDv1 fallback and a MLDv2 router start-up being + * unaware of current MLDv1 operation, the MRC == MRD mapping + * only works when the exponential algorithm is not being + * used (as MLDv1 is unaware of such things). + * + * According to the RFC author, the MLDv2 implementations + * he's aware of all use a MRC < 32768 on start up queries. + * + * Thus, should we *ever* encounter something else larger + * than that, just assume the maximum possible within our + * reach. + */ + if (!v1_query) + mldv1_md = min(mldv1_md, MLDV1_MRD_MAX_COMPAT); + *max_delay = max(msecs_to_jiffies(mldv1_md), 1UL); - mld_set_v1_mode(idev); + /* MLDv1 router present: we need to go into v1 mode *only* + * when an MLDv1 query is received as per section 9.12. of + * RFC3810! And we know from RFC2710 section 3.7 that MLDv1 + * queries MUST be of exactly 24 octets. + */ + if (v1_query) + mld_set_v1_mode(idev); /* cancel MLDv2 report timer */ mld_gq_stop_timer(idev); @@ -1280,10 +1285,6 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld, static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld, unsigned long *max_delay) { - /* hosts need to stay in MLDv1 mode, discard MLDv2 queries */ - if (mld_in_v1_mode(idev)) - return -EINVAL; - *max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL); mld_update_qrv(idev, mld); @@ -1340,8 +1341,11 @@ int igmp6_event_query(struct sk_buff *skb) !(group_type&IPV6_ADDR_MULTICAST)) return -EINVAL; - if (len == MLD_V1_QUERY_LEN) { - err = mld_process_v1(idev, mld, &max_delay); + if (len < MLD_V1_QUERY_LEN) { + return -EINVAL; + } else if (len == MLD_V1_QUERY_LEN || mld_in_v1_mode(idev)) { + err = mld_process_v1(idev, mld, &max_delay, + len == MLD_V1_QUERY_LEN); if (err < 0) return err; } else if (len >= MLD_V2_QUERY_LEN_MIN) { @@ -1373,18 +1377,19 @@ int igmp6_event_query(struct sk_buff *skb) mlh2 = (struct mld2_query *)skb_transport_header(skb); mark = 1; } - } else + } else { return -EINVAL; + } read_lock_bh(&idev->lock); if (group_type == IPV6_ADDR_ANY) { - for (ma = idev->mc_list; ma; ma=ma->next) { + for (ma = idev->mc_list; ma; ma = ma->next) { spin_lock_bh(&ma->mca_lock); igmp6_group_queried(ma, max_delay); spin_unlock_bh(&ma->mca_lock); } } else { - for (ma = idev->mc_list; ma; ma=ma->next) { + for (ma = idev->mc_list; ma; ma = ma->next) { if (!ipv6_addr_equal(group, &ma->mca_addr)) continue; spin_lock_bh(&ma->mca_lock); @@ -1448,7 +1453,7 @@ int igmp6_event_report(struct sk_buff *skb) */ read_lock_bh(&idev->lock); - for (ma = idev->mc_list; ma; ma=ma->next) { + for (ma = idev->mc_list; ma; ma = ma->next) { if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) { spin_lock(&ma->mca_lock); if (del_timer(&ma->mca_timer)) @@ -1512,7 +1517,7 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) struct ip6_sf_list *psf; int scount = 0; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (!is_in(pmc, psf, type, gdeleted, sdeleted)) continue; scount++; @@ -1726,7 +1731,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, } first = 1; psf_prev = NULL; - for (psf=*psf_list; psf; psf=psf_next) { + for (psf = *psf_list; psf; psf = psf_next) { struct in6_addr *psrc; psf_next = psf->sf_next; @@ -1805,7 +1810,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) read_lock_bh(&idev->lock); if (!pmc) { - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { if (pmc->mca_flags & MAF_NOREPORT) continue; spin_lock_bh(&pmc->mca_lock); @@ -1838,7 +1843,7 @@ static void mld_clear_zeros(struct ip6_sf_list **ppsf) struct ip6_sf_list *psf_prev, *psf_next, *psf; psf_prev = NULL; - for (psf=*ppsf; psf; psf = psf_next) { + for (psf = *ppsf; psf; psf = psf_next) { psf_next = psf->sf_next; if (psf->sf_crcount == 0) { if (psf_prev) @@ -1862,7 +1867,7 @@ static void mld_send_cr(struct inet6_dev *idev) /* deleted MCA's */ pmc_prev = NULL; - for (pmc=idev->mc_tomb; pmc; pmc=pmc_next) { + for (pmc = idev->mc_tomb; pmc; pmc = pmc_next) { pmc_next = pmc->next; if (pmc->mca_sfmode == MCAST_INCLUDE) { type = MLD2_BLOCK_OLD_SOURCES; @@ -1895,7 +1900,7 @@ static void mld_send_cr(struct inet6_dev *idev) spin_unlock(&idev->mc_lock); /* change recs */ - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) { type = MLD2_BLOCK_OLD_SOURCES; @@ -2032,7 +2037,7 @@ static void mld_send_initial_cr(struct inet6_dev *idev) skb = NULL; read_lock_bh(&idev->lock); - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_CHANGE_TO_EXCLUDE; @@ -2077,7 +2082,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, int rv = 0; psf_prev = NULL; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) break; psf_prev = psf; @@ -2118,7 +2123,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; read_lock_bh(&idev->lock); - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; } @@ -2138,7 +2143,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_sfcount[sfmode]--; } err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { int rv = ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]); changerec |= rv > 0; @@ -2154,7 +2159,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_sfmode = MCAST_INCLUDE; pmc->mca_crcount = idev->mc_qrv; idev->mc_ifc_count = pmc->mca_crcount; - for (psf=pmc->mca_sources; psf; psf = psf->sf_next) + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; mld_ifc_event(pmc->idev); } else if (sf_setstate(pmc) || changerec) @@ -2173,7 +2178,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, struct ip6_sf_list *psf, *psf_prev; psf_prev = NULL; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) break; psf_prev = psf; @@ -2198,7 +2203,7 @@ static void sf_markstate(struct ifmcaddr6 *pmc) struct ip6_sf_list *psf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) if (pmc->mca_sfcount[MCAST_EXCLUDE]) { psf->sf_oldin = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && @@ -2215,7 +2220,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) int new_in, rv; rv = 0; - for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; @@ -2225,8 +2230,8 @@ static int sf_setstate(struct ifmcaddr6 *pmc) if (!psf->sf_oldin) { struct ip6_sf_list *prev = NULL; - for (dpsf=pmc->mca_tomb; dpsf; - dpsf=dpsf->sf_next) { + for (dpsf = pmc->mca_tomb; dpsf; + dpsf = dpsf->sf_next) { if (ipv6_addr_equal(&dpsf->sf_addr, &psf->sf_addr)) break; @@ -2248,7 +2253,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) * add or update "delete" records if an active filter * is now inactive */ - for (dpsf=pmc->mca_tomb; dpsf; dpsf=dpsf->sf_next) + for (dpsf = pmc->mca_tomb; dpsf; dpsf = dpsf->sf_next) if (ipv6_addr_equal(&dpsf->sf_addr, &psf->sf_addr)) break; @@ -2282,7 +2287,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; read_lock_bh(&idev->lock); - for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + for (pmc = idev->mc_list; pmc; pmc = pmc->next) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; } @@ -2298,7 +2303,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!delta) pmc->mca_sfcount[sfmode]++; err = 0; - for (i=0; i<sfcount; i++) { + for (i = 0; i < sfcount; i++) { err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; @@ -2308,7 +2313,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!delta) pmc->mca_sfcount[sfmode]--; - for (j=0; j<i; j++) + for (j = 0; j < i; j++) ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) { struct ip6_sf_list *psf; @@ -2322,7 +2327,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_crcount = idev->mc_qrv; idev->mc_ifc_count = pmc->mca_crcount; - for (psf=pmc->mca_sources; psf; psf = psf->sf_next) + for (psf = pmc->mca_sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; mld_ifc_event(idev); } else if (sf_setstate(pmc)) @@ -2336,12 +2341,12 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *nextpsf; - for (psf=pmc->mca_tomb; psf; psf=nextpsf) { + for (psf = pmc->mca_tomb; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } pmc->mca_tomb = NULL; - for (psf=pmc->mca_sources; psf; psf=nextpsf) { + for (psf = pmc->mca_sources; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } @@ -2380,7 +2385,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, { int err; - /* callers have the socket lock and a write lock on ipv6_sk_mc_lock, + /* callers have the socket lock and rtnl lock * so no other readers or writers of iml or its sflist */ if (!iml->sflist) { @@ -2485,13 +2490,21 @@ void ipv6_mc_down(struct inet6_dev *idev) mld_gq_stop_timer(idev); mld_dad_stop_timer(idev); - for (i = idev->mc_list; i; i=i->next) + for (i = idev->mc_list; i; i = i->next) igmp6_group_dropped(i); read_unlock_bh(&idev->lock); mld_clear_delrec(idev); } +static void ipv6_mc_reset(struct inet6_dev *idev) +{ + idev->mc_qrv = sysctl_mld_qrv; + idev->mc_qi = MLD_QI_DEFAULT; + idev->mc_qri = MLD_QRI_DEFAULT; + idev->mc_v1_seen = 0; + idev->mc_maxdelay = unsolicited_report_interval(idev); +} /* Device going up */ @@ -2502,7 +2515,8 @@ void ipv6_mc_up(struct inet6_dev *idev) /* Install multicast list, except for all-nodes (already installed) */ read_lock_bh(&idev->lock); - for (i = idev->mc_list; i; i=i->next) + ipv6_mc_reset(idev); + for (i = idev->mc_list; i; i = i->next) igmp6_group_added(i); read_unlock_bh(&idev->lock); } @@ -2522,13 +2536,7 @@ void ipv6_mc_init_dev(struct inet6_dev *idev) (unsigned long)idev); setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire, (unsigned long)idev); - - idev->mc_qrv = MLD_QRV_DEFAULT; - idev->mc_qi = MLD_QI_DEFAULT; - idev->mc_qri = MLD_QRI_DEFAULT; - - idev->mc_maxdelay = unsolicited_report_interval(idev); - idev->mc_v1_seen = 0; + ipv6_mc_reset(idev); write_unlock_bh(&idev->lock); } diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index db9b6cbc9db3..f61429d391d3 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -336,11 +336,10 @@ static void mip6_destopt_destroy(struct xfrm_state *x) { } -static const struct xfrm_type mip6_destopt_type = -{ +static const struct xfrm_type mip6_destopt_type = { .description = "MIP6DESTOPT", .owner = THIS_MODULE, - .proto = IPPROTO_DSTOPTS, + .proto = IPPROTO_DSTOPTS, .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR, .init_state = mip6_destopt_init_state, .destructor = mip6_destopt_destroy, @@ -469,11 +468,10 @@ static void mip6_rthdr_destroy(struct xfrm_state *x) { } -static const struct xfrm_type mip6_rthdr_type = -{ +static const struct xfrm_type mip6_rthdr_type = { .description = "MIP6RT", .owner = THIS_MODULE, - .proto = IPPROTO_ROUTING, + .proto = IPPROTO_ROUTING, .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR, .init_state = mip6_rthdr_init_state, .destructor = mip6_rthdr_destroy, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 339078f95d1b..4cb45c1079a2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -175,7 +175,7 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, type = cur->nd_opt_type; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); - } while(cur < end && cur->nd_opt_type != type); + } while (cur < end && cur->nd_opt_type != type); return cur <= end && cur->nd_opt_type == type ? cur : NULL; } @@ -192,7 +192,7 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, return NULL; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); - } while(cur < end && !ndisc_is_useropt(cur)); + } while (cur < end && !ndisc_is_useropt(cur)); return cur <= end && ndisc_is_useropt(cur) ? cur : NULL; } @@ -284,7 +284,6 @@ int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, } return -EINVAL; } - EXPORT_SYMBOL(ndisc_mc_map); static u32 ndisc_hash(const void *pkey, @@ -296,7 +295,7 @@ static u32 ndisc_hash(const void *pkey, static int ndisc_constructor(struct neighbour *neigh) { - struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key; + struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key; struct net_device *dev = neigh->dev; struct inet6_dev *in6_dev; struct neigh_parms *parms; @@ -344,7 +343,7 @@ static int ndisc_constructor(struct neighbour *neigh) static int pndisc_constructor(struct pneigh_entry *n) { - struct in6_addr *addr = (struct in6_addr*)&n->key; + struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; @@ -357,7 +356,7 @@ static int pndisc_constructor(struct pneigh_entry *n) static void pndisc_destructor(struct pneigh_entry *n) { - struct in6_addr *addr = (struct in6_addr*)&n->key; + struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; @@ -1065,7 +1064,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) int optlen; unsigned int pref = 0; - __u8 * opt = (__u8 *)(ra_msg + 1); + __u8 *opt = (__u8 *)(ra_msg + 1); optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); @@ -1319,7 +1318,7 @@ skip_linkparms: continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; - rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3, + rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, &ipv6_hdr(skb)->saddr); } } @@ -1352,7 +1351,7 @@ skip_routeinfo: __be32 n; u32 mtu; - memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); + memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); mtu = ntohl(n); if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 2812816aabdc..6af874fc187f 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -40,18 +40,13 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. -config NFT_CHAIN_NAT_IPV6 - depends on NF_TABLES_IPV6 - depends on NF_NAT_IPV6 && NFT_NAT - tristate "IPv6 nf_tables nat chain support" - help - This option enables the "nat" chain for IPv6 in nf_tables. This - chain type is used to perform Network Address Translation (NAT) - packet transformations such as the source, destination address and - source and destination ports. +config NF_REJECT_IPV6 + tristate "IPv6 packet rejection" + default m if NETFILTER_ADVANCED=n config NFT_REJECT_IPV6 depends on NF_TABLES_IPV6 + select NF_REJECT_IPV6 default NFT_REJECT tristate @@ -70,6 +65,34 @@ config NF_NAT_IPV6 forms of full Network Address Port Translation. This can be controlled by iptables or nft. +if NF_NAT_IPV6 + +config NFT_CHAIN_NAT_IPV6 + depends on NF_TABLES_IPV6 + tristate "IPv6 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv6 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. + +config NF_NAT_MASQUERADE_IPV6 + tristate "IPv6 masquerade support" + help + This is the kernel functionality to provide NAT in the masquerade + flavour (automatic source address selection) for IPv6. + +config NFT_MASQ_IPV6 + tristate "IPv6 masquerade support for nf_tables" + depends on NF_TABLES_IPV6 + depends on NFT_MASQ + select NF_NAT_MASQUERADE_IPV6 + help + This is the expression that provides IPv4 masquerading support for + nf_tables. + +endif # NF_NAT_IPV6 + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 @@ -190,6 +213,7 @@ config IP6_NF_FILTER config IP6_NF_TARGET_REJECT tristate "REJECT target support" depends on IP6_NF_FILTER + select NF_REJECT_IPV6 default m if NETFILTER_ADVANCED=n help The REJECT target allows a filtering rule to specify that an ICMPv6 @@ -260,6 +284,7 @@ if IP6_NF_NAT config IP6_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" + select NF_NAT_MASQUERADE_IPV6 help Masquerading is a special case of NAT: all outgoing connections are changed to seem to come from a particular interface's address, and diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index c3d3286db4bb..fbb25f01143c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o +obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o @@ -26,11 +27,15 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o # logging obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o +# reject +obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o + # nf_tables obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o +obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 3e4e92d5e157..7f9f45d829d2 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -19,33 +19,12 @@ #include <net/netfilter/nf_nat.h> #include <net/addrconf.h> #include <net/ipv6.h> +#include <net/netfilter/ipv6/nf_nat_masquerade.h> static unsigned int masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - const struct nf_nat_range *range = par->targinfo; - enum ip_conntrack_info ctinfo; - struct in6_addr src; - struct nf_conn *ct; - struct nf_nat_range newrange; - - ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY)); - - if (ipv6_dev_get_saddr(dev_net(par->out), par->out, - &ipv6_hdr(skb)->daddr, 0, &src) < 0) - return NF_DROP; - - nfct_nat(ct)->masq_index = par->out->ifindex; - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = src; - newrange.max_addr.in6 = src; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); + return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out); } static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) @@ -57,48 +36,6 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) return 0; } -static int device_cmp(struct nf_conn *ct, void *ifindex) -{ - const struct nf_conn_nat *nat = nfct_nat(ct); - - if (!nat) - return 0; - if (nf_ct_l3num(ct) != NFPROTO_IPV6) - return 0; - return nat->masq_index == (int)(long)ifindex; -} - -static int masq_device_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - const struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct net *net = dev_net(dev); - - if (event == NETDEV_DOWN) - nf_ct_iterate_cleanup(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); - - return NOTIFY_DONE; -} - -static struct notifier_block masq_dev_notifier = { - .notifier_call = masq_device_event, -}; - -static int masq_inet_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct inet6_ifaddr *ifa = ptr; - struct netdev_notifier_info info; - - netdev_notifier_info_init(&info, ifa->idev->dev); - return masq_device_event(this, event, &info); -} - -static struct notifier_block masq_inet_notifier = { - .notifier_call = masq_inet_event, -}; - static struct xt_target masquerade_tg6_reg __read_mostly = { .name = "MASQUERADE", .family = NFPROTO_IPV6, @@ -115,17 +52,14 @@ static int __init masquerade_tg6_init(void) int err; err = xt_register_target(&masquerade_tg6_reg); - if (err == 0) { - register_netdevice_notifier(&masq_dev_notifier); - register_inet6addr_notifier(&masq_inet_notifier); - } + if (err == 0) + nf_nat_masquerade_ipv6_register_notifier(); return err; } static void __exit masquerade_tg6_exit(void) { - unregister_inet6addr_notifier(&masq_inet_notifier); - unregister_netdevice_notifier(&masq_dev_notifier); + nf_nat_masquerade_ipv6_unregister_notifier(); xt_unregister_target(&masquerade_tg6_reg); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 387d8b8fc18d..b0634ac996b7 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -30,222 +30,57 @@ static const struct xt_table nf_nat_ipv6_table = { .af = NFPROTO_IPV6, }; -static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) -{ - /* Force range to this IP; let proto decide mapping for - * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). - */ - struct nf_nat_range range; - - range.flags = 0; - pr_debug("Allocating NULL binding for %p (%pI6)\n", ct, - HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? - &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 : - &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6); - - return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); -} - -static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - struct nf_conn *ct) +static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct) { struct net *net = nf_ct_net(ct); - unsigned int ret; - ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat); - if (ret == NF_ACCEPT) { - if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) - ret = alloc_null_binding(ct, hooknum); - } - return ret; + return ip6t_do_table(skb, ops->hooknum, in, out, net->ipv6.ip6table_nat); } -static unsigned int -nf_nat_ipv6_fn(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); - __be16 frag_off; - int hdrlen; - u8 nexthdr; - - ct = nf_ct_get(skb, &ctinfo); - /* Can't track? It's not due to stress, or conntrack would - * have dropped it. Hence it's the user's responsibilty to - * packet filter it out, or implement conntrack/NAT for that - * protocol. 8) --RR - */ - if (!ct) - return NF_ACCEPT; - - /* Don't try to NAT if this packet is not conntracked */ - if (nf_ct_is_untracked(ct)) - return NF_ACCEPT; - - nat = nf_ct_nat_ext_add(ct); - if (nat == NULL) - return NF_ACCEPT; - - switch (ctinfo) { - case IP_CT_RELATED: - case IP_CT_RELATED_REPLY: - nexthdr = ipv6_hdr(skb)->nexthdr; - hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), - &nexthdr, &frag_off); - - if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { - if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - ops->hooknum, - hdrlen)) - return NF_DROP; - else - return NF_ACCEPT; - } - /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ - case IP_CT_NEW: - /* Seen it before? This can happen for loopback, retrans, - * or local packets. - */ - if (!nf_nat_initialized(ct, maniptype)) { - unsigned int ret; - - ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); - if (ret != NF_ACCEPT) - return ret; - } else { - pr_debug("Already setup manip %s for ct %p\n", - maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", - ct); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) - goto oif_changed; - } - break; - - default: - /* ESTABLISHED */ - NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) - goto oif_changed; - } - - return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); - -oif_changed: - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_DROP; + return nf_nat_ipv6_fn(ops, skb, in, out, ip6table_nat_do_chain); } -static unsigned int -nf_nat_ipv6_in(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - unsigned int ret; - struct in6_addr daddr = ipv6_hdr(skb)->daddr; - - ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN && - ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) - skb_dst_drop(skb); - - return ret; + return nf_nat_ipv6_in(ops, skb, in, out, ip6table_nat_do_chain); } -static unsigned int -nf_nat_ipv6_out(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { -#ifdef CONFIG_XFRM - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - int err; -#endif - unsigned int ret; - - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct ipv6hdr)) - return NF_ACCEPT; - - ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); -#ifdef CONFIG_XFRM - if (ret != NF_DROP && ret != NF_STOLEN && - !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3) || - (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && - ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(skb, AF_INET6); - if (err < 0) - ret = NF_DROP_ERR(err); - } - } -#endif - return ret; + return nf_nat_ipv6_out(ops, skb, in, out, ip6table_nat_do_chain); } -static unsigned int -nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - unsigned int ret; - int err; - - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct ipv6hdr)) - return NF_ACCEPT; - - ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, - &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(skb); - if (err < 0) - ret = NF_DROP_ERR(err); - } -#ifdef CONFIG_XFRM - else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && - ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(skb, AF_INET6); - if (err < 0) - ret = NF_DROP_ERR(err); - } -#endif - } - return ret; + return nf_nat_ipv6_local_fn(ops, skb, in, out, ip6table_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* Before packet filtering, change destination */ { - .hook = nf_nat_ipv6_in, + .hook = ip6table_nat_in, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, @@ -253,7 +88,7 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { }, /* After packet filtering, change source */ { - .hook = nf_nat_ipv6_out, + .hook = ip6table_nat_out, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, @@ -261,7 +96,7 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { }, /* Before packet filtering, change destination */ { - .hook = nf_nat_ipv6_local_fn, + .hook = ip6table_nat_local_fn, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, @@ -269,7 +104,7 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { }, /* After packet filtering, change source */ { - .hook = nf_nat_ipv6_fn, + .hook = ip6table_nat_fn, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 7b9a748c6bac..e70382e4dfb5 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -40,7 +40,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, zone = nf_ct_zone((struct nf_conn *)skb->nfct); #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index fc8e49b2ff3e..c5812e1c1ffb 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -261,6 +261,205 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, } EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); +unsigned int +nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + unsigned int (*do_chain)(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + __be16 frag_off; + int hdrlen; + u8 nexthdr; + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; + + /* Don't try to NAT if this packet is not conntracked */ + if (nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + nexthdr = ipv6_hdr(skb)->nexthdr; + hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, + ops->hooknum, + hdrlen)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + unsigned int ret; + + ret = do_chain(ops, skb, in, out, ct); + if (ret != NF_ACCEPT) + return ret; + + if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) + break; + + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } else { + pr_debug("Already setup manip %s for ct %p\n", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", + ct); + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) + goto oif_changed; + } + break; + + default: + /* ESTABLISHED */ + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || + ctinfo == IP_CT_ESTABLISHED_REPLY); + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) + goto oif_changed; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; +} +EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn); + +unsigned int +nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + unsigned int (*do_chain)(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct)) +{ + unsigned int ret; + struct in6_addr daddr = ipv6_hdr(skb)->daddr; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain); + if (ret != NF_DROP && ret != NF_STOLEN && + ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) + skb_dst_drop(skb); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_ipv6_in); + +unsigned int +nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + unsigned int (*do_chain)(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct)) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + int err; +#endif + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct ipv6hdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3) || + (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) { + err = nf_xfrm_me_harder(skb, AF_INET6); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } +#endif + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_ipv6_out); + +unsigned int +nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + unsigned int (*do_chain)(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct)) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + int err; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct ipv6hdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, + &ct->tuplehash[!dir].tuple.src.u3)) { + err = ip6_route_me_harder(skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } +#ifdef CONFIG_XFRM + else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) { + err = nf_xfrm_me_harder(skb, AF_INET6); + if (err < 0) + ret = NF_DROP_ERR(err); + } +#endif + } + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_ipv6_local_fn); + static int __init nf_nat_l3proto_ipv6_init(void) { int err; diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c new file mode 100644 index 000000000000..7745609665cd --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/atomic.h> +#include <linux/netdevice.h> +#include <linux/ipv6.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_nat.h> +#include <net/addrconf.h> +#include <net/ipv6.h> +#include <net/netfilter/ipv6/nf_nat_masquerade.h> + +unsigned int +nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, + const struct net_device *out) +{ + enum ip_conntrack_info ctinfo; + struct in6_addr src; + struct nf_conn *ct; + struct nf_nat_range newrange; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + if (ipv6_dev_get_saddr(dev_net(out), out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) + return NF_DROP; + + nfct_nat(ct)->masq_index = out->ifindex; + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = src; + newrange.max_addr.in6 = src; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); +} +EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); + +static int device_cmp(struct nf_conn *ct, void *ifindex) +{ + const struct nf_conn_nat *nat = nfct_nat(ct); + + if (!nat) + return 0; + if (nf_ct_l3num(ct) != NFPROTO_IPV6) + return 0; + return nat->masq_index == (int)(long)ifindex; +} + +static int masq_device_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + + if (event == NETDEV_DOWN) + nf_ct_iterate_cleanup(net, device_cmp, + (void *)(long)dev->ifindex, 0, 0); + + return NOTIFY_DONE; +} + +static struct notifier_block masq_dev_notifier = { + .notifier_call = masq_device_event, +}; + +static int masq_inet_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = ptr; + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, ifa->idev->dev); + return masq_device_event(this, event, &info); +} + +static struct notifier_block masq_inet_notifier = { + .notifier_call = masq_inet_event, +}; + +static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0); + +void nf_nat_masquerade_ipv6_register_notifier(void) +{ + /* check if the notifier is already set */ + if (atomic_inc_return(&masquerade_notifier_refcount) > 1) + return; + + register_netdevice_notifier(&masq_dev_notifier); + register_inet6addr_notifier(&masq_inet_notifier); +} +EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier); + +void nf_nat_masquerade_ipv6_unregister_notifier(void) +{ + /* check if the notifier still has clients */ + if (atomic_dec_return(&masquerade_notifier_refcount) > 0) + return; + + unregister_inet6addr_notifier(&masq_inet_notifier); + unregister_netdevice_notifier(&masq_dev_notifier); +} +EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c new file mode 100644 index 000000000000..5f5f0438d74d --- /dev/null +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -0,0 +1,163 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/ip6_fib.h> +#include <net/ip6_checksum.h> +#include <linux/netfilter_ipv6.h> + +void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct tcphdr otcph, *tcph; + unsigned int otcplen, hh_len; + int tcphoff, needs_ack; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); + struct ipv6hdr *ip6h; +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; + struct dst_entry *dst = NULL; + u8 proto; + __be16 frag_off; + struct flowi6 fl6; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + pr_debug("addr is not unicast.\n"); + return; + } + + proto = oip6h->nexthdr; + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); + + if ((tcphoff < 0) || (tcphoff > oldskb->len)) { + pr_debug("Cannot get TCP header.\n"); + return; + } + + otcplen = oldskb->len - tcphoff; + + /* IP header checks: fragment, too short. */ + if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { + pr_debug("proto(%d) != IPPROTO_TCP, " + "or too short. otcplen = %d\n", + proto, otcplen); + return; + } + + if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) + BUG(); + + /* No RST for RST. */ + if (otcph.rst) { + pr_debug("RST is set\n"); + return; + } + + /* Check checksum. */ + if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { + pr_debug("TCP checksum is invalid\n"); + return; + } + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; + fl6.fl6_sport = otcph.dest; + fl6.fl6_dport = otcph.source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + return; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + return; + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + net_dbg_ratelimited("cannot alloc skb\n"); + dst_release(dst); + return; + } + + skb_dst_set(nskb, dst); + + skb_reserve(nskb, hh_len + dst->header_len); + + skb_put(nskb, sizeof(struct ipv6hdr)); + skb_reset_network_header(nskb); + ip6h = ipv6_hdr(nskb); + ip6_flow_hdr(ip6h, tclass, 0); + ip6h->hop_limit = ip6_dst_hoplimit(dst); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->saddr = oip6h->daddr; + ip6h->daddr = oip6h->saddr; + + skb_reset_transport_header(nskb); + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + /* Truncate to length (no data) */ + tcph->doff = sizeof(struct tcphdr)/4; + tcph->source = otcph.dest; + tcph->dest = otcph.source; + + if (otcph.ack) { + needs_ack = 0; + tcph->seq = otcph.ack_seq; + tcph->ack_seq = 0; + } else { + needs_ack = 1; + tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin + + otcplen - (otcph.doff<<2)); + tcph->seq = 0; + } + + /* Reset flags */ + ((u_int8_t *)tcph)[13] = 0; + tcph->rst = 1; + tcph->ack = needs_ack; + tcph->window = 0; + tcph->urg_ptr = 0; + tcph->check = 0; + + /* Adjust TCP checksum */ + tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, + &ipv6_hdr(nskb)->daddr, + sizeof(struct tcphdr), IPPROTO_TCP, + csum_partial(tcph, + sizeof(struct tcphdr), 0)); + + nf_ct_attach(nskb, oldskb); + +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + /* If we use ip6_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + nskb->protocol = htons(ETH_P_IPV6); + ip6h->payload_len = htons(sizeof(struct tcphdr)); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + return; + dev_queue_xmit(nskb); + } else +#endif + ip6_local_out(nskb); +} +EXPORT_SYMBOL_GPL(nf_send_reset6); diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index d189fcb437fe..1c4b75dd425b 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -24,144 +24,53 @@ #include <net/netfilter/nf_nat_l3proto.h> #include <net/ipv6.h> -/* - * IPv6 NAT chains - */ - -static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct) { - enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); - __be16 frag_off; - int hdrlen; - u8 nexthdr; struct nft_pktinfo pkt; - unsigned int ret; - - if (ct == NULL || nf_ct_is_untracked(ct)) - return NF_ACCEPT; - - nat = nf_ct_nat_ext_add(ct); - if (nat == NULL) - return NF_ACCEPT; - - switch (ctinfo) { - case IP_CT_RELATED: - case IP_CT_RELATED + IP_CT_IS_REPLY: - nexthdr = ipv6_hdr(skb)->nexthdr; - hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), - &nexthdr, &frag_off); - - if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { - if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - ops->hooknum, - hdrlen)) - return NF_DROP; - else - return NF_ACCEPT; - } - /* Fall through */ - case IP_CT_NEW: - if (nf_nat_initialized(ct, maniptype)) - break; - - nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); - ret = nft_do_chain(&pkt, ops); - if (ret != NF_ACCEPT) - return ret; - if (!nf_nat_initialized(ct, maniptype)) { - ret = nf_nat_alloc_null_binding(ct, ops->hooknum); - if (ret != NF_ACCEPT) - return ret; - } - default: - break; - } + nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); - return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); + return nft_do_chain(&pkt, ops); } -static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - struct in6_addr daddr = ipv6_hdr(skb)->daddr; - unsigned int ret; - - ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN && - ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) - skb_dst_drop(skb); - - return ret; + return nf_nat_ipv6_fn(ops, skb, in, out, nft_nat_do_chain); } -static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - enum ip_conntrack_info ctinfo __maybe_unused; - const struct nf_conn *ct __maybe_unused; - unsigned int ret; - - ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); -#ifdef CONFIG_XFRM - if (ret != NF_DROP && ret != NF_STOLEN && - !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, - &ct->tuplehash[!dir].tuple.dst.u3) || - (ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all)) - if (nf_xfrm_me_harder(skb, AF_INET6) < 0) - ret = NF_DROP; - } -#endif - return ret; + return nf_nat_ipv6_in(ops, skb, in, out, nft_nat_do_chain); } -static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - enum ip_conntrack_info ctinfo; - const struct nf_conn *ct; - unsigned int ret; - - ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + return nf_nat_ipv6_out(ops, skb, in, out, nft_nat_do_chain); +} - if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, - &ct->tuplehash[!dir].tuple.src.u3)) { - if (ip6_route_me_harder(skb)) - ret = NF_DROP; - } -#ifdef CONFIG_XFRM - else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) - if (nf_xfrm_me_harder(skb, AF_INET6)) - ret = NF_DROP; -#endif - } - return ret; +static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return nf_nat_ipv6_local_fn(ops, skb, in, out, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv6 = { @@ -174,10 +83,10 @@ static const struct nf_chain_type nft_chain_nat_ipv6 = { (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), .hooks = { - [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting, - [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting, - [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output, - [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn, + [NF_INET_PRE_ROUTING] = nft_nat_ipv6_in, + [NF_INET_POST_ROUTING] = nft_nat_ipv6_out, + [NF_INET_LOCAL_OUT] = nft_nat_ipv6_local_fn, + [NF_INET_LOCAL_IN] = nft_nat_ipv6_fn, }, }; diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c new file mode 100644 index 000000000000..556262f40761 --- /dev/null +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nft_masq.h> +#include <net/netfilter/ipv6/nf_nat_masquerade.h> + +static void nft_masq_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_masq *priv = nft_expr_priv(expr); + struct nf_nat_range range; + unsigned int verdict; + + range.flags = priv->flags; + + verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); + + data[NFT_REG_VERDICT].verdict = verdict; +} + +static struct nft_expr_type nft_masq_ipv6_type; +static const struct nft_expr_ops nft_masq_ipv6_ops = { + .type = &nft_masq_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), + .eval = nft_masq_ipv6_eval, + .init = nft_masq_init, + .dump = nft_masq_dump, +}; + +static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "masq", + .ops = &nft_masq_ipv6_ops, + .policy = nft_masq_policy, + .maxattr = NFTA_MASQ_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_masq_ipv6_module_init(void) +{ + int ret; + + ret = nft_register_expr(&nft_masq_ipv6_type); + if (ret < 0) + return ret; + + nf_nat_masquerade_ipv6_register_notifier(); + + return ret; +} + +static void __exit nft_masq_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_masq_ipv6_type); + nf_nat_masquerade_ipv6_unregister_notifier(); +} + +module_init(nft_masq_ipv6_module_init); +module_exit(nft_masq_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq"); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 5ec867e4a8b7..fc24c390af05 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -35,7 +35,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) if (found_rhdr) return offset; break; - default : + default: return offset; } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 2d6f860e5c1e..1752cd0b4882 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -8,7 +8,7 @@ * except it reports the sockets in the INET6 address family. * * Authors: David S. Miller (davem@caip.rutgers.edu) - * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index e048cf1bb6a2..e3770abe688a 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -51,6 +51,7 @@ EXPORT_SYMBOL(inet6_del_protocol); #endif const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly; +EXPORT_SYMBOL(inet6_offloads); int inet6_add_offload(const struct net_offload *prot, unsigned char protocol) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 39d44226e402..896af8807979 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -889,7 +889,7 @@ back_from_confirm: else { lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, - len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst, + len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, msg->msg_flags, dontfrag); if (err) @@ -902,7 +902,7 @@ done: dst_release(dst); out: fl6_sock_release(flowlabel); - return err<0?err:len; + return err < 0 ? err : len; do_confirm: dst_confirm(dst); if (!(msg->msg_flags & MSG_PROBE) || len) @@ -1045,7 +1045,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, struct raw6_sock *rp = raw6_sk(sk); int val, len; - if (get_user(len,optlen)) + if (get_user(len, optlen)) return -EFAULT; switch (optname) { @@ -1069,7 +1069,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval,&val,len)) + if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index c6557d9f7808..1a157ca2ebc1 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -62,13 +62,12 @@ static const char ip6_frag_cache_name[] = "ip6-frags"; -struct ip6frag_skb_cb -{ +struct ip6frag_skb_cb { struct inet6_skb_parm h; int offset; }; -#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) +#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb)) static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { @@ -289,7 +288,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, goto found; } prev = NULL; - for(next = fq->q.fragments; next != NULL; next = next->next) { + for (next = fq->q.fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -529,7 +528,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ - if (hdr->payload_len==0) + if (hdr->payload_len == 0) goto fail_hdr; if (!pskb_may_pull(skb, (skb_transport_offset(skb) + @@ -575,8 +574,7 @@ fail_hdr: return -1; } -static const struct inet6_protocol frag_protocol = -{ +static const struct inet6_protocol frag_protocol = { .handler = ipv6_frag_rcv, .flags = INET6_PROTO_NOPOLICY, }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bafde82324c5..a318dd89b6d9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -812,7 +812,7 @@ out: } -struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6, +struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, int flags) { return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); @@ -842,7 +842,6 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, return NULL; } - EXPORT_SYMBOL(rt6_lookup); /* ip6_ins_rt is called with FREE table->tb6_lock. @@ -1023,7 +1022,7 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } -struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, +struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { int flags = 0; @@ -1040,7 +1039,6 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); } - EXPORT_SYMBOL(ip6_route_output); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) @@ -1145,7 +1143,7 @@ static void ip6_link_failure(struct sk_buff *skb) static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { - struct rt6_info *rt6 = (struct rt6_info*)dst; + struct rt6_info *rt6 = (struct rt6_info *)dst; dst_confirm(dst); if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { @@ -1920,7 +1918,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, return NULL; read_lock_bh(&table->tb6_lock); - fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); + fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0); if (!fn) goto out; @@ -1979,7 +1977,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev return NULL; read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { + for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { if (dev == rt->dst.dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) @@ -2064,7 +2062,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) struct in6_rtmsg rtmsg; int err; - switch(cmd) { + switch (cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -2187,7 +2185,7 @@ int ip6_route_get_saddr(struct net *net, unsigned int prefs, struct in6_addr *saddr) { - struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); + struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt); int err = 0; if (rt->rt6i_prefsrc.plen) *saddr = rt->rt6i_prefsrc.addr; @@ -2482,7 +2480,7 @@ beginning: return last_err; } -static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh) +static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct fib6_config cfg; int err; @@ -2497,7 +2495,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh) return ip6_route_del(&cfg); } -static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh) +static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct fib6_config cfg; int err; @@ -2689,7 +2687,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, 0, NLM_F_MULTI); } -static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) +static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6163f851dc01..6eab37cf5345 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -812,9 +812,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, const struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; - struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ - unsigned int max_headroom; /* The extra header space needed */ + struct rtable *rt; /* Route to the other host */ + struct net_device *tdev; /* Device to other host */ + unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; @@ -822,6 +822,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, int addr_type; u8 ttl; int err; + u8 protocol = IPPROTO_IPV6; + int t_hlen = tunnel->hlen + sizeof(struct iphdr); if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; @@ -911,8 +913,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } + skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); + if (IS_ERR(skb)) { + ip_rt_put(rt); + goto out; + } + if (df) { - mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - t_hlen; if (mtu < 68) { dev->stats.collisions++; @@ -947,7 +955,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, /* * Okay, now see if we can stuff it in the buffer as-is. */ - max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr); + max_headroom = LL_RESERVED_SPACE(tdev) + t_hlen; if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { @@ -969,14 +977,15 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ttl = iph6->hop_limit; tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); - if (IS_ERR(skb)) { + if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) { ip_rt_put(rt); - goto out; + goto tx_error; } + skb_set_inner_ipproto(skb, IPPROTO_IPV6); + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, - IPPROTO_IPV6, tos, ttl, df, + protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; @@ -999,6 +1008,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (IS_ERR(skb)) goto out; + skb_set_inner_ipproto(skb, IPPROTO_IPIP); + ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); return NETDEV_TX_OK; out: @@ -1059,8 +1070,10 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev) { + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); - dev->mtu = tdev->mtu - sizeof(struct iphdr); + dev->mtu = tdev->mtu - t_hlen; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } @@ -1123,7 +1136,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, #endif static int -ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; @@ -1307,7 +1320,10 @@ done: static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr)) + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen) return -EINVAL; dev->mtu = new_mtu; return 0; @@ -1338,14 +1354,17 @@ static void ipip6_dev_free(struct net_device *dev) static void ipip6_tunnel_setup(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + dev->netdev_ops = &ipip6_netdev_ops; - dev->destructor = ipip6_dev_free; + dev->destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); - dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; dev->flags = IFF_NOARP; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + netif_keep_dst(dev); dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_LLTX; @@ -1466,6 +1485,40 @@ static void ipip6_netlink_parms(struct nlattr *data[], } +/* This function returns true when ENCAP attributes are present in the nl msg */ +static bool ipip6_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *ipencap) +{ + bool ret = false; + + memset(ipencap, 0, sizeof(*ipencap)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_ENCAP_TYPE]) { + ret = true; + ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); + } + + if (data[IFLA_IPTUN_ENCAP_FLAGS]) { + ret = true; + ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); + } + + if (data[IFLA_IPTUN_ENCAP_SPORT]) { + ret = true; + ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]); + } + + if (data[IFLA_IPTUN_ENCAP_DPORT]) { + ret = true; + ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]); + } + + return ret; +} + #ifdef CONFIG_IPV6_SIT_6RD /* This function returns true when 6RD attributes are present in the nl msg */ static bool ipip6_netlink_6rd_parms(struct nlattr *data[], @@ -1509,12 +1562,20 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, { struct net *net = dev_net(dev); struct ip_tunnel *nt; + struct ip_tunnel_encap ipencap; #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; #endif int err; nt = netdev_priv(dev); + + if (ipip6_netlink_encap_parms(data, &ipencap)) { + err = ip_tunnel_encap_setup(nt, &ipencap); + if (err < 0) + return err; + } + ipip6_netlink_parms(data, &nt->parms); if (ipip6_tunnel_locate(net, &nt->parms, 0)) @@ -1537,15 +1598,23 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; + struct ip_tunnel_encap ipencap; struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; #endif + int err; if (dev == sitn->fb_tunnel_dev) return -EINVAL; + if (ipip6_netlink_encap_parms(data, &ipencap)) { + err = ip_tunnel_encap_setup(t, &ipencap); + if (err < 0) + return err; + } + ipip6_netlink_parms(data, &p); if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || @@ -1599,6 +1668,14 @@ static size_t ipip6_get_size(const struct net_device *dev) /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */ nla_total_size(2) + #endif + /* IFLA_IPTUN_ENCAP_TYPE */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_FLAGS */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_SPORT */ + nla_total_size(2) + + /* IFLA_IPTUN_ENCAP_DPORT */ + nla_total_size(2) + 0; } @@ -1630,6 +1707,16 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; #endif + if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, + tunnel->encap.type) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT, + tunnel->encap.sport) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT, + tunnel->encap.dport) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, + tunnel->encap.dport)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -1651,6 +1738,10 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 }, [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 }, #endif + [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, }; static void ipip6_dellink(struct net_device *dev, struct list_head *head) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 83cea1d39466..9a2838e93cc5 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -24,7 +24,7 @@ #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS]; +static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; /* RFC 2460, Section 8.3: * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] @@ -203,7 +203,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->ir_num = ntohs(th->dest); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - if (ipv6_opt_accepted(sk, skb) || + if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 0c56c93619e0..c5c10fafcfe2 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,6 +16,8 @@ #include <net/addrconf.h> #include <net/inet_frag.h> +static int one = 1; + static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", @@ -63,6 +65,14 @@ static struct ctl_table ipv6_rotable[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "mld_qrv", + .data = &sysctl_mld_qrv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, { } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 03a5d1ed3340..cf2e45ab2fa4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -92,13 +92,16 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - const struct rt6_info *rt = (const struct rt6_info *)dst; - dst_hold(dst); - sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - if (rt->rt6i_node) - inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; + if (dst) { + const struct rt6_info *rt = (const struct rt6_info *)dst; + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + if (rt->rt6i_node) + inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; + } } static void tcp_v6_hash(struct sock *sk) @@ -737,8 +740,9 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = inet6_iif(skb); - if (!TCP_SKB_CB(skb)->when && - (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || + if (!TCP_SKB_CB(skb)->tcp_tw_isn && + (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || np->repflow)) { atomic_inc(&skb->users); @@ -1363,7 +1367,7 @@ ipv6_pktoptions: np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); if (np->repflow) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); - if (ipv6_opt_accepted(sk, opt_skb)) { + if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); } else { @@ -1407,11 +1411,19 @@ static int tcp_v6_rcv(struct sk_buff *skb) th = tcp_hdr(skb); hdr = ipv6_hdr(skb); + /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() + * barrier() makes sure compiler wont play fool^Waliasing games. + */ + memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), + sizeof(struct inet6_skb_parm)); + barrier(); + TCP_SKB_CB(skb)->seq = ntohl(th->seq); TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->when = 0; + TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_tw_isn = 0; TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 01b0ff9a0c2c..c1ab77105b4c 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -15,54 +15,17 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" -static int tcp_v6_gso_send_check(struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - struct tcphdr *th; - - if (!pskb_may_pull(skb, sizeof(*th))) - return -EINVAL; - - ipv6h = ipv6_hdr(skb); - th = tcp_hdr(skb); - - th->check = 0; - skb->ip_summed = CHECKSUM_PARTIAL; - __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr); - return 0; -} - static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - const struct ipv6hdr *iph = skb_gro_network_header(skb); - __wsum wsum; - /* Don't bother verifying checksum if we're going to flush anyway. */ - if (NAPI_GRO_CB(skb)->flush) - goto skip_csum; - - wsum = NAPI_GRO_CB(skb)->csum; - - switch (skb->ip_summed) { - case CHECKSUM_NONE: - wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), - wsum); - - /* fall through */ - - case CHECKSUM_COMPLETE: - if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - wsum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - + if (!NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_validate(skb, IPPROTO_TCP, + ip6_gro_compute_pseudo)) { NAPI_GRO_CB(skb)->flush = 1; return NULL; } -skip_csum: return tcp_gro_receive(head, skb); } @@ -78,10 +41,32 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff) return tcp_gro_complete(skb); } +struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return ERR_PTR(-EINVAL); + + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + /* Set up pseudo header, usually expect stack to have done + * this. + */ + + th->check = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr); + } + + return tcp_gso_segment(skb, features); +} static const struct net_offload tcpv6_offload = { .callbacks = { - .gso_send_check = tcp_v6_gso_send_check, - .gso_segment = tcp_gso_segment, + .gso_segment = tcp6_gso_segment, .gro_receive = tcp6_gro_receive, .gro_complete = tcp6_gro_complete, }, diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 2c4e4c5c7614..3c758007b327 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -15,7 +15,7 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Authors Mitsuru KANDA <mk@linux-ipv6.org> - * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> */ #define pr_fmt(fmt) "IPv6: " fmt @@ -64,7 +64,6 @@ err: return ret; } - EXPORT_SYMBOL(xfrm6_tunnel_register); int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) @@ -92,7 +91,6 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) return ret; } - EXPORT_SYMBOL(xfrm6_tunnel_deregister); #define for_each_tunnel_rcu(head, handler) \ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4836af8f582d..f6ba535b6feb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -243,7 +243,7 @@ begin: goto exact_match; } else if (score == badness && reuseport) { matches++; - if (((u64)hash * matches) >> 32 == 0) + if (reciprocal_scale(hash, matches) == 0) result = sk; hash = next_pseudo_random32(hash); } @@ -323,7 +323,7 @@ begin: } } else if (score == badness && reuseport) { matches++; - if (((u64)hash * matches) >> 32 == 0) + if (reciprocal_scale(hash, matches) == 0) result = sk; hash = next_pseudo_random32(hash); } @@ -373,8 +373,8 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup); /* - * This should be easy, if there is something there we - * return it, otherwise we block. + * This should be easy, if there is something there we + * return it, otherwise we block. */ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, @@ -530,7 +530,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct in6_addr *saddr = &hdr->saddr; const struct in6_addr *daddr = &hdr->daddr; - struct udphdr *uh = (struct udphdr*)(skb->data+offset); + struct udphdr *uh = (struct udphdr *)(skb->data+offset); struct sock *sk; int err; struct net *net = dev_net(skb->dev); @@ -596,7 +596,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, - u8 code, int offset, __be32 info ) + u8 code, int offset, __be32 info) { __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } @@ -891,6 +891,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; } + if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_compute_pseudo); + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); @@ -960,10 +964,10 @@ static void udp_v6_flush_pending_frames(struct sock *sk) } /** - * udp6_hwcsum_outgoing - handle outgoing HW checksumming - * @sk: socket we are sending on - * @skb: sk_buff containing the filled-in UDP header - * (checksum field must be zeroed out) + * udp6_hwcsum_outgoing - handle outgoing HW checksumming + * @sk: socket we are sending on + * @skb: sk_buff containing the filled-in UDP header + * (checksum field must be zeroed out) */ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, const struct in6_addr *saddr, @@ -1294,7 +1298,7 @@ do_append_data: getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, - (struct rt6_info*)dst, + (struct rt6_info *)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); if (err) udp_v6_flush_pending_frames(sk); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 0ae3d98f83e0..6b8f543f6ac6 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -10,34 +10,13 @@ * UDPv6 GSO support */ #include <linux/skbuff.h> +#include <linux/netdevice.h> #include <net/protocol.h> #include <net/ipv6.h> #include <net/udp.h> #include <net/ip6_checksum.h> #include "ip6_offload.h" -static int udp6_ufo_send_check(struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(*uh))) - return -EINVAL; - - if (likely(!skb->encapsulation)) { - ipv6h = ipv6_hdr(skb); - uh = udp_hdr(skb); - - uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - } - - return 0; -} - static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { @@ -48,7 +27,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u8 *packet_start, *prevhdr; u8 nexthdr; u8 frag_hdr_sz = sizeof(struct frag_hdr); - int offset; __wsum csum; int tnl_hlen; @@ -80,15 +58,29 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) - segs = skb_udp_tunnel_segment(skb, features); + segs = skb_udp_tunnel_segment(skb, features, true); else { + const struct ipv6hdr *ipv6h; + struct udphdr *uh; + + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot * do checksum of UDP packets sent as multiple IP fragments. */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); + + uh = udp_hdr(skb); + ipv6h = ipv6_hdr(skb); + + uh->check = 0; + csum = skb_checksum(skb, 0, skb->len, 0); + uh->check = udp_v6_check(skb->len, &ipv6h->saddr, + &ipv6h->daddr, csum); + + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + skb->ip_summed = CHECKSUM_NONE; /* Check if there is enough headroom to insert fragment header. */ @@ -127,10 +119,52 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, out: return segs; } + +static struct sk_buff **udp6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_gro_udphdr(skb); + + if (unlikely(!uh)) + goto flush; + + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip; + + if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo)) + goto flush; + else if (uh->check) + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo); + +skip: + NAPI_GRO_CB(skb)->is_ipv6 = 1; + return udp_gro_receive(head, skb, uh); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; +} + +static int udp6_gro_complete(struct sk_buff *skb, int nhoff) +{ + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); + + if (uh->check) + uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, + &ipv6h->daddr, 0); + + return udp_gro_complete(skb, nhoff); +} + static const struct net_offload udpv6_offload = { .callbacks = { - .gso_send_check = udp6_ufo_send_check, .gso_segment = udp6_ufo_fragment, + .gro_receive = udp6_gro_receive, + .gro_complete = udp6_gro_complete, }, }; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index f8c3cf842f53..f48fbe4d16f5 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -3,8 +3,8 @@ * * Authors: * Mitsuru KANDA @USAGI - * Kazunori MIYAZAWA @USAGI - * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * YOSHIFUJI Hideaki @USAGI * IPv6 support */ @@ -52,7 +52,6 @@ int xfrm6_rcv(struct sk_buff *skb) return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], 0); } - EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, @@ -142,5 +141,4 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, drop: return -1; } - EXPORT_SYMBOL(xfrm6_input_addr); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 433672d07d0b..ca3f29b98ae5 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -25,7 +25,6 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, { return ip6_find_1stfragopt(skb, prevhdr); } - EXPORT_SYMBOL(xfrm6_find_1stfragopt); static int xfrm6_local_dontfrag(struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 2a0bbda2c76a..ac49f84fe2c3 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -3,11 +3,11 @@ * * Authors: * Mitsuru KANDA @USAGI - * Kazunori MIYAZAWA @USAGI - * Kunihiro Ishiguro <kunihiro@ipinfusion.com> - * IPv6 support - * YOSHIFUJI Hideaki - * Split up af-specific portion + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * IPv6 support + * YOSHIFUJI Hideaki + * Split up af-specific portion * */ @@ -84,7 +84,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { if (dst->ops->family == AF_INET6) { - struct rt6_info *rt = (struct rt6_info*)dst; + struct rt6_info *rt = (struct rt6_info *)dst; if (rt->rt6i_node) path->path_cookie = rt->rt6i_node->fn_sernum; } @@ -97,7 +97,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { - struct rt6_info *rt = (struct rt6_info*)xdst->route; + struct rt6_info *rt = (struct rt6_info *)xdst->route; xdst->u.dst.dev = dev; dev_hold(dev); @@ -296,7 +296,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, .dst_ops = &xfrm6_dst_ops, .dst_lookup = xfrm6_dst_lookup, - .get_saddr = xfrm6_get_saddr, + .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, .init_dst = xfrm6_init_dst, @@ -319,9 +319,9 @@ static void xfrm6_policy_fini(void) static struct ctl_table xfrm6_policy_table[] = { { .procname = "xfrm6_gc_thresh", - .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, - .maxlen = sizeof(int), - .mode = 0644, + .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = proc_dointvec, }, { } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 3fc970135fc6..8a1f9c0d2a13 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -3,11 +3,11 @@ * * Authors: * Mitsuru KANDA @USAGI - * Kazunori MIYAZAWA @USAGI - * Kunihiro Ishiguro <kunihiro@ipinfusion.com> - * IPv6 support - * YOSHIFUJI Hideaki @USAGI - * Split up af-specific portion + * Kazunori MIYAZAWA @USAGI + * Kunihiro Ishiguro <kunihiro@ipinfusion.com> + * IPv6 support + * YOSHIFUJI Hideaki @USAGI + * Split up af-specific portion * */ @@ -45,10 +45,10 @@ xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, const xfrm_address_t *daddr, const xfrm_address_t *saddr) { x->id = tmpl->id; - if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) + if (ipv6_addr_any((struct in6_addr *)&x->id.daddr)) memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr)); memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); - if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) + if (ipv6_addr_any((struct in6_addr *)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 1c66465a42dd..5743044cd660 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -15,7 +15,7 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Authors Mitsuru KANDA <mk@linux-ipv6.org> - * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> * * Based on net/ipv4/xfrm4_tunnel.c * @@ -110,7 +110,6 @@ __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr) rcu_read_unlock_bh(); return htonl(spi); } - EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup); static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi) @@ -187,7 +186,6 @@ __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) return htonl(spi); } - EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi); static void x6spi_destroy_rcu(struct rcu_head *head) |