diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-05 12:31:59 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-05 12:31:59 -0700 |
commit | 5518b69b76680a4f2df96b1deca260059db0c2de (patch) | |
tree | f33cd1519c8efb4590500f2f9617400be233238c /net/ipv4/ipmr.c | |
parent | 8ad06e56dcbc1984ef0ff8f6e3c19982c5809f73 (diff) | |
parent | 0e72582270c07850b92cac351c8b97d4f9c123b9 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Reasonably busy this cycle, but perhaps not as busy as in the 4.12
merge window:
1) Several optimizations for UDP processing under high load from
Paolo Abeni.
2) Support pacing internally in TCP when using the sch_fq packet
scheduler for this is not practical. From Eric Dumazet.
3) Support mutliple filter chains per qdisc, from Jiri Pirko.
4) Move to 1ms TCP timestamp clock, from Eric Dumazet.
5) Add batch dequeueing to vhost_net, from Jason Wang.
6) Flesh out more completely SCTP checksum offload support, from
Davide Caratti.
7) More plumbing of extended netlink ACKs, from David Ahern, Pablo
Neira Ayuso, and Matthias Schiffer.
8) Add devlink support to nfp driver, from Simon Horman.
9) Add RTM_F_FIB_MATCH flag to RTM_GETROUTE queries, from Roopa
Prabhu.
10) Add stack depth tracking to BPF verifier and use this information
in the various eBPF JITs. From Alexei Starovoitov.
11) Support XDP on qed device VFs, from Yuval Mintz.
12) Introduce BPF PROG ID for better introspection of installed BPF
programs. From Martin KaFai Lau.
13) Add bpf_set_hash helper for TC bpf programs, from Daniel Borkmann.
14) For loads, allow narrower accesses in bpf verifier checking, from
Yonghong Song.
15) Support MIPS in the BPF selftests and samples infrastructure, the
MIPS eBPF JIT will be merged in via the MIPS GIT tree. From David
Daney.
16) Support kernel based TLS, from Dave Watson and others.
17) Remove completely DST garbage collection, from Wei Wang.
18) Allow installing TCP MD5 rules using prefixes, from Ivan
Delalande.
19) Add XDP support to Intel i40e driver, from Björn Töpel
20) Add support for TC flower offload in nfp driver, from Simon
Horman, Pieter Jansen van Vuuren, Benjamin LaHaise, Jakub
Kicinski, and Bert van Leeuwen.
21) IPSEC offloading support in mlx5, from Ilan Tayari.
22) Add HW PTP support to macb driver, from Rafal Ozieblo.
23) Networking refcount_t conversions, From Elena Reshetova.
24) Add sock_ops support to BPF, from Lawrence Brako. This is useful
for tuning the TCP sockopt settings of a group of applications,
currently via CGROUPs"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1899 commits)
net: phy: dp83867: add workaround for incorrect RX_CTRL pin strap
dt-bindings: phy: dp83867: provide a workaround for incorrect RX_CTRL pin strap
cxgb4: Support for get_ts_info ethtool method
cxgb4: Add PTP Hardware Clock (PHC) support
cxgb4: time stamping interface for PTP
nfp: default to chained metadata prepend format
nfp: remove legacy MAC address lookup
nfp: improve order of interfaces in breakout mode
net: macb: remove extraneous return when MACB_EXT_DESC is defined
bpf: add missing break in for the TCP_BPF_SNDCWND_CLAMP case
bpf: fix return in load_bpf_file
mpls: fix rtm policy in mpls_getroute
net, ax25: convert ax25_cb.refcount from atomic_t to refcount_t
net, ax25: convert ax25_route.refcount from atomic_t to refcount_t
net, ax25: convert ax25_uid_assoc.refcount from atomic_t to refcount_t
net, sctp: convert sctp_ep_common.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_transport.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_chunk.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_datamsg.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_auth_bytes.refcnt from atomic_t to refcount_t
...
Diffstat (limited to 'net/ipv4/ipmr.c')
-rw-r--r-- | net/ipv4/ipmr.c | 266 |
1 files changed, 260 insertions, 6 deletions
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8ae425cad818..bb909f1d7537 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -109,6 +109,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); +static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); static void mroute_clean_tables(struct mr_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); @@ -669,7 +670,8 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { - struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); + struct nlmsghdr *nlh = skb_pull(skb, + sizeof(struct iphdr)); nlh->nlmsg_type = NLMSG_ERROR; nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); @@ -972,7 +974,8 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, /* Play the pending entries through our router */ while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { - struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); + struct nlmsghdr *nlh = skb_pull(skb, + sizeof(struct iphdr)); if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - @@ -993,8 +996,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, } } -/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted - * expects the following bizarre scheme. +/* Bounce a cache query up to mrouted and netlink. * * Called under mrt_lock. */ @@ -1044,7 +1046,7 @@ static int ipmr_cache_report(struct mr_table *mrt, msg->im_vif = vifi; skb_dst_set(skb, dst_clone(skb_dst(pkt))); /* Add our header */ - igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); + igmp = skb_put(skb, sizeof(struct igmphdr)); igmp->type = assert; msg->im_msgtype = assert; igmp->code = 0; @@ -1060,6 +1062,8 @@ static int ipmr_cache_report(struct mr_table *mrt, return -EINVAL; } + igmpmsg_netlink_event(mrt, skb); + /* Deliver to mrouted */ ret = sock_queue_rcv_skb(mroute_sk, skb); rcu_read_unlock(); @@ -2339,6 +2343,130 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err); } +static size_t igmpmsg_netlink_msgsize(size_t payloadlen) +{ + size_t len = + NLMSG_ALIGN(sizeof(struct rtgenmsg)) + + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */ + + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */ + + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */ + + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */ + /* IPMRA_CREPORT_PKT */ + + nla_total_size(payloadlen) + ; + + return len; +} + +static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) +{ + struct net *net = read_pnet(&mrt->net); + struct nlmsghdr *nlh; + struct rtgenmsg *rtgenm; + struct igmpmsg *msg; + struct sk_buff *skb; + struct nlattr *nla; + int payloadlen; + + payloadlen = pkt->len - sizeof(struct igmpmsg); + msg = (struct igmpmsg *)skb_network_header(pkt); + + skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC); + if (!skb) + goto errout; + + nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT, + sizeof(struct rtgenmsg), 0); + if (!nlh) + goto errout; + rtgenm = nlmsg_data(nlh); + rtgenm->rtgen_family = RTNL_FAMILY_IPMR; + if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) || + nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif) || + nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR, + msg->im_src.s_addr) || + nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR, + msg->im_dst.s_addr)) + goto nla_put_failure; + + nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen); + if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg), + nla_data(nla), payloadlen)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC); + return; + +nla_put_failure: + nlmsg_cancel(skb, nlh); +errout: + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); +} + +static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[RTA_MAX + 1]; + struct sk_buff *skb = NULL; + struct mfc_cache *cache; + struct mr_table *mrt; + struct rtmsg *rtm; + __be32 src, grp; + u32 tableid; + int err; + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err < 0) + goto errout; + + rtm = nlmsg_data(nlh); + + src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; + grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; + tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; + + mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); + if (IS_ERR(mrt)) { + err = PTR_ERR(mrt); + goto errout_free; + } + + /* entries are added/deleted only under RTNL */ + rcu_read_lock(); + cache = ipmr_cache_find(mrt, src, grp); + rcu_read_unlock(); + if (!cache) { + err = -ENOENT; + goto errout_free; + } + + skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto errout_free; + } + + err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, cache, + RTM_NEWROUTE, 0); + if (err < 0) + goto errout_free; + + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); + +errout: + return err; + +errout_free: + kfree_skb(skb); + goto errout; +} + static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -2526,6 +2654,129 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, return ipmr_mfc_delete(tbl, &mfcc, parent); } +static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) +{ + u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len); + + if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || + nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || + nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, + mrt->mroute_reg_vif_num) || + nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, + mrt->mroute_do_assert) || + nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim)) + return false; + + return true; +} + +static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) +{ + struct nlattr *vif_nest; + struct vif_device *vif; + + /* if the VIF doesn't exist just continue */ + if (!VIF_EXISTS(mrt, vifid)) + return true; + + vif = &mrt->vif_table[vifid]; + vif_nest = nla_nest_start(skb, IPMRA_VIF); + if (!vif_nest) + return false; + if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || + nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || + nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || + nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, + IPMRA_VIFA_PAD) || + nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, + IPMRA_VIFA_PAD) || + nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, + IPMRA_VIFA_PAD) || + nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, + IPMRA_VIFA_PAD) || + nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || + nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { + nla_nest_cancel(skb, vif_nest); + return false; + } + nla_nest_end(skb, vif_nest); + + return true; +} + +static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct nlmsghdr *nlh = NULL; + unsigned int t = 0, s_t; + unsigned int e = 0, s_e; + struct mr_table *mrt; + + s_t = cb->args[0]; + s_e = cb->args[1]; + + ipmr_for_each_table(mrt, net) { + struct nlattr *vifs, *af; + struct ifinfomsg *hdr; + u32 i; + + if (t < s_t) + goto skip_table; + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWLINK, + sizeof(*hdr), NLM_F_MULTI); + if (!nlh) + break; + + hdr = nlmsg_data(nlh); + memset(hdr, 0, sizeof(*hdr)); + hdr->ifi_family = RTNL_FAMILY_IPMR; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) { + nlmsg_cancel(skb, nlh); + goto out; + } + + if (!ipmr_fill_table(mrt, skb)) { + nlmsg_cancel(skb, nlh); + goto out; + } + + vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS); + if (!vifs) { + nla_nest_end(skb, af); + nlmsg_end(skb, nlh); + goto out; + } + for (i = 0; i < mrt->maxvif; i++) { + if (e < s_e) + goto skip_entry; + if (!ipmr_fill_vif(mrt, i, skb)) { + nla_nest_end(skb, vifs); + nla_nest_end(skb, af); + nlmsg_end(skb, nlh); + goto out; + } +skip_entry: + e++; + } + s_e = 0; + e = 0; + nla_nest_end(skb, vifs); + nla_nest_end(skb, af); + nlmsg_end(skb, nlh); +skip_table: + t++; + } + +out: + cb->args[1] = e; + cb->args[0] = t; + + return skb->len; +} + #ifdef CONFIG_PROC_FS /* The /proc interfaces to multicast routing : * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif @@ -2863,11 +3114,14 @@ int __init ip_mr_init(void) } #endif rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, - NULL, ipmr_rtm_dumproute, NULL); + ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL); rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, ipmr_rtm_route, NULL, NULL); rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, ipmr_rtm_route, NULL, NULL); + + rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, + NULL, ipmr_rtm_dumplink, NULL); return 0; #ifdef CONFIG_IP_PIMSM_V2 |