From 8d356b89f36d234a56434a110ae779e8ac389ca2 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 4 Jul 2018 16:46:29 -0700 Subject: rtnetlink: add rtnl_link_state check in rtnl_configure_link rtnl_configure_link sets dev->rtnl_link_state to RTNL_LINK_INITIALIZED and unconditionally calls __dev_notify_flags to notify user-space of dev flags. current call sequence for rtnl_configure_link rtnetlink_newlink rtnl_link_ops->newlink rtnl_configure_link (unconditionally notifies userspace of default and new dev flags) If a newlink handler wants to call rtnl_configure_link early, we will end up with duplicate notifications to user-space. This patch fixes rtnl_configure_link to check rtnl_link_state and call __dev_notify_flags with gchanges = 0 if already RTNL_LINK_INITIALIZED. Later in the series, this patch will help the following sequence where a driver implementing newlink can call rtnl_configure_link to initialize the link early. makes the following call sequence work: rtnetlink_newlink rtnl_link_ops->newlink (vxlan) -> rtnl_configure_link (initializes link and notifies user-space of default dev flags) rtnl_configure_link (updates dev flags if requested by user ifm and notifies user-space of new dev flags) Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5ef61222fdef..e3f743c141b3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2759,9 +2759,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) return err; } - dev->rtnl_link_state = RTNL_LINK_INITIALIZED; - - __dev_notify_flags(dev, old_flags, ~0U); + if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) { + __dev_notify_flags(dev, old_flags, 0U); + } else { + dev->rtnl_link_state = RTNL_LINK_INITIALIZED; + __dev_notify_flags(dev, old_flags, ~0U); + } return 0; } EXPORT_SYMBOL(rtnl_configure_link); -- cgit v1.2.3-58-ga151 From 25e20e730d56471cffa25419bf2a66078bd55330 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 4 Jul 2018 16:46:30 -0700 Subject: vxlan: add new fdb alloc and create helpers - Add new vxlan_fdb_alloc helper - rename existing vxlan_fdb_create into vxlan_fdb_update: because it really creates or updates an existing fdb entry - move new fdb creation into a separate vxlan_fdb_create Main motivation for this change is to introduce the ability to decouple vxlan fdb creation and notify, used in a later patch. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 91 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 29 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 601ae176808d..aa88bebd9d69 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -637,8 +637,61 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); } -/* Add new entry to forwarding table -- assumes lock held */ +static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, + const u8 *mac, __u16 state, + __be32 src_vni, __u8 ndm_flags) +{ + struct vxlan_fdb *f; + + f = kmalloc(sizeof(*f), GFP_ATOMIC); + if (!f) + return NULL; + f->state = state; + f->flags = ndm_flags; + f->updated = f->used = jiffies; + f->vni = src_vni; + INIT_LIST_HEAD(&f->remotes); + memcpy(f->eth_addr, mac, ETH_ALEN); + + return f; +} + static int vxlan_fdb_create(struct vxlan_dev *vxlan, + const u8 *mac, union vxlan_addr *ip, + __u16 state, __be16 port, __be32 src_vni, + __be32 vni, __u32 ifindex, __u8 ndm_flags, + struct vxlan_fdb **fdb) +{ + struct vxlan_rdst *rd = NULL; + struct vxlan_fdb *f; + int rc; + + if (vxlan->cfg.addrmax && + vxlan->addrcnt >= vxlan->cfg.addrmax) + return -ENOSPC; + + netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); + f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags); + if (!f) + return -ENOMEM; + + rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); + if (rc < 0) { + kfree(f); + return rc; + } + + ++vxlan->addrcnt; + hlist_add_head_rcu(&f->hlist, + vxlan_fdb_head(vxlan, mac, src_vni)); + + *fdb = f; + + return 0; +} + +/* Add new entry to forwarding table -- assumes lock held */ +static int vxlan_fdb_update(struct vxlan_dev *vxlan, const u8 *mac, union vxlan_addr *ip, __u16 state, __u16 flags, __be16 port, __be32 src_vni, __be32 vni, @@ -688,37 +741,17 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, if (!(flags & NLM_F_CREATE)) return -ENOENT; - if (vxlan->cfg.addrmax && - vxlan->addrcnt >= vxlan->cfg.addrmax) - return -ENOSPC; - /* Disallow replace to add a multicast entry */ if ((flags & NLM_F_REPLACE) && (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac))) return -EOPNOTSUPP; netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); - f = kmalloc(sizeof(*f), GFP_ATOMIC); - if (!f) - return -ENOMEM; - - notify = 1; - f->state = state; - f->flags = ndm_flags; - f->updated = f->used = jiffies; - f->vni = src_vni; - INIT_LIST_HEAD(&f->remotes); - memcpy(f->eth_addr, mac, ETH_ALEN); - - rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); - if (rc < 0) { - kfree(f); + rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni, + vni, ifindex, ndm_flags, &f); + if (rc < 0) return rc; - } - - ++vxlan->addrcnt; - hlist_add_head_rcu(&f->hlist, - vxlan_fdb_head(vxlan, mac, src_vni)); + notify = 1; } if (notify) { @@ -864,7 +897,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EAFNOSUPPORT; spin_lock_bh(&vxlan->hash_lock); - err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags, + err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags, port, src_vni, vni, ifindex, ndm->ndm_flags); spin_unlock_bh(&vxlan->hash_lock); @@ -1007,7 +1040,7 @@ static bool vxlan_snoop(struct net_device *dev, /* close off race between vxlan_flush and incoming packets */ if (netif_running(dev)) - vxlan_fdb_create(vxlan, src_mac, src_ip, + vxlan_fdb_update(vxlan, src_mac, src_ip, NUD_REACHABLE, NLM_F_EXCL|NLM_F_CREATE, vxlan->cfg.dst_port, @@ -3172,7 +3205,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, /* create an fdb entry for a valid default destination */ if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, + err = vxlan_fdb_update(vxlan, all_zeros_mac, &vxlan->default_dst.remote_ip, NUD_REACHABLE | NUD_PERMANENT, NLM_F_EXCL | NLM_F_CREATE, @@ -3452,7 +3485,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], old_dst.remote_ifindex, 0); if (!vxlan_addr_any(&dst->remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, + err = vxlan_fdb_update(vxlan, all_zeros_mac, &dst->remote_ip, NUD_REACHABLE | NUD_PERMANENT, NLM_F_CREATE | NLM_F_APPEND, -- cgit v1.2.3-58-ga151 From 4c2438ba85cad3be282e19147782ef3a99717a1a Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 4 Jul 2018 16:46:31 -0700 Subject: vxlan: make netlink notify in vxlan_fdb_destroy optional Add a new option do_notify to vxlan_fdb_destroy to make sending netlink notify optional. Used by a later patch. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index aa88bebd9d69..794a9a7b235f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -775,13 +775,15 @@ static void vxlan_fdb_free(struct rcu_head *head) kfree(f); } -static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) +static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, + bool do_notify) { netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr); --vxlan->addrcnt; - vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH); + if (do_notify) + vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH); hlist_del_rcu(&f->hlist); call_rcu(&f->rcu, vxlan_fdb_free); @@ -931,7 +933,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan, goto out; } - vxlan_fdb_destroy(vxlan, f); + vxlan_fdb_destroy(vxlan, f, true); out: return 0; @@ -2399,7 +2401,7 @@ static void vxlan_cleanup(struct timer_list *t) "garbage collect %pM\n", f->eth_addr); f->state = NUD_STALE; - vxlan_fdb_destroy(vxlan, f); + vxlan_fdb_destroy(vxlan, f, true); } else if (time_before(timeout, next_timer)) next_timer = timeout; } @@ -2450,7 +2452,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) spin_lock_bh(&vxlan->hash_lock); f = __vxlan_find_mac(vxlan, all_zeros_mac, vni); if (f) - vxlan_fdb_destroy(vxlan, f); + vxlan_fdb_destroy(vxlan, f, true); spin_unlock_bh(&vxlan->hash_lock); } @@ -2504,7 +2506,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all) continue; /* the all_zeros_mac entry is deleted at vxlan_uninit */ if (!is_zero_ether_addr(f->eth_addr)) - vxlan_fdb_destroy(vxlan, f); + vxlan_fdb_destroy(vxlan, f, true); } } spin_unlock_bh(&vxlan->hash_lock); -- cgit v1.2.3-58-ga151 From 0241b836732f5f43c3f0fd9e9073c1fb24ea6757 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 4 Jul 2018 16:46:32 -0700 Subject: vxlan: fix default fdb entry netlink notify ordering during netdev create Problem: In vxlan_newlink, a default fdb entry is added before register_netdev. The default fdb creation function also notifies user-space of the fdb entry on the vxlan device which user-space does not know about yet. (RTM_NEWNEIGH goes before RTM_NEWLINK for the same ifindex). This patch fixes the user-space netlink notification ordering issue with the following changes: - decouple fdb notify from fdb create. - Move fdb notify after register_netdev. - Call rtnl_configure_link in vxlan newlink handler to notify userspace about the newlink before fdb notify and hence avoiding the user-space race. Fixes: afbd8bae9c79 ("vxlan: add implicit fdb entry for default destination") Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 794a9a7b235f..ababba37d735 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3197,6 +3197,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, { struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb *f = NULL; int err; err = vxlan_dev_configure(net, dev, conf, false, extack); @@ -3207,27 +3208,38 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, /* create an fdb entry for a valid default destination */ if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { - err = vxlan_fdb_update(vxlan, all_zeros_mac, + err = vxlan_fdb_create(vxlan, all_zeros_mac, &vxlan->default_dst.remote_ip, NUD_REACHABLE | NUD_PERMANENT, - NLM_F_EXCL | NLM_F_CREATE, vxlan->cfg.dst_port, vxlan->default_dst.remote_vni, vxlan->default_dst.remote_vni, vxlan->default_dst.remote_ifindex, - NTF_SELF); + NTF_SELF, &f); if (err) return err; } err = register_netdevice(dev); + if (err) + goto errout; + + err = rtnl_configure_link(dev, NULL); if (err) { - vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni); - return err; + unregister_netdevice(dev); + goto errout; } + /* notify default fdb entry */ + if (f) + vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH); + list_add(&vxlan->next, &vn->vxlan_list); return 0; +errout: + if (f) + vxlan_fdb_destroy(vxlan, f, false); + return err; } static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], @@ -3462,6 +3474,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], struct vxlan_rdst *dst = &vxlan->default_dst; struct vxlan_rdst old_dst; struct vxlan_config conf; + struct vxlan_fdb *f = NULL; int err; err = vxlan_nl2conf(tb, data, @@ -3487,19 +3500,19 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], old_dst.remote_ifindex, 0); if (!vxlan_addr_any(&dst->remote_ip)) { - err = vxlan_fdb_update(vxlan, all_zeros_mac, + err = vxlan_fdb_create(vxlan, all_zeros_mac, &dst->remote_ip, NUD_REACHABLE | NUD_PERMANENT, - NLM_F_CREATE | NLM_F_APPEND, vxlan->cfg.dst_port, dst->remote_vni, dst->remote_vni, dst->remote_ifindex, - NTF_SELF); + NTF_SELF, &f); if (err) { spin_unlock_bh(&vxlan->hash_lock); return err; } + vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH); } spin_unlock_bh(&vxlan->hash_lock); } -- cgit v1.2.3-58-ga151