summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/ip6_fib.h1
-rw-r--r--include/net/ip_fib.h1
-rw-r--r--include/net/nexthop.h4
-rw-r--r--net/ipv4/fib_frontend.c19
-rw-r--r--net/ipv4/fib_semantics.c86
-rw-r--r--net/ipv4/nexthop.c250
-rw-r--r--net/ipv6/ip6_fib.c31
-rw-r--r--net/ipv6/route.c458
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_multiprefix.sh290
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh359
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh49
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh237
12 files changed, 1672 insertions, 113 deletions
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index ac0427c096f3..1e92f1500b87 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -49,6 +49,7 @@ struct fib6_config {
u16 fc_delete_all_nh : 1,
fc_ignore_dev_down:1,
__unused : 14;
+ u32 fc_nh_id;
struct in6_addr fc_dst;
struct in6_addr fc_src;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 4cdf8bc22efd..7e1e621a56df 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -40,6 +40,7 @@ struct fib_config {
u32 fc_flags;
u32 fc_priority;
__be32 fc_prefsrc;
+ u32 fc_nh_id;
struct nlattr *fc_mx;
struct rtnexthop *fc_mp;
int fc_mx_len;
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index e019ed9b3dc3..25f1f9a8419b 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -305,4 +305,8 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
res->nh = &nhi->fib6_nh;
}
}
+
+int nexthop_for_each_fib6_nh(struct nexthop *nh,
+ int (*cb)(struct fib6_nh *nh, void *arg),
+ void *arg);
#endif
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 33b0dbe84aa6..108191667531 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -671,6 +671,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_IP_PROTO] = { .type = NLA_U8 },
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
+ [RTA_NH_ID] = { .type = NLA_U32 },
};
int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
@@ -808,6 +809,18 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
if (err < 0)
goto errout;
break;
+ case RTA_NH_ID:
+ cfg->fc_nh_id = nla_get_u32(attr);
+ break;
+ }
+ }
+
+ if (cfg->fc_nh_id) {
+ if (cfg->fc_oif || cfg->fc_gw_family ||
+ cfg->fc_encap || cfg->fc_mp) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop specification and nexthop id are mutually exclusive");
+ return -EINVAL;
}
}
@@ -834,6 +847,12 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ err = -EINVAL;
+ goto errout;
+ }
+
tb = fib_get_table(net, cfg.fc_table);
if (!tb) {
NL_SET_ERR_MSG(extack, "FIB table does not exist");
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index afa4af1f9326..0de895cd0621 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -325,14 +325,32 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val)
(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}
-static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
+static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
+ u32 prefsrc, u32 priority)
+{
+ unsigned int val = init_val;
+
+ val ^= (protocol << 8) | scope;
+ val ^= prefsrc;
+ val ^= priority;
+
+ return val;
+}
+
+static unsigned int fib_info_hashfn_result(unsigned int val)
{
unsigned int mask = (fib_info_hash_size - 1);
- unsigned int val = fi->fib_nhs;
- val ^= (fi->fib_protocol << 8) | fi->fib_scope;
- val ^= (__force u32)fi->fib_prefsrc;
- val ^= fi->fib_priority;
+ return (val ^ (val >> 7) ^ (val >> 12)) & mask;
+}
+
+static inline unsigned int fib_info_hashfn(struct fib_info *fi)
+{
+ unsigned int val;
+
+ val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol,
+ fi->fib_scope, (__force u32)fi->fib_prefsrc,
+ fi->fib_priority);
if (fi->nh) {
val ^= fib_devindex_hashfn(fi->nh->id);
@@ -342,7 +360,40 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
} endfor_nexthops(fi)
}
- return (val ^ (val >> 7) ^ (val >> 12)) & mask;
+ return fib_info_hashfn_result(val);
+}
+
+/* no metrics, only nexthop id */
+static struct fib_info *fib_find_info_nh(struct net *net,
+ const struct fib_config *cfg)
+{
+ struct hlist_head *head;
+ struct fib_info *fi;
+ unsigned int hash;
+
+ hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id),
+ cfg->fc_protocol, cfg->fc_scope,
+ (__force u32)cfg->fc_prefsrc,
+ cfg->fc_priority);
+ hash = fib_info_hashfn_result(hash);
+ head = &fib_info_hash[hash];
+
+ hlist_for_each_entry(fi, head, fib_hash) {
+ if (!net_eq(fi->fib_net, net))
+ continue;
+ if (!fi->nh || fi->nh->id != cfg->fc_nh_id)
+ continue;
+ if (cfg->fc_protocol == fi->fib_protocol &&
+ cfg->fc_scope == fi->fib_scope &&
+ cfg->fc_prefsrc == fi->fib_prefsrc &&
+ cfg->fc_priority == fi->fib_priority &&
+ cfg->fc_type == fi->fib_type &&
+ cfg->fc_table == fi->fib_tb_id &&
+ !((cfg->fc_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK))
+ return fi;
+ }
+
+ return NULL;
}
static struct fib_info *fib_find_info(struct fib_info *nfi)
@@ -789,6 +840,12 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
return 1;
+ if (cfg->fc_nh_id) {
+ if (fi->nh && cfg->fc_nh_id == fi->nh->id)
+ return 0;
+ return 1;
+ }
+
if (cfg->fc_oif || cfg->fc_gw_family) {
struct fib_nh *nh = fib_info_nh(fi, 0);
@@ -1302,6 +1359,23 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
+ if (cfg->fc_nh_id) {
+ if (!cfg->fc_mx) {
+ fi = fib_find_info_nh(net, cfg);
+ if (fi) {
+ fi->fib_treeref++;
+ return fi;
+ }
+ }
+
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ goto err_inval;
+ }
+ nhs = 0;
+ }
+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (cfg->fc_mp) {
nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 5e48762b6b5f..5fe5a3981d43 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -517,6 +517,47 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
}
EXPORT_SYMBOL_GPL(nexthop_select_path);
+int nexthop_for_each_fib6_nh(struct nexthop *nh,
+ int (*cb)(struct fib6_nh *nh, void *arg),
+ void *arg)
+{
+ struct nh_info *nhi;
+ int err;
+
+ if (nh->is_group) {
+ struct nh_group *nhg;
+ int i;
+
+ nhg = rcu_dereference_rtnl(nh->nh_grp);
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
+ err = cb(&nhi->fib6_nh, arg);
+ if (err)
+ return err;
+ }
+ } else {
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ err = cb(&nhi->fib6_nh, arg);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
+
+static int check_src_addr(const struct in6_addr *saddr,
+ struct netlink_ext_ack *extack)
+{
+ if (!ipv6_addr_any(saddr)) {
+ NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
+ return -EINVAL;
+ }
+ return 0;
+}
+
int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -528,10 +569,8 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
* routing it can not use nexthop objects. mlxsw also does not allow
* fib6_src on routes.
*/
- if (!ipv6_addr_any(&cfg->fc_src)) {
- NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
+ if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
return -EINVAL;
- }
if (nh->is_group) {
struct nh_group *nhg;
@@ -552,6 +591,25 @@ no_v4_nh:
}
EXPORT_SYMBOL_GPL(fib6_check_nexthop);
+/* if existing nexthop has ipv6 routes linked to it, need
+ * to verify this new spec works with ipv6
+ */
+static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct fib6_info *f6i;
+
+ if (list_empty(&old->f6i_list))
+ return 0;
+
+ list_for_each_entry(f6i, &old->f6i_list, nh_list) {
+ if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
+ return -EINVAL;
+ }
+
+ return fib6_check_nexthop(new, NULL, extack);
+}
+
static int nexthop_check_scope(struct nexthop *nh, u8 scope,
struct netlink_ext_ack *extack)
{
@@ -600,6 +658,21 @@ out:
return err;
}
+static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct fib_info *fi;
+
+ list_for_each_entry(fi, &old->fi_list, nh_list) {
+ int err;
+
+ err = fib_check_nexthop(new, fi->fib_scope, extack);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static void nh_group_rebalance(struct nh_group *nhg)
{
int total = 0;
@@ -692,6 +765,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
}
}
+/* not called for nexthop replace */
static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
{
struct fib6_info *f6i, *tmp;
@@ -746,10 +820,171 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
nexthop_put(nh);
}
+/* if any FIB entries reference this nexthop, any dst entries
+ * need to be regenerated
+ */
+static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
+{
+ struct fib6_info *f6i;
+
+ if (!list_empty(&nh->fi_list))
+ rt_cache_flush(net);
+
+ list_for_each_entry(f6i, &nh->f6i_list, nh_list)
+ ipv6_stub->fib6_update_sernum(net, f6i);
+}
+
+static int replace_nexthop_grp(struct net *net, struct nexthop *old,
+ struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_group *oldg, *newg;
+ int i;
+
+ if (!new->is_group) {
+ NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
+ return -EINVAL;
+ }
+
+ oldg = rtnl_dereference(old->nh_grp);
+ newg = rtnl_dereference(new->nh_grp);
+
+ /* update parents - used by nexthop code for cleanup */
+ for (i = 0; i < newg->num_nh; i++)
+ newg->nh_entries[i].nh_parent = old;
+
+ rcu_assign_pointer(old->nh_grp, newg);
+
+ for (i = 0; i < oldg->num_nh; i++)
+ oldg->nh_entries[i].nh_parent = new;
+
+ rcu_assign_pointer(new->nh_grp, oldg);
+
+ return 0;
+}
+
+static int replace_nexthop_single(struct net *net, struct nexthop *old,
+ struct nexthop *new,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *oldi, *newi;
+
+ if (new->is_group) {
+ NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
+ return -EINVAL;
+ }
+
+ oldi = rtnl_dereference(old->nh_info);
+ newi = rtnl_dereference(new->nh_info);
+
+ newi->nh_parent = old;
+ oldi->nh_parent = new;
+
+ old->protocol = new->protocol;
+ old->nh_flags = new->nh_flags;
+
+ rcu_assign_pointer(old->nh_info, newi);
+ rcu_assign_pointer(new->nh_info, oldi);
+
+ return 0;
+}
+
+static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
+ struct nl_info *info)
+{
+ struct fib6_info *f6i;
+
+ if (!list_empty(&nh->fi_list)) {
+ struct fib_info *fi;
+
+ /* expectation is a few fib_info per nexthop and then
+ * a lot of routes per fib_info. So mark the fib_info
+ * and then walk the fib tables once
+ */
+ list_for_each_entry(fi, &nh->fi_list, nh_list)
+ fi->nh_updated = true;
+
+ fib_info_notify_update(net, info);
+
+ list_for_each_entry(fi, &nh->fi_list, nh_list)
+ fi->nh_updated = false;
+ }
+
+ list_for_each_entry(f6i, &nh->f6i_list, nh_list)
+ ipv6_stub->fib6_rt_update(net, f6i, info);
+}
+
+/* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
+ * linked to this nexthop and for all groups that the nexthop
+ * is a member of
+ */
+static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
+ struct nl_info *info)
+{
+ struct nh_grp_entry *nhge;
+
+ __nexthop_replace_notify(net, nh, info);
+
+ list_for_each_entry(nhge, &nh->grp_list, nh_list)
+ __nexthop_replace_notify(net, nhge->nh_parent, info);
+}
+
static int replace_nexthop(struct net *net, struct nexthop *old,
struct nexthop *new, struct netlink_ext_ack *extack)
{
- return -EEXIST;
+ bool new_is_reject = false;
+ struct nh_grp_entry *nhge;
+ int err;
+
+ /* check that existing FIB entries are ok with the
+ * new nexthop definition
+ */
+ err = fib_check_nh_list(old, new, extack);
+ if (err)
+ return err;
+
+ err = fib6_check_nh_list(old, new, extack);
+ if (err)
+ return err;
+
+ if (!new->is_group) {
+ struct nh_info *nhi = rtnl_dereference(new->nh_info);
+
+ new_is_reject = nhi->reject_nh;
+ }
+
+ list_for_each_entry(nhge, &old->grp_list, nh_list) {
+ /* if new nexthop is a blackhole, any groups using this
+ * nexthop cannot have more than 1 path
+ */
+ if (new_is_reject &&
+ nexthop_num_path(nhge->nh_parent) > 1) {
+ NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
+ return -EINVAL;
+ }
+
+ err = fib_check_nh_list(nhge->nh_parent, new, extack);
+ if (err)
+ return err;
+
+ err = fib6_check_nh_list(nhge->nh_parent, new, extack);
+ if (err)
+ return err;
+ }
+
+ if (old->is_group)
+ err = replace_nexthop_grp(net, old, new, extack);
+ else
+ err = replace_nexthop_single(net, old, new, extack);
+
+ if (!err) {
+ nh_rt_cache_flush(net, old);
+
+ __remove_nexthop(net, new, NULL);
+ nexthop_put(new);
+ }
+
+ return err;
}
/* called with rtnl_lock held */
@@ -761,6 +996,7 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
bool create = !!(cfg->nlflags & NLM_F_CREATE);
u32 new_id = new_nh->id;
+ int replace_notify = 0;
int rc = -EEXIST;
pp = &root->rb_node;
@@ -780,8 +1016,10 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
pp = &next->rb_right;
} else if (replace) {
rc = replace_nexthop(net, nh, new_nh, extack);
- if (!rc)
+ if (!rc) {
new_nh = nh; /* send notification with old nh */
+ replace_notify = 1;
+ }
goto out;
} else {
/* id already exists and not a replace */
@@ -802,6 +1040,8 @@ out:
if (!rc) {
nh_base_seq_inc(net);
nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
+ if (replace_notify)
+ nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
}
return rc;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 17dcc916eb63..1cce2082279c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -906,19 +906,42 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
}
}
+struct fib6_nh_pcpu_arg {
+ struct fib6_info *from;
+ const struct fib6_table *table;
+};
+
+static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_pcpu_arg *arg = _arg;
+
+ __fib6_drop_pcpu_from(nh, arg->from, arg->table);
+ return 0;
+}
+
static void fib6_drop_pcpu_from(struct fib6_info *f6i,
const struct fib6_table *table)
{
- struct fib6_nh *fib6_nh;
-
/* Make sure rt6_make_pcpu_route() wont add other percpu routes
* while we are cleaning them here.
*/
f6i->fib6_destroying = 1;
mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
- fib6_nh = f6i->fib6_nh;
- __fib6_drop_pcpu_from(fib6_nh, f6i, table);
+ if (f6i->nh) {
+ struct fib6_nh_pcpu_arg arg = {
+ .from = f6i,
+ .table = table
+ };
+
+ nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
+ &arg);
+ } else {
+ struct fib6_nh *fib6_nh;
+
+ fib6_nh = f6i->fib6_nh;
+ __fib6_drop_pcpu_from(fib6_nh, f6i, table);
+ }
}
static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fd0dc18ec574..f7257a56072a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -100,7 +100,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
int strict);
-static size_t rt6_nlmsg_size(struct fib6_info *rt);
+static size_t rt6_nlmsg_size(struct fib6_info *f6i);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
@@ -490,6 +490,45 @@ static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
return false;
}
+struct fib6_nh_dm_arg {
+ struct net *net;
+ const struct in6_addr *saddr;
+ int oif;
+ int flags;
+ struct fib6_nh *nh;
+};
+
+static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_dm_arg *arg = _arg;
+
+ arg->nh = nh;
+ return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
+ arg->flags);
+}
+
+/* returns fib6_nh from nexthop or NULL */
+static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
+ struct fib6_result *res,
+ const struct in6_addr *saddr,
+ int oif, int flags)
+{
+ struct fib6_nh_dm_arg arg = {
+ .net = net,
+ .saddr = saddr,
+ .oif = oif,
+ .flags = flags,
+ };
+
+ if (nexthop_is_blackhole(nh))
+ return NULL;
+
+ if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
+ return arg.nh;
+
+ return NULL;
+}
+
static void rt6_device_match(struct net *net, struct fib6_result *res,
const struct in6_addr *saddr, int oif, int flags)
{
@@ -510,8 +549,19 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
}
for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
- nh = spf6i->fib6_nh;
- if (__rt6_device_match(net, nh, saddr, oif, flags)) {
+ bool matched = false;
+
+ if (unlikely(spf6i->nh)) {
+ nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
+ oif, flags);
+ if (nh)
+ matched = true;
+ } else {
+ nh = spf6i->fib6_nh;
+ if (__rt6_device_match(net, nh, saddr, oif, flags))
+ matched = true;
+ }
+ if (matched) {
res->f6i = spf6i;
goto out;
}
@@ -715,6 +765,24 @@ out:
return rc;
}
+struct fib6_nh_frl_arg {
+ u32 flags;
+ int oif;
+ int strict;
+ int *mpri;
+ bool *do_rr;
+ struct fib6_nh *nh;
+};
+
+static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_frl_arg *arg = _arg;
+
+ arg->nh = nh;
+ return find_match(nh, arg->flags, arg->oif, arg->strict,
+ arg->mpri, arg->do_rr);
+}
+
static void __find_rr_leaf(struct fib6_info *f6i_start,
struct fib6_info *nomatch, u32 metric,
struct fib6_result *res, struct fib6_info **cont,
@@ -725,6 +793,7 @@ static void __find_rr_leaf(struct fib6_info *f6i_start,
for (f6i = f6i_start;
f6i && f6i != nomatch;
f6i = rcu_dereference(f6i->fib6_next)) {
+ bool matched = false;
struct fib6_nh *nh;
if (cont && f6i->fib6_metric != metric) {
@@ -735,8 +804,34 @@ static void __find_rr_leaf(struct fib6_info *f6i_start,
if (fib6_check_expired(f6i))
continue;
- nh = f6i->fib6_nh;
- if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
+ if (unlikely(f6i->nh)) {
+ struct fib6_nh_frl_arg arg = {
+ .flags = f6i->fib6_flags,
+ .oif = oif,
+ .strict = strict,
+ .mpri = mpri,
+ .do_rr = do_rr
+ };
+
+ if (nexthop_is_blackhole(f6i->nh)) {
+ res->fib6_flags = RTF_REJECT;
+ res->fib6_type = RTN_BLACKHOLE;
+ res->f6i = f6i;
+ res->nh = nexthop_fib6_nh(f6i->nh);
+ return;
+ }
+ if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
+ &arg)) {
+ matched = true;
+ nh = arg.nh;
+ }
+ } else {
+ nh = f6i->fib6_nh;
+ if (find_match(nh, f6i->fib6_flags, oif, strict,
+ mpri, do_rr))
+ matched = true;
+ }
+ if (matched) {
res->f6i = f6i;
res->nh = nh;
res->fib6_flags = f6i->fib6_flags;
@@ -1647,9 +1742,22 @@ out:
spin_unlock_bh(&rt6_exception_lock);
}
+static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
+{
+ struct fib6_info *f6i = arg;
+
+ fib6_nh_flush_exceptions(nh, f6i);
+
+ return 0;
+}
+
void rt6_flush_exceptions(struct fib6_info *f6i)
{
- fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
+ if (f6i->nh)
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
+ f6i);
+ else
+ fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
}
/* Find cached rt in the hash table inside passed in rt
@@ -1736,6 +1844,23 @@ static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
return err;
}
+struct fib6_nh_excptn_arg {
+ struct rt6_info *rt;
+ int plen;
+};
+
+static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_excptn_arg *arg = _arg;
+ int err;
+
+ err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
+ if (err == 0)
+ return 1;
+
+ return 0;
+}
+
static int rt6_remove_exception_rt(struct rt6_info *rt)
{
struct fib6_info *from;
@@ -1744,6 +1869,20 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
if (!from || !(rt->rt6i_flags & RTF_CACHE))
return -EINVAL;
+ if (from->nh) {
+ struct fib6_nh_excptn_arg arg = {
+ .rt = rt,
+ .plen = from->fib6_src.plen
+ };
+ int rc;
+
+ /* rc = 1 means an entry was found */
+ rc = nexthop_for_each_fib6_nh(from->nh,
+ rt6_nh_remove_exception_rt,
+ &arg);
+ return rc ? 0 : -ENOENT;
+ }
+
return fib6_nh_remove_exception(from->fib6_nh,
from->fib6_src.plen, rt);
}
@@ -1774,9 +1913,33 @@ static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
rt6_ex->stamp = jiffies;
}
+struct fib6_nh_match_arg {
+ const struct net_device *dev;
+ const struct in6_addr *gw;
+ struct fib6_nh *match;
+};
+
+/* determine if fib6_nh has given device and gateway */
+static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_match_arg *arg = _arg;
+
+ if (arg->dev != nh->fib_nh_dev ||
+ (arg->gw && !nh->fib_nh_gw_family) ||
+ (!arg->gw && nh->fib_nh_gw_family) ||
+ (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
+ return 0;
+
+ arg->match = nh;
+
+ /* found a match, break the loop */
+ return 1;
+}
+
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
struct fib6_info *from;
+ struct fib6_nh *fib6_nh;
rcu_read_lock();
@@ -1784,7 +1947,21 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
if (!from || !(rt->rt6i_flags & RTF_CACHE))
goto unlock;
- fib6_nh_update_exception(from->fib6_nh, from->fib6_src.plen, rt);
+ if (from->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = rt->dst.dev,
+ .gw = &rt->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
+
+ if (!arg.match)
+ return;
+ fib6_nh = arg.match;
+ } else {
+ fib6_nh = from->fib6_nh;
+ }
+ fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
unlock:
rcu_read_unlock();
}
@@ -1946,11 +2123,34 @@ static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
rcu_read_unlock_bh();
}
+struct fib6_nh_age_excptn_arg {
+ struct fib6_gc_args *gc_args;
+ unsigned long now;
+};
+
+static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_age_excptn_arg *arg = _arg;
+
+ fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
+ return 0;
+}
+
void rt6_age_exceptions(struct fib6_info *f6i,
struct fib6_gc_args *gc_args,
unsigned long now)
{
- fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+ if (f6i->nh) {
+ struct fib6_nh_age_excptn_arg arg = {
+ .gc_args = gc_args,
+ .now = now
+ };
+
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
+ &arg);
+ } else {
+ fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+ }
}
/* must be called with rcu lock held */
@@ -2479,10 +2679,31 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
rcu_read_unlock();
return;
}
- res.nh = res.f6i->fib6_nh;
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
+ if (res.f6i->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = dst->dev,
+ .gw = &rt6->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(res.f6i->nh,
+ fib6_nh_find_match, &arg);
+
+ /* fib6_info uses a nexthop that does not have fib6_nh
+ * using the dst->dev + gw. Should be impossible.
+ */
+ if (!arg.match) {
+ rcu_read_unlock();
+ return;
+ }
+
+ res.nh = arg.match;
+ } else {
+ res.nh = res.f6i->fib6_nh;
+ }
+
nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
@@ -2589,6 +2810,21 @@ static bool ip6_redirect_nh_match(const struct fib6_result *res,
return true;
}
+struct fib6_nh_rd_arg {
+ struct fib6_result *res;
+ struct flowi6 *fl6;
+ const struct in6_addr *gw;
+ struct rt6_info **ret;
+};
+
+static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_rd_arg *arg = _arg;
+
+ arg->res->nh = nh;
+ return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
+}
+
/* Handle redirects */
struct ip6rd_flowi {
struct flowi6 fl6;
@@ -2604,6 +2840,12 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
struct rt6_info *ret = NULL;
struct fib6_result res = {};
+ struct fib6_nh_rd_arg arg = {
+ .res = &res,
+ .fl6 = fl6,
+ .gw = &rdfl->gateway,
+ .ret = &ret
+ };
struct fib6_info *rt;
struct fib6_node *fn;
@@ -2628,14 +2870,24 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
restart:
for_each_fib6_node_rt_rcu(fn) {
res.f6i = rt;
- res.nh = rt->fib6_nh;
-
if (fib6_check_expired(rt))
continue;
if (rt->fib6_flags & RTF_REJECT)
break;
- if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
- goto out;
+ if (unlikely(rt->nh)) {
+ if (nexthop_is_blackhole(rt->nh))
+ continue;
+ /* on match, res->nh is filled in and potentially ret */
+ if (nexthop_for_each_fib6_nh(rt->nh,
+ fib6_nh_redirect_match,
+ &arg))
+ goto out;
+ } else {
+ res.nh = rt->fib6_nh;
+ if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
+ &ret))
+ goto out;
+ }
}
if (!rt)
@@ -3279,6 +3531,16 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
#endif
+ if (cfg->fc_nh_id) {
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ goto out;
+ }
+ err = fib6_check_nexthop(nh, cfg, extack);
+ if (err)
+ goto out;
+ }
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
@@ -3510,6 +3772,30 @@ static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
return 0;
}
+struct fib6_nh_del_cached_rt_arg {
+ struct fib6_config *cfg;
+ struct fib6_info *f6i;
+};
+
+static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_del_cached_rt_arg *arg = _arg;
+ int rc;
+
+ rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
+ return rc != -ESRCH ? rc : 0;
+}
+
+static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
+{
+ struct fib6_nh_del_cached_rt_arg arg = {
+ .cfg = cfg,
+ .f6i = f6i
+ };
+
+ return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
+}
+
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -3535,11 +3821,20 @@ static int ip6_route_del(struct fib6_config *cfg,
for_each_fib6_node_rt_rcu(fn) {
struct fib6_nh *nh;
- nh = rt->fib6_nh;
- if (cfg->fc_flags & RTF_CACHE) {
- int rc;
+ if (rt->nh && rt->nh->id != cfg->fc_nh_id)
+ continue;
- rc = ip6_del_cached_rt(cfg, rt, nh);
+ if (cfg->fc_flags & RTF_CACHE) {
+ int rc = 0;
+
+ if (rt->nh) {
+ rc = ip6_del_cached_rt_nh(cfg, rt);
+ } else if (cfg->fc_nh_id) {
+ continue;
+ } else {
+ nh = rt->fib6_nh;
+ rc = ip6_del_cached_rt(cfg, rt, nh);
+ }
if (rc != -ESRCH) {
rcu_read_unlock();
return rc;
@@ -3547,6 +3842,23 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
}
+ if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
+ continue;
+ if (cfg->fc_protocol &&
+ cfg->fc_protocol != rt->fib6_protocol)
+ continue;
+
+ if (rt->nh) {
+ if (!fib6_info_hold_safe(rt))
+ continue;
+ rcu_read_unlock();
+
+ return __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ }
+ if (cfg->fc_nh_id)
+ continue;
+
+ nh = rt->fib6_nh;
if (cfg->fc_ifindex &&
(!nh->fib_nh_dev ||
nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
@@ -3554,10 +3866,6 @@ static int ip6_route_del(struct fib6_config *cfg,
if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
continue;
- if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
- continue;
- if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
- continue;
if (!fib6_info_hold_safe(rt))
continue;
rcu_read_unlock();
@@ -3668,7 +3976,25 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (!res.f6i)
goto out;
- res.nh = res.f6i->fib6_nh;
+ if (res.f6i->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = dst->dev,
+ .gw = &rt->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(res.f6i->nh,
+ fib6_nh_find_match, &arg);
+
+ /* fib6_info uses a nexthop that does not have fib6_nh
+ * using the dst->dev. Should be impossible
+ */
+ if (!arg.match)
+ goto out;
+ res.nh = arg.match;
+ } else {
+ res.nh = res.f6i->fib6_nh;
+ }
+
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
@@ -4401,6 +4727,12 @@ static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
return 0;
arg->f6i = f6i;
+ if (f6i->nh) {
+ /* fib6_nh_mtu_change only returns 0, so this is safe */
+ return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
+ arg);
+ }
+
return fib6_nh_mtu_change(f6i->fib6_nh, arg);
}
@@ -4433,6 +4765,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_IP_PROTO] = { .type = NLA_U8 },
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
+ [RTA_NH_ID] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -4479,6 +4812,16 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
+ if (tb[RTA_NH_ID]) {
+ if (tb[RTA_GATEWAY] || tb[RTA_OIF] ||
+ tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop specification and nexthop id are mutually exclusive");
+ goto errout;
+ }
+ cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
+ }
+
if (tb[RTA_GATEWAY]) {
cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
cfg->fc_flags |= RTF_GATEWAY;
@@ -4813,6 +5156,12 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (cfg.fc_nh_id &&
+ !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ return -EINVAL;
+ }
+
if (cfg.fc_mp)
return ip6_route_multipath_del(&cfg, extack);
else {
@@ -4840,20 +5189,46 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
return ip6_route_add(&cfg, GFP_KERNEL, extack);
}
-static size_t rt6_nlmsg_size(struct fib6_info *rt)
+/* add the overhead of this fib6_nh to nexthop_len */
+static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
{
- int nexthop_len = 0;
+ int *nexthop_len = arg;
- if (rt->nh)
- nexthop_len += nla_total_size(4); /* RTA_NH_ID */
+ *nexthop_len += nla_total_size(0) /* RTA_MULTIPATH */
+ + NLA_ALIGN(sizeof(struct rtnexthop))
+ + nla_total_size(16); /* RTA_GATEWAY */
+
+ if (nh->fib_nh_lws) {
+ /* RTA_ENCAP_TYPE */
+ *nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
+ /* RTA_ENCAP */
+ *nexthop_len += nla_total_size(2);
+ }
- if (rt->fib6_nsiblings) {
- nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
- + NLA_ALIGN(sizeof(struct rtnexthop))
- + nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws);
+ return 0;
+}
- nexthop_len *= rt->fib6_nsiblings;
+static size_t rt6_nlmsg_size(struct fib6_info *f6i)
+{
+ int nexthop_len;
+
+ if (f6i->nh) {
+ nexthop_len = nla_total_size(4); /* RTA_NH_ID */
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
+ &nexthop_len);
+ } else {
+ struct fib6_nh *nh = f6i->fib6_nh;
+
+ nexthop_len = 0;
+ if (f6i->fib6_nsiblings) {
+ nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ + NLA_ALIGN(sizeof(struct rtnexthop))
+ + nla_total_size(16) /* RTA_GATEWAY */
+ + lwtunnel_get_encap_size(nh->fib_nh_lws);
+
+ nexthop_len *= f6i->fib6_nsiblings;
+ }
+ nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
return NLMSG_ALIGN(sizeof(struct rtmsg))
@@ -4869,7 +5244,6 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws)
+ nexthop_len;
}
@@ -5070,9 +5444,27 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
+{
+ const struct net_device *dev = arg;
+
+ if (nh->fib_nh_dev == dev)
+ return 1;
+
+ return 0;
+}
+
static bool fib6_info_uses_dev(const struct fib6_info *f6i,
const struct net_device *dev)
{
+ if (f6i->nh) {
+ struct net_device *_dev = (struct net_device *)dev;
+
+ return !!nexthop_for_each_fib6_nh(f6i->nh,
+ fib6_info_nh_uses_dev,
+ _dev);
+ }
+
if (f6i->fib6_nh->fib_nh_dev == dev)
return true;
diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
new file mode 100755
index 000000000000..e6828732843e
--- /dev/null
+++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
@@ -0,0 +1,290 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Validate cached routes in fib{6}_nh that is used by multiple prefixes.
+# Validate a different # exception is generated in h0 for each remote host.
+#
+# h1
+# /
+# h0 - r1 - h2
+# \
+# h3
+#
+# routing in h0 to hN is done with nexthop objects.
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+
+################################################################################
+# helpers
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# config
+
+create_ns()
+{
+ local ns=${1}
+
+ ip netns del ${ns} 2>/dev/null
+
+ ip netns add ${ns}
+ ip -netns ${ns} addr add 127.0.0.1/8 dev lo
+ ip -netns ${ns} link set lo up
+
+ ip netns exec ${ns} sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ case ${ns} in
+ h*)
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ;;
+ r*)
+ ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ ;;
+ esac
+}
+
+setup()
+{
+ local ns
+ local i
+
+ #set -e
+
+ for ns in h0 r1 h1 h2 h3
+ do
+ create_ns ${ns}
+ done
+
+ #
+ # create interconnects
+ #
+
+ for i in 0 1 2 3
+ do
+ ip -netns h${i} li add eth0 type veth peer name r1h${i}
+ ip -netns h${i} li set eth0 up
+ ip -netns h${i} li set r1h${i} netns r1 name eth${i} up
+
+ ip -netns h${i} addr add dev eth0 172.16.10${i}.1/24
+ ip -netns h${i} -6 addr add dev eth0 2001:db8:10${i}::1/64
+ ip -netns r1 addr add dev eth${i} 172.16.10${i}.254/24
+ ip -netns r1 -6 addr add dev eth${i} 2001:db8:10${i}::64/64
+ done
+
+ ip -netns h0 nexthop add id 4 via 172.16.100.254 dev eth0
+ ip -netns h0 nexthop add id 6 via 2001:db8:100::64 dev eth0
+
+ # routing from h0 to h1-h3 and back
+ for i in 1 2 3
+ do
+ ip -netns h0 ro add 172.16.10${i}.0/24 nhid 4
+ ip -netns h${i} ro add 172.16.100.0/24 via 172.16.10${i}.254
+
+ ip -netns h0 -6 ro add 2001:db8:10${i}::/64 nhid 6
+ ip -netns h${i} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64
+ done
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo
+ echo "host 1 config"
+ ip -netns h0 li sh
+ ip -netns h0 ro sh
+ ip -netns h0 -6 ro sh
+ fi
+
+ #set +e
+}
+
+cleanup()
+{
+ for n in h1 r1 h2 h3 h4
+ do
+ ip netns del ${n} 2>/dev/null
+ done
+}
+
+change_mtu()
+{
+ local hostid=$1
+ local mtu=$2
+
+ run_cmd ip -netns h${hostid} li set eth0 mtu ${mtu}
+ run_cmd ip -netns r1 li set eth${hostid} mtu ${mtu}
+}
+
+################################################################################
+# validate exceptions
+
+validate_v4_exception()
+{
+ local i=$1
+ local mtu=$2
+ local ping_sz=$3
+ local dst="172.16.10${i}.1"
+ local h0=172.16.100.1
+ local r1=172.16.100.254
+ local rc
+
+ if [ ${ping_sz} != "0" ]; then
+ run_cmd ip netns exec h0 ping -s ${ping_sz} -c5 -w5 ${dst}
+ fi
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "Route get"
+ ip -netns h0 ro get ${dst}
+ echo "Searching for:"
+ echo " cache .* mtu ${mtu}"
+ echo
+ fi
+
+ ip -netns h0 ro get ${dst} | \
+ grep -q "cache .* mtu ${mtu}"
+ rc=$?
+
+ log_test $rc 0 "IPv4: host 0 to host ${i}, mtu ${mtu}"
+}
+
+validate_v6_exception()
+{
+ local i=$1
+ local mtu=$2
+ local ping_sz=$3
+ local dst="2001:db8:10${i}::1"
+ local h0=2001:db8:100::1
+ local r1=2001:db8:100::64
+ local rc
+
+ if [ ${ping_sz} != "0" ]; then
+ run_cmd ip netns exec h0 ping6 -s ${ping_sz} -c5 -w5 ${dst}
+ fi
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "Route get"
+ ip -netns h0 -6 ro get ${dst}
+ echo "Searching for:"
+ echo " ${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+ echo
+ fi
+
+ ip -netns h0 -6 ro get ${dst} | \
+ grep -q "${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+ rc=$?
+
+ log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}"
+}
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ esac
+done
+
+cleanup
+setup
+sleep 2
+
+cpus=$(cat /sys/devices/system/cpu/online)
+cpus="$(seq ${cpus/-/ })"
+ret=0
+for i in 1 2 3
+do
+ # generate a cached route per-cpu
+ for c in ${cpus}; do
+ run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1
+ [ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1
+
+ run_cmd taskset -c ${c} ip netns exec h0 ping6 -c1 -w1 2001:db8:10${i}::1
+ [ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1
+
+ [ $ret -ne 0 ] && break
+ done
+ [ $ret -ne 0 ] && break
+done
+
+if [ $ret -eq 0 ]; then
+ # generate different exceptions in h0 for h1, h2 and h3
+ change_mtu 1 1300
+ validate_v4_exception 1 1300 1350
+ validate_v6_exception 1 1300 1350
+ echo
+
+ change_mtu 2 1350
+ validate_v4_exception 2 1350 1400
+ validate_v6_exception 2 1350 1400
+ echo
+
+ change_mtu 3 1400
+ validate_v4_exception 3 1400 1450
+ validate_v6_exception 3 1400 1450
+ echo
+
+ validate_v4_exception 1 1300 0
+ validate_v6_exception 1 1300 0
+ echo
+
+ validate_v4_exception 2 1350 0
+ validate_v6_exception 2 1350 0
+ echo
+
+ validate_v4_exception 3 1400 0
+ validate_v6_exception 3 1400 0
+
+ # targeted deletes to trigger cleanup paths in kernel
+ ip -netns h0 ro del 172.16.102.0/24 nhid 4
+ ip -netns h0 -6 ro del 2001:db8:102::/64 nhid 6
+
+ ip -netns h0 nexthop del id 4
+ ip -netns h0 nexthop del id 6
+fi
+
+cleanup
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
new file mode 100755
index 000000000000..cf3d26c233e8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -0,0 +1,359 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip nexthop del id 103
+ ip nexthop del id 101
+ ip nexthop del id 102
+ ip nexthop del id 106
+ ip nexthop del id 104
+ ip nexthop del id 105
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip nexthop del id 201
+ ip nexthop del id 202
+ ip nexthop del id 204
+ ip nexthop del id 205
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+ ip nexthop add id 101 via 169.254.2.22 dev $rp12
+ ip nexthop add id 102 via 169.254.3.23 dev $rp13
+ ip nexthop add id 103 group 101/102
+ ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+ ip nexthop add id 104 via fe80:2::22 dev $rp12
+ ip nexthop add id 105 via fe80:3::23 dev $rp13
+ ip nexthop add id 106 group 104/105
+ ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106
+
+ ip nexthop add id 201 via 169.254.2.12 dev $rp22
+ ip nexthop add id 202 via 169.254.3.13 dev $rp23
+ ip nexthop add id 203 group 201/202
+ ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+ ip nexthop add id 204 via fe80:2::12 dev $rp22
+ ip nexthop add id 205 via fe80:3::13 dev $rp23
+ ip nexthop add id 206 group 204/205
+ ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight_rp12/102,$weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ ip nexthop replace id 103 group 101/102
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+ ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip nexthop replace id 106 group 104/105
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip nexthop replace id 106 group 104/105
+}
+
+multipath_test()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+ routing_nh_obj
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+ echo "Nexthop objects not supported; skipping tests"
+ exit 0
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index 76a7c4472dc3..18c5de53558a 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -331,6 +331,38 @@ run_ping()
run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
}
+replace_route_new()
+{
+ # r1 to h2 via r2 and eth0
+ run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0
+ run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0
+}
+
+reset_route_new()
+{
+ run_cmd ip -netns r1 nexthop flush
+ run_cmd ip -netns h1 nexthop flush
+
+ initial_route_new
+}
+
+initial_route_new()
+{
+ # r1 to h2 via r2 and eth1
+ run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1
+ run_cmd ip -netns r1 ro add ${H2_N2} nhid 1
+
+ run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1
+ run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2
+
+ # h1 to h2 via r1
+ run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0
+ run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1
+
+ run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0
+ run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2
+}
+
replace_route_legacy()
{
# r1 to h2 via r2 and eth0
@@ -479,6 +511,23 @@ WITH_VRF=yes
setup
do_test "legacy"
+cleanup
+log_section "Routing with nexthop objects"
+ip nexthop ls >/dev/null 2>&1
+if [ $? -eq 0 ]; then
+ WITH_VRF=no
+ setup
+ do_test "new"
+
+ cleanup
+ log_section "Routing with nexthop objects and VRF"
+ WITH_VRF=yes
+ setup
+ do_test "new"
+else
+ echo "Nexthop objects not supported; skipping tests"
+fi
+
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 4a1275990d7e..9e6d8b704186 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -123,34 +123,35 @@ TRACING=0
# Some systems don't have a ping6 binary anymore
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+# Name Description re-run with nh
tests="
- pmtu_ipv4_exception ipv4: PMTU exceptions
- pmtu_ipv6_exception ipv6: PMTU exceptions
- pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions
- pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions
- pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions
- pmtu_ipv6_vxlan6_exception IPv6 over vxlan6: PMTU exceptions
- pmtu_ipv4_geneve4_exception IPv4 over geneve4: PMTU exceptions
- pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions
- pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions
- pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions
- pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions
- pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions
- pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions
- pmtu_ipv6_fou6_exception IPv6 over fou6: PMTU exceptions
- pmtu_ipv4_gue4_exception IPv4 over gue4: PMTU exceptions
- pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions
- pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions
- pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions
- pmtu_vti6_exception vti6: PMTU exceptions
- pmtu_vti4_exception vti4: PMTU exceptions
- pmtu_vti4_default_mtu vti4: default MTU assignment
- pmtu_vti6_default_mtu vti6: default MTU assignment
- pmtu_vti4_link_add_mtu vti4: MTU setting on link creation
- pmtu_vti6_link_add_mtu vti6: MTU setting on link creation
- pmtu_vti6_link_change_mtu vti6: MTU changes on link changes
- cleanup_ipv4_exception ipv4: cleanup of cached exceptions
- cleanup_ipv6_exception ipv6: cleanup of cached exceptions"
+ pmtu_ipv4_exception ipv4: PMTU exceptions 1
+ pmtu_ipv6_exception ipv6: PMTU exceptions 1
+ pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1
+ pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1
+ pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1
+ pmtu_ipv6_vxlan6_exception IPv6 over vxlan6: PMTU exceptions 1
+ pmtu_ipv4_geneve4_exception IPv4 over geneve4: PMTU exceptions 1
+ pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions 1
+ pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions 1
+ pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions 1
+ pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions 1
+ pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions 1
+ pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions 1
+ pmtu_ipv6_fou6_exception IPv6 over fou6: PMTU exceptions 1
+ pmtu_ipv4_gue4_exception IPv4 over gue4: PMTU exceptions 1
+ pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions 1
+ pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions 1
+ pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions 1
+ pmtu_vti6_exception vti6: PMTU exceptions 0
+ pmtu_vti4_exception vti4: PMTU exceptions 0
+ pmtu_vti4_default_mtu vti4: default MTU assignment 0
+ pmtu_vti6_default_mtu vti6: default MTU assignment 0
+ pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0
+ pmtu_vti6_link_add_mtu vti6: MTU setting on link creation 0
+ pmtu_vti6_link_change_mtu vti6: MTU changes on link changes 0
+ cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1
+ cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1"
NS_A="ns-A"
NS_B="ns-B"
@@ -194,6 +195,30 @@ routes="
B default ${prefix6}:${b_r1}::2
"
+USE_NH="no"
+# ns family nh id destination gateway
+nexthops="
+ A 4 41 ${prefix4}.${a_r1}.2 veth_A-R1
+ A 4 42 ${prefix4}.${a_r2}.2 veth_A-R2
+ B 4 41 ${prefix4}.${b_r1}.2 veth_B-R1
+
+ A 6 61 ${prefix6}:${a_r1}::2 veth_A-R1
+ A 6 62 ${prefix6}:${a_r2}::2 veth_A-R2
+ B 6 61 ${prefix6}:${b_r1}::2 veth_B-R1
+"
+
+# nexthop id correlates to id in nexthops config above
+# ns family prefix nh id
+routes_nh="
+ A 4 default 41
+ A 4 ${prefix4}.${b_r2}.1 42
+ B 4 default 41
+
+ A 6 default 61
+ A 6 ${prefix6}:${b_r2}::1 62
+ B 6 default 61
+"
+
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
veth4_mask="24"
@@ -448,6 +473,50 @@ setup_xfrm6() {
setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
}
+setup_routing_old() {
+ for i in ${routes}; do
+ [ "${ns}" = "" ] && ns="${i}" && continue
+ [ "${addr}" = "" ] && addr="${i}" && continue
+ [ "${gw}" = "" ] && gw="${i}"
+
+ ns_name="$(nsname ${ns})"
+
+ ip -n ${ns_name} route add ${addr} via ${gw}
+
+ ns=""; addr=""; gw=""
+ done
+}
+
+setup_routing_new() {
+ for i in ${nexthops}; do
+ [ "${ns}" = "" ] && ns="${i}" && continue
+ [ "${fam}" = "" ] && fam="${i}" && continue
+ [ "${nhid}" = "" ] && nhid="${i}" && continue
+ [ "${gw}" = "" ] && gw="${i}" && continue
+ [ "${dev}" = "" ] && dev="${i}"
+
+ ns_name="$(nsname ${ns})"
+
+ ip -n ${ns_name} -${fam} nexthop add id ${nhid} via ${gw} dev ${dev}
+
+ ns=""; fam=""; nhid=""; gw=""; dev=""
+
+ done
+
+ for i in ${routes_nh}; do
+ [ "${ns}" = "" ] && ns="${i}" && continue
+ [ "${fam}" = "" ] && fam="${i}" && continue
+ [ "${addr}" = "" ] && addr="${i}" && continue
+ [ "${nhid}" = "" ] && nhid="${i}"
+
+ ns_name="$(nsname ${ns})"
+
+ ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
+
+ ns=""; fam=""; addr=""; nhid=""
+ done
+}
+
setup_routing() {
for i in ${NS_R1} ${NS_R2}; do
ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
@@ -478,17 +547,13 @@ setup_routing() {
ns=""; peer=""; segment=""
done
- for i in ${routes}; do
- [ "${ns}" = "" ] && ns="${i}" && continue
- [ "${addr}" = "" ] && addr="${i}" && continue
- [ "${gw}" = "" ] && gw="${i}"
-
- ns_name="$(nsname ${ns})"
-
- ip -n ${ns_name} route add ${addr} via ${gw}
+ if [ "$USE_NH" = "yes" ]; then
+ setup_routing_new
+ else
+ setup_routing_old
+ fi
- ns=""; addr=""; gw=""
- done
+ return 0
}
setup() {
@@ -1090,6 +1155,53 @@ test_cleanup_ipv4_exception() {
test_cleanup_vxlanX_exception 4
}
+run_test() {
+ (
+ tname="$1"
+ tdesc="$2"
+
+ unset IFS
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\n##########################################################################\n\n"
+ fi
+
+ eval test_${tname}
+ ret=$?
+
+ if [ $ret -eq 0 ]; then
+ printf "TEST: %-60s [ OK ]\n" "${tdesc}"
+ elif [ $ret -eq 1 ]; then
+ printf "TEST: %-60s [FAIL]\n" "${tdesc}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "Pausing. Hit enter to continue"
+ read a
+ fi
+ err_flush
+ exit 1
+ elif [ $ret -eq 2 ]; then
+ printf "TEST: %-60s [SKIP]\n" "${tdesc}"
+ err_flush
+ fi
+
+ return $ret
+ )
+ ret=$?
+ [ $ret -ne 0 ] && exitcode=1
+
+ return $ret
+}
+
+run_test_nh() {
+ tname="$1"
+ tdesc="$2"
+
+ USE_NH=yes
+ run_test "${tname}" "${tdesc} - nexthop objects"
+ USE_NH=no
+}
+
usage() {
echo
echo "$0 [OPTIONS] [TEST]..."
@@ -1136,8 +1248,20 @@ trap cleanup EXIT
# start clean
cleanup
+HAVE_NH=no
+ip nexthop ls >/dev/null 2>&1
+[ $? -eq 0 ] && HAVE_NH=yes
+
+name=""
+desc=""
+rerun_nh=0
for t in ${tests}; do
- [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+ [ "${name}" = "" ] && name="${t}" && continue
+ [ "${desc}" = "" ] && desc="${t}" && continue
+
+ if [ "${HAVE_NH}" = "yes" ]; then
+ rerun_nh="${t}"
+ fi
run_this=1
for arg do
@@ -1145,35 +1269,18 @@ for t in ${tests}; do
[ "${arg}" = "${name}" ] && run_this=1 && break
run_this=0
done
- [ $run_this -eq 0 ] && continue
-
- (
- unset IFS
+ if [ $run_this -eq 1 ]; then
+ run_test "${name}" "${desc}"
+ # if test was skipped no need to retry with nexthop objects
+ [ $? -eq 2 ] && rerun_nh=0
- if [ "$VERBOSE" = "1" ]; then
- printf "\n##########################################################################\n\n"
+ if [ "${rerun_nh}" = "1" ]; then
+ run_test_nh "${name}" "${desc}"
fi
-
- eval test_${name}
- ret=$?
-
- if [ $ret -eq 0 ]; then
- printf "TEST: %-60s [ OK ]\n" "${t}"
- elif [ $ret -eq 1 ]; then
- printf "TEST: %-60s [FAIL]\n" "${t}"
- if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
- echo
- echo "Pausing. Hit enter to continue"
- read a
- fi
- err_flush
- exit 1
- elif [ $ret -eq 2 ]; then
- printf "TEST: %-60s [SKIP]\n" "${t}"
- err_flush
- fi
- )
- [ $? -ne 0 ] && exitcode=1
+ fi
+ name=""
+ desc=""
+ rerun_nh=0
done
exit ${exitcode}