diff options
Diffstat (limited to 'net/ipv6/ip6_fib.c')
-rw-r--r-- | net/ipv6/ip6_fib.c | 639 |
1 files changed, 330 insertions, 309 deletions
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 01372dd74e38..7aa4c41a3bd9 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -43,7 +43,7 @@ static struct kmem_cache *fib6_node_kmem __read_mostly; struct fib6_cleaner { struct fib6_walker w; struct net *net; - int (*func)(struct rt6_info *, void *arg); + int (*func)(struct fib6_info *, void *arg); int sernum; void *arg; }; @@ -54,7 +54,7 @@ struct fib6_cleaner { #define FWS_INIT FWS_L #endif -static struct rt6_info *fib6_find_prefix(struct net *net, +static struct fib6_info *fib6_find_prefix(struct net *net, struct fib6_table *table, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, @@ -105,13 +105,12 @@ enum { FIB6_NO_SERNUM_CHANGE = 0, }; -void fib6_update_sernum(struct rt6_info *rt) +void fib6_update_sernum(struct net *net, struct fib6_info *f6i) { - struct net *net = dev_net(rt->dst.dev); struct fib6_node *fn; - fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + fn = rcu_dereference_protected(f6i->fib6_node, + lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) fn->fn_sernum = fib6_new_sernum(net); } @@ -146,6 +145,69 @@ static __be32 addr_bit_set(const void *token, int fn_bit) addr[fn_bit >> 5]; } +struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) +{ + struct fib6_info *f6i; + + f6i = kzalloc(sizeof(*f6i), gfp_flags); + if (!f6i) + return NULL; + + f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); + if (!f6i->rt6i_pcpu) { + kfree(f6i); + return NULL; + } + + INIT_LIST_HEAD(&f6i->fib6_siblings); + f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; + + atomic_inc(&f6i->fib6_ref); + + return f6i; +} + +void fib6_info_destroy(struct fib6_info *f6i) +{ + struct rt6_exception_bucket *bucket; + struct dst_metrics *m; + + WARN_ON(f6i->fib6_node); + + bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1); + if (bucket) { + f6i->rt6i_exception_bucket = NULL; + kfree(bucket); + } + + if (f6i->rt6i_pcpu) { + int cpu; + + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); + *ppcpu_rt = NULL; + } + } + } + + if (f6i->fib6_nh.nh_dev) + dev_put(f6i->fib6_nh.nh_dev); + + m = f6i->fib6_metrics; + if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) + kfree(m); + + kfree(f6i); +} +EXPORT_SYMBOL_GPL(fib6_info_destroy); + static struct fib6_node *node_alloc(struct net *net) { struct fib6_node *fn; @@ -176,28 +238,6 @@ static void node_free(struct net *net, struct fib6_node *fn) net->ipv6.rt6_stats->fib_nodes--; } -void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) -{ - int cpu; - - if (!non_pcpu_rt->rt6i_pcpu) - return; - - for_each_possible_cpu(cpu) { - struct rt6_info **ppcpu_rt; - struct rt6_info *pcpu_rt; - - ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); - pcpu_rt = *ppcpu_rt; - if (pcpu_rt) { - dst_dev_put(&pcpu_rt->dst); - dst_release(&pcpu_rt->dst); - *ppcpu_rt = NULL; - } - } -} -EXPORT_SYMBOL_GPL(rt6_free_pcpu); - static void fib6_free_table(struct fib6_table *table) { inetpeer_invalidate_tree(&table->tb6_peers); @@ -232,7 +272,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) if (table) { table->tb6_id = id; rcu_assign_pointer(table->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); } @@ -314,6 +354,13 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, return &rt->dst; } +/* called with rcu lock held; no reference taken on fib6_info */ +struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, + int flags) +{ + return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags); +} + static void __net_init fib6_tables_init(struct net *net) { fib6_link_table(net, net->ipv6.fib6_main_tbl); @@ -340,7 +387,7 @@ unsigned int fib6_tables_seq_read(struct net *net) static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, - struct rt6_info *rt) + struct fib6_info *rt) { struct fib6_entry_notifier_info info = { .rt = rt, @@ -351,7 +398,7 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, static int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, - struct rt6_info *rt, + struct fib6_info *rt, struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { @@ -359,7 +406,7 @@ static int call_fib6_entry_notifiers(struct net *net, .rt = rt, }; - rt->rt6i_table->fib_seq++; + rt->fib6_table->fib_seq++; return call_fib6_notifiers(net, event_type, &info.info); } @@ -368,16 +415,16 @@ struct fib6_dump_arg { struct notifier_block *nb; }; -static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg) +static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { - if (rt == arg->net->ipv6.ip6_null_entry) + if (rt == arg->net->ipv6.fib6_null_entry) return; call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt); } static int fib6_node_dump(struct fib6_walker *w) { - struct rt6_info *rt; + struct fib6_info *rt; for_each_fib6_walker_rt(w) fib6_rt_dump(rt, w->args); @@ -426,7 +473,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb) static int fib6_dump_node(struct fib6_walker *w) { int res; - struct rt6_info *rt; + struct fib6_info *rt; for_each_fib6_walker_rt(w) { res = rt6_dump_route(rt, w->args); @@ -441,10 +488,10 @@ static int fib6_dump_node(struct fib6_walker *w) * last sibling of this route (no need to dump the * sibling routes again) */ - if (rt->rt6i_nsiblings) - rt = list_last_entry(&rt->rt6i_siblings, - struct rt6_info, - rt6i_siblings); + if (rt->fib6_nsiblings) + rt = list_last_entry(&rt->fib6_siblings, + struct fib6_info, + fib6_siblings); } w->leaf = NULL; return 0; @@ -579,6 +626,24 @@ out: return res; } +void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) +{ + if (!f6i) + return; + + if (f6i->fib6_metrics == &dst_default_metrics) { + struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC); + + if (!p) + return; + + refcount_set(&p->refcnt, 1); + f6i->fib6_metrics = p; + } + + f6i->fib6_metrics->metrics[metric - 1] = val; +} + /* * Routing Table * @@ -608,7 +673,7 @@ static struct fib6_node *fib6_add_1(struct net *net, fn = root; do { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); key = (struct rt6key *)((u8 *)leaf + offset); @@ -637,11 +702,11 @@ static struct fib6_node *fib6_add_1(struct net *net, /* clean up an intermediate node */ if (!(fn->fn_flags & RTN_RTINFO)) { RCU_INIT_POINTER(fn->leaf, NULL); - rt6_release(leaf); + fib6_info_release(leaf); /* remove null_entry in the root node */ } else if (fn->fn_flags & RTN_TL_ROOT && rcu_access_pointer(fn->leaf) == - net->ipv6.ip6_null_entry) { + net->ipv6.fib6_null_entry) { RCU_INIT_POINTER(fn->leaf, NULL); } @@ -750,7 +815,7 @@ insert_above: RCU_INIT_POINTER(in->parent, pn); in->leaf = fn->leaf; atomic_inc(&rcu_dereference_protected(in->leaf, - lockdep_is_held(&table->tb6_lock))->rt6i_ref); + lockdep_is_held(&table->tb6_lock))->fib6_ref); /* update parent pointer */ if (dir) @@ -802,44 +867,37 @@ insert_above: return ln; } -static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) -{ - int i; - - for (i = 0; i < RTAX_MAX; i++) { - if (test_bit(i, mxc->mx_valid)) - mp[i] = mxc->mx[i]; - } -} - -static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) +static void fib6_drop_pcpu_from(struct fib6_info *f6i, + const struct fib6_table *table) { - if (!mxc->mx) - return 0; - - if (dst->flags & DST_HOST) { - u32 *mp = dst_metrics_write_ptr(dst); + int cpu; - if (unlikely(!mp)) - return -ENOMEM; + /* release the reference to this fib entry from + * all of its cached pcpu routes + */ + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; - fib6_copy_metrics(mp, mxc); - } else { - dst_init_metrics(dst, mxc->mx, false); + ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + struct fib6_info *from; - /* We've stolen mx now. */ - mxc->mx = NULL; + from = rcu_dereference_protected(pcpu_rt->from, + lockdep_is_held(&table->tb6_lock)); + rcu_assign_pointer(pcpu_rt->from, NULL); + fib6_info_release(from); + } } - - return 0; } -static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, +static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, struct net *net) { - struct fib6_table *table = rt->rt6i_table; + struct fib6_table *table = rt->fib6_table; - if (atomic_read(&rt->rt6i_ref) != 1) { + if (atomic_read(&rt->fib6_ref) != 1) { /* This route is used as dummy address holder in some split * nodes. It is not leaked, but it still holds other resources, * which must be released in time. So, scan ascendant nodes @@ -847,18 +905,22 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, * to still alive ones. */ while (fn) { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *new_leaf; + struct fib6_info *new_leaf; if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) { new_leaf = fib6_find_prefix(net, table, fn); - atomic_inc(&new_leaf->rt6i_ref); + atomic_inc(&new_leaf->fib6_ref); + rcu_assign_pointer(fn->leaf, new_leaf); - rt6_release(rt); + fib6_info_release(rt); } fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&table->tb6_lock)); } + + if (rt->rt6i_pcpu) + fib6_drop_pcpu_from(rt, table); } } @@ -866,37 +928,37 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, * Insert routing information in a node. */ -static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, +static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack) { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); - struct rt6_info *iter = NULL; - struct rt6_info __rcu **ins; - struct rt6_info __rcu **fallback_ins = NULL; + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&rt->fib6_table->tb6_lock)); + struct fib6_info *iter = NULL, *match = NULL; + struct fib6_info __rcu **ins; int replace = (info->nlh && (info->nlh->nlmsg_flags & NLM_F_REPLACE)); + int append = (info->nlh && + (info->nlh->nlmsg_flags & NLM_F_APPEND)); int add = (!info->nlh || (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; - bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); u16 nlflags = NLM_F_EXCL; int err; - if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND)) + if (append) nlflags |= NLM_F_APPEND; ins = &fn->leaf; for (iter = leaf; iter; - iter = rcu_dereference_protected(iter->rt6_next, - lockdep_is_held(&rt->rt6i_table->tb6_lock))) { + iter = rcu_dereference_protected(iter->fib6_next, + lockdep_is_held(&rt->fib6_table->tb6_lock))) { /* * Search for duplicates */ - if (iter->rt6i_metric == rt->rt6i_metric) { + if (iter->fib6_metric == rt->fib6_metric) { /* * Same priority level */ @@ -906,56 +968,32 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, nlflags &= ~NLM_F_EXCL; if (replace) { - if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { - found++; - break; - } - if (rt_can_ecmp) - fallback_ins = fallback_ins ?: ins; - goto next_iter; + found++; + break; } if (rt6_duplicate_nexthop(iter, rt)) { - if (rt->rt6i_nsiblings) - rt->rt6i_nsiblings = 0; - if (!(iter->rt6i_flags & RTF_EXPIRES)) + if (rt->fib6_nsiblings) + rt->fib6_nsiblings = 0; + if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; - if (!(rt->rt6i_flags & RTF_EXPIRES)) - rt6_clean_expires(iter); + if (!(rt->fib6_flags & RTF_EXPIRES)) + fib6_clean_expires(iter); else - rt6_set_expires(iter, rt->dst.expires); - iter->rt6i_pmtu = rt->rt6i_pmtu; + fib6_set_expires(iter, rt->expires); + fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu); return -EEXIST; } - /* If we have the same destination and the same metric, - * but not the same gateway, then the route we try to - * add is sibling to this route, increment our counter - * of siblings, and later we will add our route to the - * list. - * Only static routes (which don't have flag - * RTF_EXPIRES) are used for ECMPv6. - * - * To avoid long list, we only had siblings if the - * route have a gateway. - */ - if (rt_can_ecmp && - rt6_qualify_for_ecmp(iter)) - rt->rt6i_nsiblings++; + + /* first route that matches */ + if (!match) + match = iter; } - if (iter->rt6i_metric > rt->rt6i_metric) + if (iter->fib6_metric > rt->fib6_metric) break; -next_iter: - ins = &iter->rt6_next; - } - - if (fallback_ins && !found) { - /* No ECMP-able route found, replace first non-ECMP one */ - ins = fallback_ins; - iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); - found++; + ins = &iter->fib6_next; } /* Reset round-robin state, if necessary */ @@ -963,59 +1001,56 @@ next_iter: fn->rr_ptr = NULL; /* Link this route to others same route. */ - if (rt->rt6i_nsiblings) { - unsigned int rt6i_nsiblings; - struct rt6_info *sibling, *temp_sibling; - - /* Find the first route that have the same metric */ - sibling = leaf; - while (sibling) { - if (sibling->rt6i_metric == rt->rt6i_metric && - rt6_qualify_for_ecmp(sibling)) { - list_add_tail(&rt->rt6i_siblings, - &sibling->rt6i_siblings); - break; - } - sibling = rcu_dereference_protected(sibling->rt6_next, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + if (append && match) { + struct fib6_info *sibling, *temp_sibling; + + if (rt->fib6_flags & RTF_REJECT) { + NL_SET_ERR_MSG(extack, + "Can not append a REJECT route"); + return -EINVAL; + } else if (match->fib6_flags & RTF_REJECT) { + NL_SET_ERR_MSG(extack, + "Can not append to a REJECT route"); + return -EINVAL; } + rt->fib6_nsiblings = match->fib6_nsiblings; + list_add_tail(&rt->fib6_siblings, &match->fib6_siblings); + match->fib6_nsiblings++; + /* For each sibling in the list, increment the counter of * siblings. BUG() if counters does not match, list of siblings * is broken! */ - rt6i_nsiblings = 0; list_for_each_entry_safe(sibling, temp_sibling, - &rt->rt6i_siblings, rt6i_siblings) { - sibling->rt6i_nsiblings++; - BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); - rt6i_nsiblings++; + &match->fib6_siblings, fib6_siblings) { + sibling->fib6_nsiblings++; + BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings); } - BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); - rt6_multipath_rebalance(temp_sibling); + + rt6_multipath_rebalance(match); } /* * insert node */ if (!replace) { + enum fib_event_type event; + if (!add) pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: nlflags |= NLM_F_CREATE; - err = fib6_commit_metrics(&rt->dst, mxc); - if (err) - return err; - err = call_fib6_entry_notifiers(info->nl_net, - FIB_EVENT_ENTRY_ADD, - rt, extack); + event = append ? FIB_EVENT_ENTRY_APPEND : FIB_EVENT_ENTRY_ADD; + err = call_fib6_entry_notifiers(info->nl_net, event, rt, + extack); if (err) return err; - rcu_assign_pointer(rt->rt6_next, iter); - atomic_inc(&rt->rt6i_ref); - rcu_assign_pointer(rt->rt6i_node, fn); + rcu_assign_pointer(rt->fib6_next, iter); + atomic_inc(&rt->fib6_ref); + rcu_assign_pointer(rt->fib6_node, fn); rcu_assign_pointer(*ins, rt); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); @@ -1027,7 +1062,7 @@ add: } } else { - int nsiblings; + struct fib6_info *tmp; if (!found) { if (add) @@ -1036,67 +1071,72 @@ add: return -ENOENT; } - err = fib6_commit_metrics(&rt->dst, mxc); - if (err) - return err; - err = call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, rt, extack); if (err) return err; - atomic_inc(&rt->rt6i_ref); - rcu_assign_pointer(rt->rt6i_node, fn); - rt->rt6_next = iter->rt6_next; + /* if route being replaced has siblings, set tmp to + * last one, otherwise tmp is current route. this is + * used to set fib6_next for new route + */ + if (iter->fib6_nsiblings) + tmp = list_last_entry(&iter->fib6_siblings, + struct fib6_info, + fib6_siblings); + else + tmp = iter; + + /* insert new route */ + atomic_inc(&rt->fib6_ref); + rcu_assign_pointer(rt->fib6_node, fn); + rt->fib6_next = tmp->fib6_next; rcu_assign_pointer(*ins, rt); + if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } - nsiblings = iter->rt6i_nsiblings; - iter->rt6i_node = NULL; - fib6_purge_rt(iter, fn, info->nl_net); - if (rcu_access_pointer(fn->rr_ptr) == iter) - fn->rr_ptr = NULL; - rt6_release(iter); - if (nsiblings) { + /* delete old route */ + rt = iter; + + if (rt->fib6_nsiblings) { + struct fib6_info *tmp; + /* Replacing an ECMP route, remove all siblings */ - ins = &rt->rt6_next; - iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); - while (iter) { - if (iter->rt6i_metric > rt->rt6i_metric) - break; - if (rt6_qualify_for_ecmp(iter)) { - *ins = iter->rt6_next; - iter->rt6i_node = NULL; - fib6_purge_rt(iter, fn, info->nl_net); - if (rcu_access_pointer(fn->rr_ptr) == iter) - fn->rr_ptr = NULL; - rt6_release(iter); - nsiblings--; - info->nl_net->ipv6.rt6_stats->fib_rt_entries--; - } else { - ins = &iter->rt6_next; - } - iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings, + fib6_siblings) { + iter->fib6_node = NULL; + fib6_purge_rt(iter, fn, info->nl_net); + if (rcu_access_pointer(fn->rr_ptr) == iter) + fn->rr_ptr = NULL; + fib6_info_release(iter); + + rt->fib6_nsiblings--; + info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } - WARN_ON(nsiblings != 0); } + + WARN_ON(rt->fib6_nsiblings != 0); + + rt->fib6_node = NULL; + fib6_purge_rt(rt, fn, info->nl_net); + if (rcu_access_pointer(fn->rr_ptr) == rt) + fn->rr_ptr = NULL; + fib6_info_release(rt); } return 0; } -static void fib6_start_gc(struct net *net, struct rt6_info *rt) +static void fib6_start_gc(struct net *net, struct fib6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && - (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) + (rt->fib6_flags & RTF_EXPIRES)) mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } @@ -1108,22 +1148,22 @@ void fib6_force_start_gc(struct net *net) jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } -static void __fib6_update_sernum_upto_root(struct rt6_info *rt, +static void __fib6_update_sernum_upto_root(struct fib6_info *rt, int sernum) { - struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&rt->fib6_table->tb6_lock)); /* paired with smp_rmb() in rt6_get_cookie_safe() */ smp_wmb(); while (fn) { fn->fn_sernum = sernum; fn = rcu_dereference_protected(fn->parent, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); + lockdep_is_held(&rt->fib6_table->tb6_lock)); } } -void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt) +void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt) { __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net)); } @@ -1135,22 +1175,16 @@ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt) * Need to own table->tb6_lock */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, - struct netlink_ext_ack *extack) +int fib6_add(struct fib6_node *root, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack) { - struct fib6_table *table = rt->rt6i_table; + struct fib6_table *table = rt->fib6_table; struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; int allow_create = 1; int replace_required = 0; int sernum = fib6_new_sernum(info->nl_net); - if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) - return -EINVAL; - if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE)) - return -EINVAL; - if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) allow_create = 0; @@ -1161,8 +1195,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); fn = fib6_add_1(info->nl_net, table, root, - &rt->rt6i_dst.addr, rt->rt6i_dst.plen, - offsetof(struct rt6_info, rt6i_dst), allow_create, + &rt->fib6_dst.addr, rt->fib6_dst.plen, + offsetof(struct fib6_info, fib6_dst), allow_create, replace_required, extack); if (IS_ERR(fn)) { err = PTR_ERR(fn); @@ -1173,7 +1207,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, pn = fn; #ifdef CONFIG_IPV6_SUBTREES - if (rt->rt6i_src.plen) { + if (rt->fib6_src.plen) { struct fib6_node *sn; if (!rcu_access_pointer(fn->subtree)) { @@ -1194,16 +1228,16 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (!sfn) goto failure; - atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); + atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref); rcu_assign_pointer(sfn->leaf, - info->nl_net->ipv6.ip6_null_entry); + info->nl_net->ipv6.fib6_null_entry); sfn->fn_flags = RTN_ROOT; /* Now add the first leaf node to new subtree */ sn = fib6_add_1(info->nl_net, table, sfn, - &rt->rt6i_src.addr, rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src), + &rt->fib6_src.addr, rt->fib6_src.plen, + offsetof(struct fib6_info, fib6_src), allow_create, replace_required, extack); if (IS_ERR(sn)) { @@ -1221,8 +1255,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, rcu_assign_pointer(fn->subtree, sfn); } else { sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn), - &rt->rt6i_src.addr, rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src), + &rt->fib6_src.addr, rt->fib6_src.plen, + offsetof(struct fib6_info, fib6_src), allow_create, replace_required, extack); if (IS_ERR(sn)) { @@ -1235,9 +1269,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (fn->fn_flags & RTN_TL_ROOT) { /* put back null_entry for root node */ rcu_assign_pointer(fn->leaf, - info->nl_net->ipv6.ip6_null_entry); + info->nl_net->ipv6.fib6_null_entry); } else { - atomic_inc(&rt->rt6i_ref); + atomic_inc(&rt->fib6_ref); rcu_assign_pointer(fn->leaf, rt); } } @@ -1245,7 +1279,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } #endif - err = fib6_add_rt2node(fn, rt, info, mxc, extack); + err = fib6_add_rt2node(fn, rt, info, extack); if (!err) { __fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); @@ -1259,13 +1293,13 @@ out: * super-tree leaf node we have to find a new one for it. */ if (pn != fn) { - struct rt6_info *pn_leaf = + struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf, lockdep_is_held(&table->tb6_lock)); if (pn_leaf == rt) { pn_leaf = NULL; RCU_INIT_POINTER(pn->leaf, NULL); - atomic_dec(&rt->rt6i_ref); + fib6_info_release(rt); } if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, @@ -1274,10 +1308,10 @@ out: if (!pn_leaf) { WARN_ON(!pn_leaf); pn_leaf = - info->nl_net->ipv6.ip6_null_entry; + info->nl_net->ipv6.fib6_null_entry; } #endif - atomic_inc(&pn_leaf->rt6i_ref); + fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } } @@ -1299,10 +1333,6 @@ failure: (fn->fn_flags & RTN_TL_ROOT && !rcu_access_pointer(fn->leaf)))) fib6_repair_tree(info->nl_net, table, fn); - /* Always release dst as dst->__refcnt is guaranteed - * to be taken before entering this function - */ - dst_release_immediate(&rt->dst); return err; } @@ -1312,12 +1342,12 @@ failure: */ struct lookup_args { - int offset; /* key offset on rt6_info */ + int offset; /* key offset on fib6_info */ const struct in6_addr *addr; /* search key */ }; -static struct fib6_node *fib6_lookup_1(struct fib6_node *root, - struct lookup_args *args) +static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root, + struct lookup_args *args) { struct fib6_node *fn; __be32 dir; @@ -1350,7 +1380,7 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, struct fib6_node *subtree = FIB6_SUBTREE(fn); if (subtree || fn->fn_flags & RTN_RTINFO) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; if (!leaf) @@ -1362,7 +1392,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, #ifdef CONFIG_IPV6_SUBTREES if (subtree) { struct fib6_node *sfn; - sfn = fib6_lookup_1(subtree, args + 1); + sfn = fib6_node_lookup_1(subtree, + args + 1); if (!sfn) goto backtrack; fn = sfn; @@ -1384,18 +1415,19 @@ backtrack: /* called with rcu_read_lock() held */ -struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, - const struct in6_addr *saddr) +struct fib6_node *fib6_node_lookup(struct fib6_node *root, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct fib6_node *fn; struct lookup_args args[] = { { - .offset = offsetof(struct rt6_info, rt6i_dst), + .offset = offsetof(struct fib6_info, fib6_dst), .addr = daddr, }, #ifdef CONFIG_IPV6_SUBTREES { - .offset = offsetof(struct rt6_info, rt6i_src), + .offset = offsetof(struct fib6_info, fib6_src), .addr = saddr, }, #endif @@ -1404,7 +1436,7 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad } }; - fn = fib6_lookup_1(root, daddr ? args : args + 1); + fn = fib6_node_lookup_1(root, daddr ? args : args + 1); if (!fn || fn->fn_flags & RTN_TL_ROOT) fn = root; @@ -1431,7 +1463,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root, struct fib6_node *fn, *prev = NULL; for (fn = root; fn ; ) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; /* This node is being deleted */ @@ -1480,7 +1512,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root, struct fib6_node *fn; fn = fib6_locate_1(root, daddr, dst_len, - offsetof(struct rt6_info, rt6i_dst), + offsetof(struct fib6_info, fib6_dst), exact_match); #ifdef CONFIG_IPV6_SUBTREES @@ -1491,7 +1523,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root, if (subtree) { fn = fib6_locate_1(subtree, saddr, src_len, - offsetof(struct rt6_info, rt6i_src), + offsetof(struct fib6_info, fib6_src), exact_match); } } @@ -1510,14 +1542,14 @@ struct fib6_node *fib6_locate(struct fib6_node *root, * */ -static struct rt6_info *fib6_find_prefix(struct net *net, +static struct fib6_info *fib6_find_prefix(struct net *net, struct fib6_table *table, struct fib6_node *fn) { struct fib6_node *child_left, *child_right; if (fn->fn_flags & RTN_ROOT) - return net->ipv6.ip6_null_entry; + return net->ipv6.fib6_null_entry; while (fn) { child_left = rcu_dereference_protected(fn->left, @@ -1554,7 +1586,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, /* Set fn->leaf to null_entry for root node. */ if (fn->fn_flags & RTN_TL_ROOT) { - rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry); + rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry); return fn; } @@ -1569,11 +1601,11 @@ static struct fib6_node *fib6_repair_tree(struct net *net, lockdep_is_held(&table->tb6_lock)); struct fib6_node *pn_l = rcu_dereference_protected(pn->left, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, + struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *new_fn_leaf; + struct fib6_info *new_fn_leaf; RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; @@ -1599,10 +1631,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, #if RT6_DEBUG >= 2 if (!new_fn_leaf) { WARN_ON(!new_fn_leaf); - new_fn_leaf = net->ipv6.ip6_null_entry; + new_fn_leaf = net->ipv6.fib6_null_entry; } #endif - atomic_inc(&new_fn_leaf->rt6i_ref); + fib6_info_hold(new_fn_leaf); rcu_assign_pointer(fn->leaf, new_fn_leaf); return pn; } @@ -1658,26 +1690,24 @@ static struct fib6_node *fib6_repair_tree(struct net *net, return pn; RCU_INIT_POINTER(pn->leaf, NULL); - rt6_release(pn_leaf); + fib6_info_release(pn_leaf); fn = pn; } } static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, - struct rt6_info __rcu **rtp, struct nl_info *info) + struct fib6_info __rcu **rtp, struct nl_info *info) { struct fib6_walker *w; - struct rt6_info *rt = rcu_dereference_protected(*rtp, + struct fib6_info *rt = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); struct net *net = info->nl_net; RT6_TRACE("fib6_del_route\n"); - WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE); - /* Unlink it */ - *rtp = rt->rt6_next; - rt->rt6i_node = NULL; + *rtp = rt->fib6_next; + rt->fib6_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; @@ -1689,14 +1719,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, fn->rr_ptr = NULL; /* Remove this entry from other siblings */ - if (rt->rt6i_nsiblings) { - struct rt6_info *sibling, *next_sibling; + if (rt->fib6_nsiblings) { + struct fib6_info *sibling, *next_sibling; list_for_each_entry_safe(sibling, next_sibling, - &rt->rt6i_siblings, rt6i_siblings) - sibling->rt6i_nsiblings--; - rt->rt6i_nsiblings = 0; - list_del_init(&rt->rt6i_siblings); + &rt->fib6_siblings, fib6_siblings) + sibling->fib6_nsiblings--; + rt->fib6_nsiblings = 0; + list_del_init(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } @@ -1705,7 +1735,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, FOR_WALKERS(net, w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); - w->leaf = rcu_dereference_protected(rt->rt6_next, + w->leaf = rcu_dereference_protected(rt->fib6_next, lockdep_is_held(&table->tb6_lock)); if (!w->leaf) w->state = FWS_U; @@ -1730,46 +1760,36 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); - rt6_release(rt); + fib6_info_release(rt); } /* Need to own table->tb6_lock */ -int fib6_del(struct rt6_info *rt, struct nl_info *info) +int fib6_del(struct fib6_info *rt, struct nl_info *info) { - struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&rt->rt6i_table->tb6_lock)); - struct fib6_table *table = rt->rt6i_table; + struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&rt->fib6_table->tb6_lock)); + struct fib6_table *table = rt->fib6_table; struct net *net = info->nl_net; - struct rt6_info __rcu **rtp; - struct rt6_info __rcu **rtp_next; + struct fib6_info __rcu **rtp; + struct fib6_info __rcu **rtp_next; -#if RT6_DEBUG >= 2 - if (rt->dst.obsolete > 0) { - WARN_ON(fn); - return -ENOENT; - } -#endif - if (!fn || rt == net->ipv6.ip6_null_entry) + if (!fn || rt == net->ipv6.fib6_null_entry) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); - /* remove cached dst from exception table */ - if (rt->rt6i_flags & RTF_CACHE) - return rt6_remove_exception_rt(rt); - /* * Walk the leaf entries looking for ourself */ for (rtp = &fn->leaf; *rtp; rtp = rtp_next) { - struct rt6_info *cur = rcu_dereference_protected(*rtp, + struct fib6_info *cur = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); if (rt == cur) { fib6_del_route(table, fn, rtp, info); return 0; } - rtp_next = &cur->rt6_next; + rtp_next = &cur->fib6_next; } return -ENOENT; } @@ -1907,7 +1927,7 @@ static int fib6_walk(struct net *net, struct fib6_walker *w) static int fib6_clean_node(struct fib6_walker *w) { int res; - struct rt6_info *rt; + struct fib6_info *rt; struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w); struct nl_info info = { .nl_net = c->net, @@ -1932,17 +1952,17 @@ static int fib6_clean_node(struct fib6_walker *w) #if RT6_DEBUG >= 2 pr_debug("%s: del failed: rt=%p@%p err=%d\n", __func__, rt, - rcu_access_pointer(rt->rt6i_node), + rcu_access_pointer(rt->fib6_node), res); #endif continue; } return 0; } else if (res == -2) { - if (WARN_ON(!rt->rt6i_nsiblings)) + if (WARN_ON(!rt->fib6_nsiblings)) continue; - rt = list_last_entry(&rt->rt6i_siblings, - struct rt6_info, rt6i_siblings); + rt = list_last_entry(&rt->fib6_siblings, + struct fib6_info, fib6_siblings); continue; } WARN_ON(res != 0); @@ -1961,7 +1981,7 @@ static int fib6_clean_node(struct fib6_walker *w) */ static void fib6_clean_tree(struct net *net, struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), + int (*func)(struct fib6_info *, void *arg), int sernum, void *arg) { struct fib6_cleaner c; @@ -1979,7 +1999,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, } static void __fib6_clean_all(struct net *net, - int (*func)(struct rt6_info *, void *), + int (*func)(struct fib6_info *, void *), int sernum, void *arg) { struct fib6_table *table; @@ -1999,7 +2019,7 @@ static void __fib6_clean_all(struct net *net, rcu_read_unlock(); } -void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *), +void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *), void *arg) { __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); @@ -2016,7 +2036,7 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct rt6_info *rt, void *arg) +static int fib6_age(struct fib6_info *rt, void *arg) { struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; @@ -2026,8 +2046,8 @@ static int fib6_age(struct rt6_info *rt, void *arg) * Routes are expired even if they are in use. */ - if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { - if (time_after(now, rt->dst.expires)) { + if (rt->fib6_flags & RTF_EXPIRES && rt->expires) { + if (time_after(now, rt->expires)) { RT6_TRACE("expiring %p\n", rt); return -1; } @@ -2110,7 +2130,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); @@ -2122,7 +2142,7 @@ static int __net_init fib6_net_init(struct net *net) goto out_fib6_main_tbl; net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); @@ -2211,25 +2231,26 @@ void fib6_gc_cleanup(void) #ifdef CONFIG_PROC_FS static int ipv6_route_seq_show(struct seq_file *seq, void *v) { - struct rt6_info *rt = v; + struct fib6_info *rt = v; struct ipv6_route_iter *iter = seq->private; + const struct net_device *dev; - seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); + seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen); #else seq_puts(seq, "00000000000000000000000000000000 00 "); #endif - if (rt->rt6i_flags & RTF_GATEWAY) - seq_printf(seq, "%pi6", &rt->rt6i_gateway); + if (rt->fib6_flags & RTF_GATEWAY) + seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw); else seq_puts(seq, "00000000000000000000000000000000"); + dev = rt->fib6_nh.nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), - rt->dst.__use, rt->rt6i_flags, - rt->dst.dev ? rt->dst.dev->name : ""); + rt->fib6_metric, atomic_read(&rt->fib6_ref), 0, + rt->fib6_flags, dev ? dev->name : ""); iter->w.leaf = NULL; return 0; } @@ -2243,7 +2264,7 @@ static int ipv6_route_yield(struct fib6_walker *w) do { iter->w.leaf = rcu_dereference_protected( - iter->w.leaf->rt6_next, + iter->w.leaf->fib6_next, lockdep_is_held(&iter->tbl->tb6_lock)); iter->skip--; if (!iter->skip && iter->w.leaf) @@ -2302,14 +2323,14 @@ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int r; - struct rt6_info *n; + struct fib6_info *n; struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; if (!v) goto iter_table; - n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next); + n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next); if (n) { ++*pos; return n; |