diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 61 |
1 files changed, 39 insertions, 22 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f33ad1f383b6..98c6f3429593 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -84,6 +84,7 @@ #include <linux/jhash.h> #include <net/dst.h> #include <net/dst_metadata.h> +#include <net/inet_dscp.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/route.h> @@ -112,14 +113,13 @@ #define DEFAULT_MIN_PMTU (512 + 20 + 20) #define DEFAULT_MTU_EXPIRES (10 * 60 * HZ) - +#define DEFAULT_MIN_ADVMSS 256 static int ip_rt_max_size; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; -static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; @@ -458,7 +458,7 @@ static u32 *ip_tstamps __read_mostly; * if one generator is seldom used. This makes hard for an attacker * to infer how many packets were sent between two points in time. */ -u32 ip_idents_reserve(u32 hash, int segs) +static u32 ip_idents_reserve(u32 hash, int segs) { u32 bucket, old, now = (u32)jiffies; atomic_t *p_id; @@ -479,7 +479,6 @@ u32 ip_idents_reserve(u32 hash, int segs) */ return atomic_add_return(segs + delta, p_id) - segs; } -EXPORT_SYMBOL(ip_idents_reserve); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { @@ -499,6 +498,15 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); +static void ip_rt_fix_tos(struct flowi4 *fl4) +{ + __u8 tos = RT_FL_TOS(fl4); + + fl4->flowi4_tos = tos & IPTOS_RT_MASK; + fl4->flowi4_scope = tos & RTO_ONLINK ? + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; +} + static void __build_flow_key(const struct net *net, struct flowi4 *fl4, const struct sock *sk, const struct iphdr *iph, @@ -824,6 +832,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); + ip_rt_fix_tos(&fl4); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1048,6 +1057,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); + ip_rt_fix_tos(&fl4); /* Don't make lookup fail for bridged encapsulations */ if (skb && netif_is_any_bridge_port(skb->dev)) @@ -1122,6 +1132,8 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; new = true; + } else { + ip_rt_fix_tos(&fl4); } __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); @@ -1298,9 +1310,10 @@ static void set_class_tag(struct rtable *rt, u32 tag) static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { + struct net *net = dev_net(dst->dev); unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, - ip_rt_min_advmss); + net->ipv4.ip_rt_min_advmss); return min(advmss, IPV4_MAX_PMTU - header_size); } @@ -1485,6 +1498,7 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) struct uncached_list { spinlock_t lock; struct list_head head; + struct list_head quarantine; }; static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); @@ -1506,7 +1520,7 @@ void rt_del_uncached_list(struct rtable *rt) struct uncached_list *ul = rt->rt_uncached_list; spin_lock_bh(&ul->lock); - list_del(&rt->rt_uncached); + list_del_init(&rt->rt_uncached); spin_unlock_bh(&ul->lock); } } @@ -1521,20 +1535,24 @@ static void ipv4_dst_destroy(struct dst_entry *dst) void rt_flush_dev(struct net_device *dev) { - struct rtable *rt; + struct rtable *rt, *safe; int cpu; for_each_possible_cpu(cpu) { struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); + if (list_empty(&ul->head)) + continue; + spin_lock_bh(&ul->lock); - list_for_each_entry(rt, &ul->head, rt_uncached) { + list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) { if (rt->dst.dev != dev) continue; rt->dst.dev = blackhole_netdev; dev_replace_track(dev, blackhole_netdev, &rt->dst.dev_tracker, GFP_ATOMIC); + list_move(&rt->rt_uncached, &ul->quarantine); } spin_unlock_bh(&ul->lock); } @@ -2258,6 +2276,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ + fl4.flowi4_l3mdev = 0; fl4.flowi4_oif = 0; fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; @@ -2603,7 +2622,6 @@ add: struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, const struct sk_buff *skb) { - __u8 tos = RT_FL_TOS(fl4); struct fib_result res = { .type = RTN_UNSPEC, .fi = NULL, @@ -2613,9 +2631,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, struct rtable *rth; fl4->flowi4_iif = LOOPBACK_IFINDEX; - fl4->flowi4_tos = tos & IPTOS_RT_MASK; - fl4->flowi4_scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); + ip_rt_fix_tos(fl4); rcu_read_lock(); rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); @@ -2733,8 +2749,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, res->fi = NULL; res->table = NULL; if (fl4->flowi4_oif && - (ipv4_is_multicast(fl4->daddr) || - !netif_index_is_l3_master(net, fl4->flowi4_oif))) { + (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { /* Apparently, routing tables are wrong. Assume, * that the destination is on link. * @@ -3392,7 +3407,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (fa->fa_slen == slen && fa->tb_id == fri.tb_id && - fa->fa_tos == fri.tos && + fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) && fa->fa_info == res.fi && fa->fa_type == fri.type) { fri.offload = READ_ONCE(fa->offload); @@ -3535,13 +3550,6 @@ static struct ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "min_adv_mss", - .data = &ip_rt_min_advmss, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { } }; @@ -3569,6 +3577,13 @@ static struct ctl_table ipv4_route_netns_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "min_adv_mss", + .data = &init_net.ipv4.ip_rt_min_advmss, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { }, }; @@ -3631,6 +3646,7 @@ static __net_init int netns_ip_rt_init(struct net *net) /* Set default value for namespaceified sysctls */ net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU; net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES; + net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS; return 0; } @@ -3705,6 +3721,7 @@ int __init ip_rt_init(void) struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); INIT_LIST_HEAD(&ul->head); + INIT_LIST_HEAD(&ul->quarantine); spin_lock_init(&ul->lock); } #ifdef CONFIG_IP_ROUTE_CLASSID |