summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/netdevice.h7
-rw-r--r--include/net/ip_fib.h1
-rw-r--r--net/core/dev.c28
-rw-r--r--net/ipv4/fib_frontend.c12
-rw-r--r--net/ipv4/fib_semantics.c50
-rw-r--r--net/ipv4/route.c7
6 files changed, 96 insertions, 9 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c7861e4b402c..d837dad24b4c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2458,6 +2458,13 @@ struct netdev_notifier_info {
struct netlink_ext_ack *extack;
};
+struct netdev_notifier_info_ext {
+ struct netdev_notifier_info info; /* must be first */
+ union {
+ u32 mtu;
+ } ext;
+};
+
struct netdev_notifier_change_info {
struct netdev_notifier_info info; /* must be first */
unsigned int flags_changed;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 69c91d1934c1..c9b7b136939d 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -394,6 +394,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev);
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
int fib_sync_down_addr(struct net_device *dev, __be32 local);
int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
+void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
diff --git a/net/core/dev.c b/net/core/dev.c
index 82114e1111e6..93243479085f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1752,6 +1752,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
+/**
+ * call_netdevice_notifiers_mtu - call all network notifier blocks
+ * @val: value passed unmodified to notifier function
+ * @dev: net_device pointer passed unmodified to notifier function
+ * @arg: additional u32 argument passed to the notifier function
+ *
+ * Call all network notifier blocks. Parameters and return value
+ * are as for raw_notifier_call_chain().
+ */
+static int call_netdevice_notifiers_mtu(unsigned long val,
+ struct net_device *dev, u32 arg)
+{
+ struct netdev_notifier_info_ext info = {
+ .info.dev = dev,
+ .ext.mtu = arg,
+ };
+
+ BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
+
+ return call_netdevice_notifiers_info(val, &info.info);
+}
+
#ifdef CONFIG_NET_INGRESS
static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
@@ -7574,14 +7596,16 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
err = __dev_set_mtu(dev, new_mtu);
if (!err) {
- err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+ err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+ orig_mtu);
err = notifier_to_errno(err);
if (err) {
/* setting mtu back and notifying everyone again,
* so that they have a chance to revert changes.
*/
__dev_set_mtu(dev, orig_mtu);
- call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+ call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+ new_mtu);
}
}
return err;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 2998b0e47d4b..0113993e9b2c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1243,7 +1243,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct netdev_notifier_changeupper_info *info;
+ struct netdev_notifier_changeupper_info *upper_info = ptr;
+ struct netdev_notifier_info_ext *info_ext = ptr;
struct in_device *in_dev;
struct net *net = dev_net(dev);
unsigned int flags;
@@ -1278,16 +1279,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
fib_sync_up(dev, RTNH_F_LINKDOWN);
else
fib_sync_down_dev(dev, event, false);
- /* fall through */
+ rt_cache_flush(net);
+ break;
case NETDEV_CHANGEMTU:
+ fib_sync_mtu(dev, info_ext->ext.mtu);
rt_cache_flush(net);
break;
case NETDEV_CHANGEUPPER:
- info = ptr;
+ upper_info = ptr;
/* flush all routes if dev is linked to or unlinked from
* an L3 master device (e.g., VRF)
*/
- if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+ if (upper_info->upper_dev &&
+ netif_is_l3_master(upper_info->upper_dev))
fib_disable_ip(dev, NETDEV_DOWN, true);
break;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f3c89ccf14c5..446204ca7406 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1470,6 +1470,56 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
return NOTIFY_DONE;
}
+/* Update the PMTU of exceptions when:
+ * - the new MTU of the first hop becomes smaller than the PMTU
+ * - the old MTU was the same as the PMTU, and it limited discovery of
+ * larger MTUs on the path. With that limit raised, we can now
+ * discover larger MTUs
+ * A special case is locked exceptions, for which the PMTU is smaller
+ * than the minimal accepted PMTU:
+ * - if the new MTU is greater than the PMTU, don't make any change
+ * - otherwise, unlock and set PMTU
+ */
+static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
+{
+ struct fnhe_hash_bucket *bucket;
+ int i;
+
+ bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
+ if (!bucket)
+ return;
+
+ for (i = 0; i < FNHE_HASH_SIZE; i++) {
+ struct fib_nh_exception *fnhe;
+
+ for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
+ fnhe;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
+ if (fnhe->fnhe_mtu_locked) {
+ if (new <= fnhe->fnhe_pmtu) {
+ fnhe->fnhe_pmtu = new;
+ fnhe->fnhe_mtu_locked = false;
+ }
+ } else if (new < fnhe->fnhe_pmtu ||
+ orig == fnhe->fnhe_pmtu) {
+ fnhe->fnhe_pmtu = new;
+ }
+ }
+ }
+}
+
+void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
+{
+ unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+ struct hlist_head *head = &fib_info_devhash[hash];
+ struct fib_nh *nh;
+
+ hlist_for_each_entry(nh, head, nh_hash) {
+ if (nh->nh_dev == dev)
+ nh_update_mtu(nh, dev->mtu, orig_mtu);
+ }
+}
+
/* Event force Flags Description
* NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host
* NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b678466da451..8501554e96a4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1001,21 +1001,22 @@ out: kfree_skb(skb);
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct dst_entry *dst = &rt->dst;
+ u32 old_mtu = ipv4_mtu(dst);
struct fib_result res;
bool lock = false;
if (ip_mtu_locked(dst))
return;
- if (ipv4_mtu(dst) < mtu)
+ if (old_mtu < mtu)
return;
if (mtu < ip_rt_min_pmtu) {
lock = true;
- mtu = ip_rt_min_pmtu;
+ mtu = min(old_mtu, ip_rt_min_pmtu);
}
- if (rt->rt_pmtu == mtu &&
+ if (rt->rt_pmtu == mtu && !lock &&
time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
return;