diff options
author | David S. Miller <davem@davemloft.net> | 2021-03-30 13:29:39 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-03-30 13:29:39 -0700 |
commit | 212ee4b52f10aeedc335979c2d81c6ef0c1fce99 (patch) | |
tree | c04bd4f9b2c8d3b4b560c58b47351e36a8e5b3ee | |
parent | fdb5cc6ab3b6a1c0122d3644a63ef9dc7a610d35 (diff) | |
parent | d329ea5bd8845f0b196bf41b18b6173340d6e0e4 (diff) |
Merge branch 'rfc8335-probe'
Andreas Roeseler says:
====================
add support for RFC 8335 PROBE
The popular utility ping has several severe limitations, such as the
inability to query specific interfaces on a node and requiring
bidirectional connectivity between the probing and probed interfaces.
RFC 8335 attempts to solve these limitations by creating the new utility
PROBE which is a specialized ICMP message that makes use of the ICMP
Extension Structure outlined in RFC 4884.
This patchset adds definitions for the ICMP Extended Echo Request and
Reply (PROBE) types for both IPV4 and IPV6, adds a sysctl to enable
responses to PROBE messages, expands the list of supported ICMP messages
to accommodate PROBE types, adds ipv6_dev_find into ipv6_stubs, and adds
functionality to respond to PROBE requests.
Changes:
v1 -> v2:
- Add AFI definitions
- Switch to functions such as dev_get_by_name and ip_dev_find to lookup
net devices
v2 -> v3:
Suggested by Willem de Bruijn <willemdebruijn.kernel@gmail.com>
- Add verification of incoming messages before looking up netdev
- Add prefix for PROBE specific defined variables
- Use proc_dointvec_minmax with zero and one for sysctl
- Create struct icmp_ext_echo_iio for parsing incoming packets
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
- Include net/addrconf.h library for ipv6_dev_find
v3 -> v4:
- Use in_addr instead of __be32 for storing IPV4 addresses
- Use IFNAMSIZ to statically allocate space for name in
icmp_ext_echo_iio
Suggested by Willem de Bruijn <willemdebruijn.kernel@gmail.com>
- Use skb_header_pointer to verify fields in incoming message
- Add check to ensure that extobj_hdr.length is valid
- Check to ensure object payload is padded with ASCII NULL characters
when probing by name, as specified by RFC 8335
- Statically allocate buff using IFNAMSIZ
- Add rcu blocking around ipv6_dev_find
- Use __in_dev_get_rcu to access IPV4 addresses of identified
net_device
- Remove check for ICMPV6 PROBE types
v4 -> v5:
- Statically allocate buff to size IFNAMSIZ on declaration
- Remove goto probe in favor of single branch
- Remove strict check for incoming PROBE request padding to nearest
32-bit boundary
Reported-by: kernel test robot <lkp@intel.com>
v5 -> v6:
- Add documentation for icmp_echo_enable_probe sysctl
- Remove RCU locking around ipv6_dev_find()
- Assign iio based on ctype
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/networking/ip-sysctl.rst | 6 | ||||
-rw-r--r-- | include/net/ipv6_stubs.h | 2 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/icmp.h | 42 | ||||
-rw-r--r-- | include/uapi/linux/icmpv6.h | 3 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 134 | ||||
-rw-r--r-- | net/ipv4/ping.c | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 9 | ||||
-rw-r--r-- | net/ipv6/addrconf_core.c | 7 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 1 |
10 files changed, 195 insertions, 14 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c7952ac5bd2f..4130bce40765 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1143,6 +1143,12 @@ icmp_echo_ignore_all - BOOLEAN Default: 0 +icmp_echo_enable_probe - BOOLEAN + If set to one, then the kernel will respond to RFC 8335 PROBE + requests sent to it. + + Default: 0 + icmp_echo_ignore_broadcasts - BOOLEAN If set non-zero, then the kernel will ignore all ICMP ECHO and TIMESTAMP requests sent to it via broadcast/multicast. diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 8fce558b5fea..afbce90c4480 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -66,6 +66,8 @@ struct ipv6_stub { int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); + struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr, + struct net_device *dev); }; extern const struct ipv6_stub *ipv6_stub __read_mostly; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d377266d133f..9c8dd424d79b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -84,6 +84,7 @@ struct netns_ipv4 { #endif u8 sysctl_icmp_echo_ignore_all; + u8 sysctl_icmp_echo_enable_probe; u8 sysctl_icmp_echo_ignore_broadcasts; u8 sysctl_icmp_ignore_bogus_error_responses; u8 sysctl_icmp_errors_use_inbound_ifaddr; diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h index fb169a50895e..222325d1d80e 100644 --- a/include/uapi/linux/icmp.h +++ b/include/uapi/linux/icmp.h @@ -20,6 +20,9 @@ #include <linux/types.h> #include <asm/byteorder.h> +#include <linux/in.h> +#include <linux/if.h> +#include <linux/in6.h> #define ICMP_ECHOREPLY 0 /* Echo Reply */ #define ICMP_DEST_UNREACH 3 /* Destination Unreachable */ @@ -66,6 +69,23 @@ #define ICMP_EXC_TTL 0 /* TTL count exceeded */ #define ICMP_EXC_FRAGTIME 1 /* Fragment Reass time exceeded */ +/* Codes for EXT_ECHO (PROBE) */ +#define ICMP_EXT_ECHO 42 +#define ICMP_EXT_ECHOREPLY 43 +#define ICMP_EXT_MAL_QUERY 1 /* Malformed Query */ +#define ICMP_EXT_NO_IF 2 /* No such Interface */ +#define ICMP_EXT_NO_TABLE_ENT 3 /* No such Table Entry */ +#define ICMP_EXT_MULT_IFS 4 /* Multiple Interfaces Satisfy Query */ + +/* Constants for EXT_ECHO (PROBE) */ +#define EXT_ECHOREPLY_ACTIVE (1 << 2)/* active bit in reply message */ +#define EXT_ECHOREPLY_IPV4 (1 << 1)/* ipv4 bit in reply message */ +#define EXT_ECHOREPLY_IPV6 1 /* ipv6 bit in reply message */ +#define EXT_ECHO_CTYPE_NAME 1 +#define EXT_ECHO_CTYPE_INDEX 2 +#define EXT_ECHO_CTYPE_ADDR 3 +#define ICMP_AFI_IP 1 /* Address Family Identifier for ipv4 */ +#define ICMP_AFI_IP6 2 /* Address Family Identifier for ipv6 */ struct icmphdr { __u8 type; @@ -118,4 +138,26 @@ struct icmp_extobj_hdr { __u8 class_type; }; +/* RFC 8335: 2.1 Header for c-type 3 payload */ +struct icmp_ext_echo_ctype3_hdr { + __be16 afi; + __u8 addrlen; + __u8 reserved; +}; + +/* RFC 8335: 2.1 Interface Identification Object */ +struct icmp_ext_echo_iio { + struct icmp_extobj_hdr extobj_hdr; + union { + char name[IFNAMSIZ]; + __be32 ifindex; + struct { + struct icmp_ext_echo_ctype3_hdr ctype3_hdr; + union { + struct in_addr ipv4_addr; + struct in6_addr ipv6_addr; + } ip_addr; + } addr; + } ident; +}; #endif /* _UAPI_LINUX_ICMP_H */ diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index 0564fd7ccde4..ecaece3af38d 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -140,6 +140,9 @@ struct icmp6hdr { #define ICMPV6_UNK_OPTION 2 #define ICMPV6_HDR_INCOMP 3 +/* Codes for EXT_ECHO (PROBE) */ +#define ICMPV6_EXT_ECHO_REQUEST 160 +#define ICMPV6_EXT_ECHO_REPLY 161 /* * constants for (set|get)sockopt */ diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 616e2dc1c8fa..76990e13a2f9 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -971,7 +971,7 @@ static bool icmp_redirect(struct sk_buff *skb) } /* - * Handle ICMP_ECHO ("ping") requests. + * Handle ICMP_ECHO ("ping") and ICMP_EXT_ECHO ("PROBE") requests. * * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo * requests. @@ -979,27 +979,125 @@ static bool icmp_redirect(struct sk_buff *skb) * included in the reply. * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring * echo requests, MUST have default=NOT. + * RFC 8335: 8 MUST have a config option to enable/disable ICMP + * Extended Echo Functionality, MUST be disabled by default * See also WRT handling of options once they are done and working. */ static bool icmp_echo(struct sk_buff *skb) { + struct icmp_ext_hdr *ext_hdr, _ext_hdr; + struct icmp_ext_echo_iio *iio, _iio; + struct icmp_bxm icmp_param; + struct net_device *dev; + char buff[IFNAMSIZ]; struct net *net; + u16 ident_len; + u8 status; net = dev_net(skb_dst(skb)->dev); - if (!net->ipv4.sysctl_icmp_echo_ignore_all) { - struct icmp_bxm icmp_param; + /* should there be an ICMP stat for ignored echos? */ + if (net->ipv4.sysctl_icmp_echo_ignore_all) + return true; + + icmp_param.data.icmph = *icmp_hdr(skb); + icmp_param.skb = skb; + icmp_param.offset = 0; + icmp_param.data_len = skb->len; + icmp_param.head_len = sizeof(struct icmphdr); - icmp_param.data.icmph = *icmp_hdr(skb); + if (icmp_param.data.icmph.type == ICMP_ECHO) { icmp_param.data.icmph.type = ICMP_ECHOREPLY; - icmp_param.skb = skb; - icmp_param.offset = 0; - icmp_param.data_len = skb->len; - icmp_param.head_len = sizeof(struct icmphdr); - icmp_reply(&icmp_param, skb); + goto send_reply; } - /* should there be an ICMP stat for ignored echos? */ - return true; + if (!net->ipv4.sysctl_icmp_echo_enable_probe) + return true; + /* We currently only support probing interfaces on the proxy node + * Check to ensure L-bit is set + */ + if (!(ntohs(icmp_param.data.icmph.un.echo.sequence) & 1)) + return true; + /* Clear status bits in reply message */ + icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00); + icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY; + ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr); + /* Size of iio is class_type dependent. + * Only check header here and assign length based on ctype in the switch statement + */ + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio); + if (!ext_hdr || !iio) + goto send_mal_query; + if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr)) + goto send_mal_query; + ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr); + status = 0; + dev = NULL; + switch (iio->extobj_hdr.class_type) { + case EXT_ECHO_CTYPE_NAME: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio); + if (ident_len >= IFNAMSIZ) + goto send_mal_query; + memset(buff, 0, sizeof(buff)); + memcpy(buff, &iio->ident.name, ident_len); + dev = dev_get_by_name(net, buff); + break; + case EXT_ECHO_CTYPE_INDEX: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) + + sizeof(iio->ident.ifindex), &_iio); + if (ident_len != sizeof(iio->ident.ifindex)) + goto send_mal_query; + dev = dev_get_by_index(net, ntohl(iio->ident.ifindex)); + break; + case EXT_ECHO_CTYPE_ADDR: + if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + + iio->ident.addr.ctype3_hdr.addrlen) + goto send_mal_query; + switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) { + case ICMP_AFI_IP: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) + + sizeof(struct in_addr), &_iio); + if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + + sizeof(struct in_addr)) + goto send_mal_query; + dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr.s_addr); + break; +#if IS_ENABLED(CONFIG_IPV6) + case ICMP_AFI_IP6: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio); + if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + + sizeof(struct in6_addr)) + goto send_mal_query; + dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); + if (dev) + dev_hold(dev); + break; +#endif + default: + goto send_mal_query; + } + break; + default: + goto send_mal_query; + } + if (!dev) { + icmp_param.data.icmph.code = ICMP_EXT_NO_IF; + goto send_reply; + } + /* Fill bits in reply message */ + if (dev->flags & IFF_UP) + status |= EXT_ECHOREPLY_ACTIVE; + if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) + status |= EXT_ECHOREPLY_IPV4; + if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) + status |= EXT_ECHOREPLY_IPV6; + dev_put(dev); + icmp_param.data.icmph.un.echo.sequence |= htons(status); +send_reply: + icmp_reply(&icmp_param, skb); + return true; +send_mal_query: + icmp_param.data.icmph.code = ICMP_EXT_MAL_QUERY; + goto send_reply; } /* @@ -1088,6 +1186,16 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); ICMPMSGIN_INC_STATS(net, icmph->type); + + /* Check for ICMP Extended Echo (PROBE) messages */ + if (icmph->type == ICMP_EXT_ECHO) { + /* We can't use icmp_pointers[].handler() because it is an array of + * size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42. + */ + success = icmp_echo(skb); + goto success_check; + } + /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * @@ -1097,7 +1205,6 @@ int icmp_rcv(struct sk_buff *skb) if (icmph->type > NR_ICMP_TYPES) goto error; - /* * Parse the ICMP message */ @@ -1123,7 +1230,7 @@ int icmp_rcv(struct sk_buff *skb) } success = icmp_pointers[icmph->type].handler(skb); - +success_check: if (success) { consume_skb(skb); return NET_RX_SUCCESS; @@ -1340,6 +1447,7 @@ static int __net_init icmp_sk_init(struct net *net) /* Control parameters for ECHO replies. */ net->ipv4.sysctl_icmp_echo_ignore_all = 0; + net->ipv4.sysctl_icmp_echo_enable_probe = 0; net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1; /* Control parameter - ignore bogus broadcast responses? */ diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8b943f85fff9..1c9f71a37258 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -453,7 +453,9 @@ EXPORT_SYMBOL_GPL(ping_bind); static inline int ping_supported(int family, int type, int code) { return (family == AF_INET && type == ICMP_ECHO && code == 0) || - (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0); + (family == AF_INET && type == ICMP_EXT_ECHO && code == 0) || + (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0) || + (family == AF_INET6 && type == ICMPV6_EXT_ECHO_REQUEST && code == 0); } /* diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3a7e5cf5d6cc..e3cb2d96b55e 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -599,6 +599,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, }, { + .procname = "icmp_echo_enable_probe", + .data = &init_net.ipv4.sysctl_icmp_echo_enable_probe, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, + { .procname = "icmp_echo_ignore_broadcasts", .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, .maxlen = sizeof(u8), diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index a36626afbc02..1d4054bb345b 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -198,6 +198,12 @@ static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct s return -EAFNOSUPPORT; } +static struct net_device *eafnosupport_ipv6_dev_find(struct net *net, const struct in6_addr *addr, + struct net_device *dev) +{ + return ERR_PTR(-EAFNOSUPPORT); +} + const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow, .ipv6_route_input = eafnosupport_ipv6_route_input, @@ -209,6 +215,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .fib6_nh_init = eafnosupport_fib6_nh_init, .ip6_del_rt = eafnosupport_ip6_del_rt, .ipv6_fragment = eafnosupport_ipv6_fragment, + .ipv6_dev_find = eafnosupport_ipv6_dev_find, }; EXPORT_SYMBOL_GPL(ipv6_stub); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3c9bacffc9c3..4f7ca5807046 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1032,6 +1032,7 @@ static const struct ipv6_stub ipv6_stub_impl = { #endif .nd_tbl = &nd_tbl, .ipv6_fragment = ip6_fragment, + .ipv6_dev_find = ipv6_dev_find, }; static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { |