diff options
author | David S. Miller <davem@davemloft.net> | 2023-08-14 08:01:07 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2023-08-14 08:01:07 +0100 |
commit | 76fa36355817705649563f80285ba22b06e456e3 (patch) | |
tree | f2fa93b3512bcb5f296149b7a54a174c18cbff0a | |
parent | afb0c19242a0c9a19fc2013dd1389b553acc0ede (diff) | |
parent | 4242029164d6e675d4695e3420399eece29ec599 (diff) |
Merge branch 'ovs-drop-reasons'
Adrian Moreno says:
====================
openvswitch: add drop reasons
There is currently a gap in drop visibility in the openvswitch module.
This series tries to improve this by adding a new drop reason subsystem
for OVS.
Apart from adding a new drop reasson subsystem and some common drop
reasons, this series takes Eric's preliminary work [1] on adding an
explicit drop action and integrates it into the same subsystem.
A limitation of this series is that it does not report upcall errors.
The reason is that there could be many sources of upcall drops and the
most common one, which is the netlink buffer overflow, cannot be
reported via kfree_skb() because the skb is freed in the netlink layer
(see [2]). Therefore, using a reason for the rare events and not the
common one would be even more misleading. I'd propose we add (in a
follow up patch) a tracepoint to better report upcall errors.
[1] https://lore.kernel.org/netdev/202306300609.tdRdZscy-lkp@intel.com/T/
[2] commit 1100248a5c5c ("openvswitch: Fix double reporting of drops in dropwatch")
---
v4 -> v5:
- Rebased
- Added a helper function to explicitly convert drop reason enum types
v3 -> v4:
- Changed names of errors following Ilya's suggestions
- Moved the ovs-dpctl.py changes from patch 7/7 to 3/7
- Added a test to ensure actions following a drop are rejected
rfc2 -> v3:
- Rebased on top of latest net-next
rfc1 -> rfc2:
- Fail when an explicit drop is not the last
- Added a drop reason for action errors
- Added braces around macros
- Dropped patch that added support for masks in ovs-dpctl.py as it's now
included in Aaron's series [2].
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/dropreason.h | 6 | ||||
-rw-r--r-- | include/uapi/linux/openvswitch.h | 2 | ||||
-rw-r--r-- | net/openvswitch/actions.c | 42 | ||||
-rw-r--r-- | net/openvswitch/conntrack.c | 3 | ||||
-rw-r--r-- | net/openvswitch/datapath.c | 16 | ||||
-rw-r--r-- | net/openvswitch/drop.h | 41 | ||||
-rw-r--r-- | net/openvswitch/flow_netlink.c | 10 | ||||
-rwxr-xr-x | tools/testing/selftests/net/openvswitch/openvswitch.sh | 102 | ||||
-rw-r--r-- | tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 22 |
9 files changed, 226 insertions, 18 deletions
diff --git a/include/net/dropreason.h b/include/net/dropreason.h index 685fb37df8e8..56cb7be92244 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -23,6 +23,12 @@ enum skb_drop_reason_subsys { */ SKB_DROP_REASON_SUBSYS_MAC80211_MONITOR, + /** + * @SKB_DROP_REASON_SUBSYS_OPENVSWITCH: openvswitch drop reasons, + * see net/openvswitch/drop.h + */ + SKB_DROP_REASON_SUBSYS_OPENVSWITCH, + /** @SKB_DROP_REASON_SUBSYS_NUM: number of subsystems defined */ SKB_DROP_REASON_SUBSYS_NUM }; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index e94870e77ee9..efc82c318fa2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -965,6 +965,7 @@ struct check_pkt_len_arg { * start of the packet or at the start of the l3 header depending on the value * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS * argument. + * @OVS_ACTION_ATTR_DROP: Explicit drop action. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -1002,6 +1003,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ + OVS_ACTION_ATTR_DROP, /* u32 error code. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index cab1e02b63e0..fd66014d8a76 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -27,6 +27,7 @@ #include <net/sctp/checksum.h> #include "datapath.h" +#include "drop.h" #include "flow.h" #include "conntrack.h" #include "vport.h" @@ -781,7 +782,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct vport *vport = data->vport; if (skb_cow_head(skb, data->l2_len) < 0) { - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM); return -ENOMEM; } @@ -852,6 +853,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, struct sk_buff *skb, u16 mru, struct sw_flow_key *key) { + enum ovs_drop_reason reason; u16 orig_network_offset = 0; if (eth_p_mpls(skb->protocol)) { @@ -861,6 +863,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, if (skb_network_offset(skb) > MAX_L2_LEN) { OVS_NLERR(1, "L2 header too long to fragment"); + reason = OVS_DROP_FRAG_L2_TOO_LONG; goto err; } @@ -901,12 +904,13 @@ static void ovs_fragment(struct net *net, struct vport *vport, WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", ovs_vport_name(vport), ntohs(key->eth.type), mru, vport->dev->mtu); + reason = OVS_DROP_FRAG_INVALID_PROTO; goto err; } return; err: - kfree_skb(skb); + ovs_kfree_skb_reason(skb, reason); } static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, @@ -933,10 +937,10 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, ovs_fragment(net, vport, skb, mru, key); } else { - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); } } else { - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY); } } @@ -1010,7 +1014,7 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb, return clone_execute(dp, skb, key, 0, nla_data(actions), nla_len(actions), true, false); - consume_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_IP_TTL); return 0; } @@ -1036,7 +1040,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb, if ((arg->probability != U32_MAX) && (!arg->probability || get_random_u32() > arg->probability)) { if (last) - consume_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION); return 0; } @@ -1297,6 +1301,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, if (trace_ovs_do_execute_action_enabled()) trace_ovs_do_execute_action(dp, skb, key, a, rem); + /* Actions that rightfully have to consume the skb should do it + * and return directly. + */ switch (nla_type(a)) { case OVS_ACTION_ATTR_OUTPUT: { int port = nla_get_u32(a); @@ -1332,6 +1339,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, output_userspace(dp, skb, key, a, attr, len, OVS_CB(skb)->cutlen); OVS_CB(skb)->cutlen = 0; + if (nla_is_last(a, rem)) { + consume_skb(skb); + return 0; + } break; case OVS_ACTION_ATTR_HASH: @@ -1446,7 +1457,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_METER: if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) { - consume_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_METER); return 0; } break; @@ -1477,15 +1488,24 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return dec_ttl_exception_handler(dp, skb, key, a); break; + + case OVS_ACTION_ATTR_DROP: { + enum ovs_drop_reason reason = nla_get_u32(a) + ? OVS_DROP_EXPLICIT_WITH_ERROR + : OVS_DROP_EXPLICIT; + + ovs_kfree_skb_reason(skb, reason); + return 0; + } } if (unlikely(err)) { - kfree_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_ACTION_ERROR); return err; } } - consume_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION); return 0; } @@ -1547,7 +1567,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, /* Out of per CPU action FIFO space. Drop the 'skb' and * log an error. */ - kfree_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_DEFERRED_LIMIT); if (net_ratelimit()) { if (actions) { /* Sample action */ @@ -1599,7 +1619,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", ovs_dp_name(dp)); - kfree_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_RECURSION_LIMIT); err = -ENETDOWN; goto out; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index fa955e892210..0cfa1e9482e6 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -29,6 +29,7 @@ #include <net/netfilter/nf_conntrack_act_ct.h> #include "datapath.h" +#include "drop.h" #include "conntrack.h" #include "flow.h" #include "flow_netlink.h" @@ -1035,7 +1036,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, skb_push_rcsum(skb, nh_ofs); if (err) - kfree_skb(skb); + ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK); return err; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a6d2a0b1aa21..d33cb739883f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -41,6 +41,7 @@ #include <net/pkt_cls.h> #include "datapath.h" +#include "drop.h" #include "flow.h" #include "flow_table.h" #include "flow_netlink.h" @@ -2702,6 +2703,17 @@ static struct pernet_operations ovs_net_ops = { .size = sizeof(struct ovs_net), }; +static const char * const ovs_drop_reasons[] = { +#define S(x) (#x), + OVS_DROP_REASONS(S) +#undef S +}; + +static struct drop_reason_list drop_reason_list_ovs = { + .reasons = ovs_drop_reasons, + .n_reasons = ARRAY_SIZE(ovs_drop_reasons), +}; + static int __init dp_init(void) { int err; @@ -2743,6 +2755,9 @@ static int __init dp_init(void) if (err < 0) goto error_unreg_netdev; + drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH, + &drop_reason_list_ovs); + return 0; error_unreg_netdev: @@ -2769,6 +2784,7 @@ static void dp_cleanup(void) ovs_netdev_exit(); unregister_netdevice_notifier(&ovs_dp_device_notifier); unregister_pernet_device(&ovs_net_ops); + drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH); rcu_barrier(); ovs_vport_exit(); ovs_flow_exit(); diff --git a/net/openvswitch/drop.h b/net/openvswitch/drop.h new file mode 100644 index 000000000000..cedf9b7b5796 --- /dev/null +++ b/net/openvswitch/drop.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * OpenvSwitch drop reason list. + */ + +#ifndef OPENVSWITCH_DROP_H +#define OPENVSWITCH_DROP_H +#include <linux/skbuff.h> +#include <net/dropreason.h> + +#define OVS_DROP_REASONS(R) \ + R(OVS_DROP_LAST_ACTION) \ + R(OVS_DROP_ACTION_ERROR) \ + R(OVS_DROP_EXPLICIT) \ + R(OVS_DROP_EXPLICIT_WITH_ERROR) \ + R(OVS_DROP_METER) \ + R(OVS_DROP_RECURSION_LIMIT) \ + R(OVS_DROP_DEFERRED_LIMIT) \ + R(OVS_DROP_FRAG_L2_TOO_LONG) \ + R(OVS_DROP_FRAG_INVALID_PROTO) \ + R(OVS_DROP_CONNTRACK) \ + R(OVS_DROP_IP_TTL) \ + /* deliberate comment for trailing \ */ + +enum ovs_drop_reason { + __OVS_DROP_REASON = SKB_DROP_REASON_SUBSYS_OPENVSWITCH << + SKB_DROP_REASON_SUBSYS_SHIFT, +#define ENUM(x) x, + OVS_DROP_REASONS(ENUM) +#undef ENUM + + OVS_DROP_MAX, +}; + +static inline void +ovs_kfree_skb_reason(struct sk_buff *skb, enum ovs_drop_reason reason) +{ + kfree_skb_reason(skb, (u32)reason); +} + +#endif /* OPENVSWITCH_DROP_H */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 41116361433d..88965e2068ac 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -38,6 +38,7 @@ #include <net/tun_proto.h> #include <net/erspan.h> +#include "drop.h" #include "flow_netlink.h" struct ovs_len_tbl { @@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_RECIRC: case OVS_ACTION_ATTR_TRUNC: case OVS_ACTION_ATTR_USERSPACE: + case OVS_ACTION_ATTR_DROP: break; case OVS_ACTION_ATTR_CT: @@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) /* Whenever new actions are added, the need to update this * function should be considered. */ - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23); + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); if (!actions) return; @@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, + [OVS_ACTION_ATTR_DROP] = sizeof(u32), }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3453,6 +3456,11 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, skip_copy = true; break; + case OVS_ACTION_ATTR_DROP: + if (!nla_is_last(a, rem)) + return -EINVAL; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index dced4f612a78..9c2012d70b08 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -16,7 +16,8 @@ tests=" connect_v4 ip4-xon: Basic ipv4 ping between two NS nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT netlink_checks ovsnl: validate netlink attrs and settings - upcall_interfaces ovs: test the upcall interfaces" + upcall_interfaces ovs: test the upcall interfaces + drop_reason drop: test drop reasons are emitted" info() { [ $VERBOSE = 0 ] || echo $* @@ -141,6 +142,25 @@ ovs_add_flow () { return 0 } +ovs_drop_record_and_run () { + local sbx=$1 + shift + + perf record -a -q -e skb:kfree_skb -o ${ovs_dir}/perf.data $* \ + >> ${ovs_dir}/stdout 2>> ${ovs_dir}/stderr + return $? +} + +ovs_drop_reason_count() +{ + local reason=$1 + + local perf_output=`perf script -i ${ovs_dir}/perf.data -F trace:event,trace` + local pattern="skb:kfree_skb:.*reason: $reason" + + return `echo "$perf_output" | grep "$pattern" | wc -l` +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." @@ -155,6 +175,76 @@ usage() { exit 1 } +# drop_reason test +# - drop packets and verify the right drop reason is reported +test_drop_reason() { + which perf >/dev/null 2>&1 || return $ksft_skip + + sbx_add "test_drop_reason" || return $? + + ovs_add_dp "test_drop_reason" dropreason || return 1 + + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_drop_reason" "dropreason" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + # Setup client namespace + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + + # Setup server namespace + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + # Allow ARP + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + + # Allow client ICMP traffic but drop return path + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=1),icmp()" '2' + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' + + ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 + ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION + if [[ "$?" -ne "2" ]]; then + info "Did not detect expected drops: $?" + return 1 + fi + + # Drop UDP 6000 traffic with an explicit action and an error code. + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=6000)" \ + 'drop(42)' + # Drop UDP 7000 traffic with an explicit action with no error code. + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=7000)" \ + 'drop(0)' + + ovs_drop_record_and_run \ + "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 + ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR + if [[ "$?" -ne "1" ]]; then + info "Did not detect expected explicit error drops: $?" + return 1 + fi + + ovs_drop_record_and_run \ + "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 + ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION + if [[ "$?" -ne "1" ]]; then + info "Did not detect expected explicit drops: $?" + return 1 + fi + + return 0 +} + # arp_ping test # - client has 1500 byte MTU # - server has 1500 byte MTU @@ -393,6 +483,16 @@ test_netlink_checks () { wc -l) == 2 ] || \ return 1 + ERR_MSG="Flow actions may not be safe on all matching packets" + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") + ovs_add_flow "test_netlink_checks" nv0 \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \ + &> /dev/null && return 1 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") + if [ "$PRE_TEST" == "$POST_TEST" ]; then + info "failed - error not generated" + return 1 + fi return 0 } diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index fbdac15e3134..912dc8c49085 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -301,6 +301,7 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"), ("OVS_ACTION_ATTR_ADD_MPLS", "none"), ("OVS_ACTION_ATTR_DEC_TTL", "none"), + ("OVS_ACTION_ATTR_DROP", "uint32"), ) class ctact(nla): @@ -447,6 +448,8 @@ class ovsactions(nla): print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) elif field[0] == "OVS_ACTION_ATTR_TRUNC": print_str += "trunc(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_DROP": + print_str += "drop(%d)" % int(self.get_attr(field[0])) elif field[1] == "flag": if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": print_str += "ct_clear" @@ -468,10 +471,21 @@ class ovsactions(nla): while len(actstr) != 0: parsed = False if actstr.startswith("drop"): - # for now, drops have no explicit action, so we - # don't need to set any attributes. The final - # act of the processing chain will just drop the packet - return + # If no reason is provided, the implicit drop is used (i.e no + # action). If some reason is given, an explicit action is used. + actstr, reason = parse_extract_field( + actstr, + "drop(", + "([0-9]+)", + lambda x: int(x, 0), + False, + None, + ) + if reason is not None: + self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason]) + parsed = True + else: + return elif parse_starts_block(actstr, "^(\d+)", False, True): actstr, output = parse_extract_field( |