diff options
author | David S. Miller <davem@davemloft.net> | 2022-05-06 11:21:34 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-05-06 11:21:34 +0100 |
commit | beb21e3e8e261fc9f11050af17efe6de183b33d3 (patch) | |
tree | 9957cbbdf2728c8575e7d2c1636491b3719cc04a | |
parent | 76a8426959a6a70b0d00158d2d7a8661957878c8 (diff) | |
parent | a7da2a864a4fa63fe39c808eec8abfb76abf1dbd (diff) |
Merge branch 'nfp-flower-rework'
Simon Horman says:
====================
nfp: flower: decap neighbour table rework
Louis Peens says:
This patch series reworks the way in which flow rules that outputs to
OVS internal ports gets handled by the nfp driver.
Previously this made use of a small pre_tun_table, but this only used
destination MAC addresses, and made the implicit assumption that there is
only a single source MAC":"destination MAC" mapping per tunnel. In
hindsight this seems to be a pretty obvious oversight, but this was hidden
in plain sight for quite some time.
This series changes the implementation to make use of the same Neighbour
table for decap that is in use for the tunnel encap solution. It stores
any new Neighbour updates in this table. Previously this path was only
triggered for encapsulation candidates, and the entries were send and
forget, not saved on the host as it is after this series. It also keeps
track of any flow rule that outputs to OVS internal ports (and some
other criteria not worth mentioning here), very similar to how it was
done previously, except now these flows are kept track of in a list.
When a new Neighbour entry gets added this list gets iterated for
potential matches, in which case the table gets updated with a reference
to the flow, and the Neighbour entry on the card gets updated with the
relevant host_ctx. The same happens when a new qualifying flow gets
added - the Neighbour table gets iterated for applicable matches, and
once again the firmware gets updated with the host_ctx when any matches
are found.
Since this also requires a firmware change we add a new capability bit,
and keep the old behaviour in case of older firmware without this bit
set.
This series starts by doing some preparation, then adding the new list
and table entries. Next the functionality to link/unlink these entries
are added, and finally this new functionality is enabled by adding the
DECAP_V2 bit to the driver feature list.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/action.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/main.h | 110 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/metadata.c | 19 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/offload.c | 86 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 502 |
5 files changed, 453 insertions, 267 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 1b9421e844a9..0147de405365 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -220,7 +220,8 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, } output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid); } else if (nfp_flower_internal_port_can_offload(app, out_dev)) { - if (!(priv->flower_ext_feats & NFP_FL_FEATS_PRE_TUN_RULES)) { + if (!(priv->flower_ext_feats & NFP_FL_FEATS_PRE_TUN_RULES) && + !(priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pre-tunnel rules not supported in loaded firmware"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index fa902ce2dd82..cb799d18682d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -51,6 +51,7 @@ struct nfp_app; #define NFP_FL_FEATS_VLAN_QINQ BIT(8) #define NFP_FL_FEATS_QOS_PPS BIT(9) #define NFP_FL_FEATS_QOS_METER BIT(10) +#define NFP_FL_FEATS_DECAP_V2 BIT(11) #define NFP_FL_FEATS_HOST_ACK BIT(31) #define NFP_FL_ENABLE_FLOW_MERGE BIT(0) @@ -67,7 +68,8 @@ struct nfp_app; NFP_FL_FEATS_IPV6_TUN | \ NFP_FL_FEATS_VLAN_QINQ | \ NFP_FL_FEATS_QOS_PPS | \ - NFP_FL_FEATS_QOS_METER) + NFP_FL_FEATS_QOS_METER | \ + NFP_FL_FEATS_DECAP_V2) struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; @@ -86,12 +88,8 @@ struct nfp_fl_stats_id { * @offloaded_macs: Hashtable of the offloaded MAC addresses * @ipv4_off_list: List of IPv4 addresses to offload * @ipv6_off_list: List of IPv6 addresses to offload - * @neigh_off_list_v4: List of IPv4 neighbour offloads - * @neigh_off_list_v6: List of IPv6 neighbour offloads * @ipv4_off_lock: Lock for the IPv4 address list * @ipv6_off_lock: Lock for the IPv6 address list - * @neigh_off_lock_v4: Lock for the IPv4 neighbour address list - * @neigh_off_lock_v6: Lock for the IPv6 neighbour address list * @mac_off_ids: IDA to manage id assignment for offloaded MACs * @neigh_nb: Notifier to monitor neighbour state */ @@ -99,17 +97,95 @@ struct nfp_fl_tunnel_offloads { struct rhashtable offloaded_macs; struct list_head ipv4_off_list; struct list_head ipv6_off_list; - struct list_head neigh_off_list_v4; - struct list_head neigh_off_list_v6; struct mutex ipv4_off_lock; struct mutex ipv6_off_lock; - spinlock_t neigh_off_lock_v4; - spinlock_t neigh_off_lock_v6; struct ida mac_off_ids; struct notifier_block neigh_nb; }; /** + * struct nfp_tun_neigh - basic neighbour data + * @dst_addr: Destination MAC address + * @src_addr: Source MAC address + * @port_id: NFP port to output packet on - associated with source IPv4 + */ +struct nfp_tun_neigh { + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** + * struct nfp_tun_neigh_ext - extended neighbour data + * @vlan_tpid: VLAN_TPID match field + * @vlan_tci: VLAN_TCI match field + * @host_ctx: Host context ID to be saved here + */ +struct nfp_tun_neigh_ext { + __be16 vlan_tpid; + __be16 vlan_tci; + __be32 host_ctx; +}; + +/** + * struct nfp_tun_neigh_v4 - neighbour/route entry on the NFP for IPv4 + * @dst_ipv4: Destination IPv4 address + * @src_ipv4: Source IPv4 address + * @common: Neighbour/route common info + * @ext: Neighbour/route extended info + */ +struct nfp_tun_neigh_v4 { + __be32 dst_ipv4; + __be32 src_ipv4; + struct nfp_tun_neigh common; + struct nfp_tun_neigh_ext ext; +}; + +/** + * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP for IPv6 + * @dst_ipv6: Destination IPv6 address + * @src_ipv6: Source IPv6 address + * @common: Neighbour/route common info + * @ext: Neighbour/route extended info + */ +struct nfp_tun_neigh_v6 { + struct in6_addr dst_ipv6; + struct in6_addr src_ipv6; + struct nfp_tun_neigh common; + struct nfp_tun_neigh_ext ext; +}; + +/** + * struct nfp_neigh_entry + * @neigh_cookie: Cookie for hashtable lookup + * @ht_node: rhash_head entry for hashtable + * @list_head: Needed as member of linked_nn_entries list + * @payload: The neighbour info payload + * @flow: Linked flow rule + * @is_ipv6: Flag to indicate if payload is ipv6 or ipv4 + */ +struct nfp_neigh_entry { + unsigned long neigh_cookie; + struct rhash_head ht_node; + struct list_head list_head; + char *payload; + struct nfp_predt_entry *flow; + bool is_ipv6; +}; + +/** + * struct nfp_predt_entry + * @list_head: List head to attach to predt_list + * @flow_pay: Direct link to flow_payload + * @nn_list: List of linked nfp_neigh_entries + */ +struct nfp_predt_entry { + struct list_head list_head; + struct nfp_fl_payload *flow_pay; + struct list_head nn_list; +}; + +/** * struct nfp_mtu_conf - manage MTU setting * @portnum: NFP port number of repr with requested MTU change * @requested_val: MTU value requested for repr @@ -202,6 +278,9 @@ struct nfp_fl_internal_ports { * @ct_zone_table: Hash table used to store the different zones * @ct_zone_wc: Special zone entry for wildcarded zone matches * @ct_map_table: Hash table used to referennce ct flows + * @predt_list: List to keep track of decap pretun flows + * @neigh_table: Table to keep track of neighbor entries + * @predt_lock: Lock to serialise predt/neigh table updates */ struct nfp_flower_priv { struct nfp_app *app; @@ -241,6 +320,9 @@ struct nfp_flower_priv { struct rhashtable ct_zone_table; struct nfp_fl_ct_zone_entry *ct_zone_wc; struct rhashtable ct_map_table; + struct list_head predt_list; + struct rhashtable neigh_table; + spinlock_t predt_lock; /* Lock to serialise predt/neigh table updates */ }; /** @@ -344,9 +426,14 @@ struct nfp_fl_payload { struct list_head linked_flows; bool in_hw; struct { + struct nfp_predt_entry *predt; struct net_device *dev; + __be16 vlan_tpid; __be16 vlan_tci; __be16 port_idx; + u8 loc_mac[ETH_ALEN]; + u8 rem_mac[ETH_ALEN]; + bool is_ipv6; } pre_tun_rule; }; @@ -369,6 +456,7 @@ struct nfp_fl_payload_link { extern const struct rhashtable_params nfp_flower_table_params; extern const struct rhashtable_params merge_table_params; +extern const struct rhashtable_params neigh_table_params; struct nfp_merge_info { u64 parent_ctx; @@ -580,6 +668,10 @@ void nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev); u32 nfp_flower_get_port_id_from_netdev(struct nfp_app *app, struct net_device *netdev); +void nfp_tun_link_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt); +void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt); int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, struct nfp_fl_payload *flow); int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app, diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index f448c5682594..74e1b279c13b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -502,6 +502,12 @@ const struct rhashtable_params nfp_ct_map_params = { .automatic_shrinking = true, }; +const struct rhashtable_params neigh_table_params = { + .key_offset = offsetof(struct nfp_neigh_entry, neigh_cookie), + .head_offset = offsetof(struct nfp_neigh_entry, ht_node), + .key_len = sizeof(unsigned long), +}; + int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, unsigned int host_num_mems) { @@ -530,6 +536,12 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, if (err) goto err_free_ct_zone_table; + err = rhashtable_init(&priv->neigh_table, &neigh_table_params); + if (err) + goto err_free_ct_map_table; + + INIT_LIST_HEAD(&priv->predt_list); + get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed)); /* Init ring buffer and unallocated mask_ids. */ @@ -537,7 +549,7 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS, NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL); if (!priv->mask_ids.mask_id_free_list.buf) - goto err_free_ct_map_table; + goto err_free_neigh_table; priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1; @@ -565,6 +577,7 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, goto err_free_ring_buf; spin_lock_init(&priv->stats_lock); + spin_lock_init(&priv->predt_lock); return 0; @@ -574,6 +587,8 @@ err_free_last_used: kfree(priv->mask_ids.last_used); err_free_mask_id: kfree(priv->mask_ids.mask_id_free_list.buf); +err_free_neigh_table: + rhashtable_destroy(&priv->neigh_table); err_free_ct_map_table: rhashtable_destroy(&priv->ct_map_table); err_free_ct_zone_table: @@ -700,6 +715,8 @@ void nfp_flower_metadata_cleanup(struct nfp_app *app) rhashtable_free_and_destroy(&priv->ct_map_table, nfp_free_map_table_entry, NULL); + rhashtable_free_and_destroy(&priv->neigh_table, + nfp_check_rhashtable_empty, NULL); kvfree(priv->stats); kfree(priv->mask_ids.mask_id_free_list.buf); kfree(priv->mask_ids.last_used); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 92e8ade4854e..9d65459bdba5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1170,6 +1170,11 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } + if (key_layer & NFP_FLOWER_LAYER_IPV6) + flow->pre_tun_rule.is_ipv6 = true; + else + flow->pre_tun_rule.is_ipv6 = false; + /* Skip fields known to exist. */ mask += sizeof(struct nfp_flower_meta_tci); ext += sizeof(struct nfp_flower_meta_tci); @@ -1180,13 +1185,6 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, mask += sizeof(struct nfp_flower_in_port); ext += sizeof(struct nfp_flower_in_port); - /* Ensure destination MAC address matches pre_tun_dev. */ - mac = (struct nfp_flower_mac_mpls *)ext; - if (memcmp(&mac->mac_dst[0], flow->pre_tun_rule.dev->dev_addr, 6)) { - NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: dest MAC must match output dev MAC"); - return -EOPNOTSUPP; - } - /* Ensure destination MAC address is fully matched. */ mac = (struct nfp_flower_mac_mpls *)mask; if (!is_broadcast_ether_addr(&mac->mac_dst[0])) { @@ -1194,11 +1192,36 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } + /* Ensure source MAC address is fully matched. This is only needed + * for firmware with the DECAP_V2 feature enabled. Don't do this + * for firmware without this feature to keep old behaviour. + */ + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + mac = (struct nfp_flower_mac_mpls *)mask; + if (!is_broadcast_ether_addr(&mac->mac_src[0])) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported pre-tunnel rule: source MAC field must not be masked"); + return -EOPNOTSUPP; + } + } + if (mac->mpls_lse) { NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: MPLS not supported"); return -EOPNOTSUPP; } + /* Ensure destination MAC address matches pre_tun_dev. */ + mac = (struct nfp_flower_mac_mpls *)ext; + if (memcmp(&mac->mac_dst[0], flow->pre_tun_rule.dev->dev_addr, 6)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported pre-tunnel rule: dest MAC must match output dev MAC"); + return -EOPNOTSUPP; + } + + /* Save mac addresses in pre_tun_rule entry for later use */ + memcpy(&flow->pre_tun_rule.loc_mac, &mac->mac_dst[0], ETH_ALEN); + memcpy(&flow->pre_tun_rule.rem_mac, &mac->mac_src[0], ETH_ALEN); + mask += sizeof(struct nfp_flower_mac_mpls); ext += sizeof(struct nfp_flower_mac_mpls); if (key_layer & NFP_FLOWER_LAYER_IPV4 || @@ -1227,17 +1250,21 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, if ((priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_QINQ) { struct nfp_flower_vlan *vlan_tags; + u16 vlan_tpid; u16 vlan_tci; vlan_tags = (struct nfp_flower_vlan *)ext; vlan_tci = be16_to_cpu(vlan_tags->outer_tci); + vlan_tpid = be16_to_cpu(vlan_tags->outer_tpid); vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT; flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci); + flow->pre_tun_rule.vlan_tpid = cpu_to_be16(vlan_tpid); vlan = true; } else { flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff); + flow->pre_tun_rule.vlan_tpid = cpu_to_be16(0xffff); } } @@ -1362,11 +1389,30 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, goto err_release_metadata; } - if (flow_pay->pre_tun_rule.dev) - err = nfp_flower_xmit_pre_tun_flow(app, flow_pay); - else + if (flow_pay->pre_tun_rule.dev) { + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + struct nfp_predt_entry *predt; + + predt = kzalloc(sizeof(*predt), GFP_KERNEL); + if (!predt) { + err = -ENOMEM; + goto err_remove_rhash; + } + predt->flow_pay = flow_pay; + INIT_LIST_HEAD(&predt->nn_list); + spin_lock_bh(&priv->predt_lock); + list_add(&predt->list_head, &priv->predt_list); + flow_pay->pre_tun_rule.predt = predt; + nfp_tun_link_and_update_nn_entries(app, predt); + spin_unlock_bh(&priv->predt_lock); + } else { + err = nfp_flower_xmit_pre_tun_flow(app, flow_pay); + } + } else { err = nfp_flower_xmit_flow(app, flow_pay, NFP_FLOWER_CMSG_TYPE_FLOW_ADD); + } + if (err) goto err_remove_rhash; @@ -1538,11 +1584,25 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, goto err_free_merge_flow; } - if (nfp_flow->pre_tun_rule.dev) - err = nfp_flower_xmit_pre_tun_del_flow(app, nfp_flow); - else + if (nfp_flow->pre_tun_rule.dev) { + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + struct nfp_predt_entry *predt; + + predt = nfp_flow->pre_tun_rule.predt; + if (predt) { + spin_lock_bh(&priv->predt_lock); + nfp_tun_unlink_and_update_nn_entries(app, predt); + list_del(&predt->list_head); + spin_unlock_bh(&priv->predt_lock); + kfree(predt); + } + } else { + err = nfp_flower_xmit_pre_tun_del_flow(app, nfp_flow); + } + } else { err = nfp_flower_xmit_flow(app, nfp_flow, NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + } /* Fall through on error. */ err_free_merge_flow: diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index c71bd555f482..9c37ed6943d0 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -77,38 +77,6 @@ struct nfp_tun_active_tuns_v6 { }; /** - * struct nfp_tun_neigh - neighbour/route entry on the NFP - * @dst_ipv4: destination IPv4 address - * @src_ipv4: source IPv4 address - * @dst_addr: destination MAC address - * @src_addr: source MAC address - * @port_id: NFP port to output packet on - associated with source IPv4 - */ -struct nfp_tun_neigh { - __be32 dst_ipv4; - __be32 src_ipv4; - u8 dst_addr[ETH_ALEN]; - u8 src_addr[ETH_ALEN]; - __be32 port_id; -}; - -/** - * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP - * @dst_ipv6: destination IPv6 address - * @src_ipv6: source IPv6 address - * @dst_addr: destination MAC address - * @src_addr: source MAC address - * @port_id: NFP port to output packet on - associated with source IPv6 - */ -struct nfp_tun_neigh_v6 { - struct in6_addr dst_ipv6; - struct in6_addr src_ipv6; - u8 dst_addr[ETH_ALEN]; - u8 src_addr[ETH_ALEN]; - __be32 port_id; -}; - -/** * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup * @ingress_port: ingress port of packet that signalled request * @ipv4_addr: destination ipv4 address for route @@ -313,9 +281,15 @@ static int nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, gfp_t flag) { + struct nfp_flower_priv *priv = app->priv; struct sk_buff *skb; unsigned char *msg; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) && + (mtype == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + mtype == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6)) + plen -= sizeof(struct nfp_tun_neigh_ext); + skb = nfp_flower_cmsg_alloc(app, plen, mtype, flag); if (!skb) return -ENOMEM; @@ -327,193 +301,260 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, return 0; } -static bool -__nfp_tun_has_route(struct list_head *route_list, spinlock_t *list_lock, - void *add, int add_len) -{ - struct nfp_offloaded_route *entry; - - spin_lock_bh(list_lock); - list_for_each_entry(entry, route_list, list) - if (!memcmp(entry->ip_add, add, add_len)) { - spin_unlock_bh(list_lock); - return true; - } - spin_unlock_bh(list_lock); - return false; -} - -static int -__nfp_tun_add_route_to_cache(struct list_head *route_list, - spinlock_t *list_lock, void *add, int add_len) -{ - struct nfp_offloaded_route *entry; - - spin_lock_bh(list_lock); - list_for_each_entry(entry, route_list, list) - if (!memcmp(entry->ip_add, add, add_len)) { - spin_unlock_bh(list_lock); - return 0; - } - - entry = kmalloc(struct_size(entry, ip_add, add_len), GFP_ATOMIC); - if (!entry) { - spin_unlock_bh(list_lock); - return -ENOMEM; - } - - memcpy(entry->ip_add, add, add_len); - list_add_tail(&entry->list, route_list); - spin_unlock_bh(list_lock); - - return 0; -} - static void -__nfp_tun_del_route_from_cache(struct list_head *route_list, - spinlock_t *list_lock, void *add, int add_len) +nfp_tun_mutual_link(struct nfp_predt_entry *predt, + struct nfp_neigh_entry *neigh) { - struct nfp_offloaded_route *entry; - - spin_lock_bh(list_lock); - list_for_each_entry(entry, route_list, list) - if (!memcmp(entry->ip_add, add, add_len)) { - list_del(&entry->list); - kfree(entry); - break; - } - spin_unlock_bh(list_lock); -} + struct nfp_fl_payload *flow_pay = predt->flow_pay; + struct nfp_tun_neigh_ext *ext; + struct nfp_tun_neigh *common; -static bool nfp_tun_has_route_v4(struct nfp_app *app, __be32 *ipv4_addr) -{ - struct nfp_flower_priv *priv = app->priv; + if (flow_pay->pre_tun_rule.is_ipv6 != neigh->is_ipv6) + return; - return __nfp_tun_has_route(&priv->tun.neigh_off_list_v4, - &priv->tun.neigh_off_lock_v4, ipv4_addr, - sizeof(*ipv4_addr)); -} + /* In the case of bonding it is possible that there might already + * be a flow linked (as the MAC address gets shared). If a flow + * is already linked just return. + */ + if (neigh->flow) + return; -static bool -nfp_tun_has_route_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) -{ - struct nfp_flower_priv *priv = app->priv; + common = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->common : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->common; + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + + if (memcmp(flow_pay->pre_tun_rule.loc_mac, + common->src_addr, ETH_ALEN) || + memcmp(flow_pay->pre_tun_rule.rem_mac, + common->dst_addr, ETH_ALEN)) + return; - return __nfp_tun_has_route(&priv->tun.neigh_off_list_v6, - &priv->tun.neigh_off_lock_v6, ipv6_addr, - sizeof(*ipv6_addr)); + list_add(&neigh->list_head, &predt->nn_list); + neigh->flow = predt; + ext->host_ctx = flow_pay->meta.host_ctx_id; + ext->vlan_tci = flow_pay->pre_tun_rule.vlan_tci; + ext->vlan_tpid = flow_pay->pre_tun_rule.vlan_tpid; } static void -nfp_tun_add_route_to_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +nfp_tun_link_predt_entries(struct nfp_app *app, + struct nfp_neigh_entry *nn_entry) { struct nfp_flower_priv *priv = app->priv; + struct nfp_predt_entry *predt, *tmp; - __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v4, - &priv->tun.neigh_off_lock_v4, ipv4_addr, - sizeof(*ipv4_addr)); + list_for_each_entry_safe(predt, tmp, &priv->predt_list, list_head) { + nfp_tun_mutual_link(predt, nn_entry); + } } -static void -nfp_tun_add_route_to_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +void nfp_tun_link_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt) { struct nfp_flower_priv *priv = app->priv; - - __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v6, - &priv->tun.neigh_off_lock_v6, ipv6_addr, - sizeof(*ipv6_addr)); + struct nfp_neigh_entry *nn_entry; + struct rhashtable_iter iter; + size_t neigh_size; + u8 type; + + rhashtable_walk_enter(&priv->neigh_table, &iter); + rhashtable_walk_start(&iter); + while ((nn_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(nn_entry)) + continue; + nfp_tun_mutual_link(predt, nn_entry); + neigh_size = nn_entry->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = nn_entry->is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } -static void -nfp_tun_del_route_from_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +static void nfp_tun_cleanup_nn_entries(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; - - __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v4, - &priv->tun.neigh_off_lock_v4, ipv4_addr, - sizeof(*ipv4_addr)); + struct nfp_neigh_entry *neigh; + struct nfp_tun_neigh_ext *ext; + struct rhashtable_iter iter; + size_t neigh_size; + u8 type; + + rhashtable_walk_enter(&priv->neigh_table, &iter); + rhashtable_walk_start(&iter); + while ((neigh = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(neigh)) + continue; + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + + neigh_size = neigh->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = neigh->is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, neigh->payload, + GFP_ATOMIC); + + rhashtable_remove_fast(&priv->neigh_table, &neigh->ht_node, + neigh_table_params); + if (neigh->flow) + list_del(&neigh->list_head); + kfree(neigh); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } -static void -nfp_tun_del_route_from_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt) { - struct nfp_flower_priv *priv = app->priv; - - __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v6, - &priv->tun.neigh_off_lock_v6, ipv6_addr, - sizeof(*ipv6_addr)); + struct nfp_neigh_entry *neigh, *tmp; + struct nfp_tun_neigh_ext *ext; + size_t neigh_size; + u8 type; + + list_for_each_entry_safe(neigh, tmp, &predt->nn_list, list_head) { + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + neigh->flow = NULL; + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + list_del(&neigh->list_head); + neigh_size = neigh->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = neigh->is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, neigh->payload, + GFP_ATOMIC); + } } static void -nfp_tun_write_neigh_v4(struct net_device *netdev, struct nfp_app *app, - struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) +nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, + void *flow, struct neighbour *neigh, bool is_ipv6) { - struct nfp_tun_neigh payload; + bool neigh_invalid = !(neigh->nud_state & NUD_VALID) || neigh->dead; + size_t neigh_size = is_ipv6 ? sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + unsigned long cookie = (unsigned long)neigh; + struct nfp_flower_priv *priv = app->priv; + struct nfp_neigh_entry *nn_entry; u32 port_id; + u8 mtype; port_id = nfp_flower_get_port_id_from_netdev(app, netdev); if (!port_id) return; - memset(&payload, 0, sizeof(struct nfp_tun_neigh)); - payload.dst_ipv4 = flow->daddr; + spin_lock_bh(&priv->predt_lock); + nn_entry = rhashtable_lookup_fast(&priv->neigh_table, &cookie, + neigh_table_params); + if (!nn_entry && !neigh_invalid) { + struct nfp_tun_neigh_ext *ext; + struct nfp_tun_neigh *common; + + nn_entry = kzalloc(sizeof(*nn_entry) + neigh_size, + GFP_ATOMIC); + if (!nn_entry) + goto err; + + nn_entry->payload = (char *)&nn_entry[1]; + nn_entry->neigh_cookie = cookie; + nn_entry->is_ipv6 = is_ipv6; + nn_entry->flow = NULL; + if (is_ipv6) { + struct flowi6 *flowi6 = (struct flowi6 *)flow; + struct nfp_tun_neigh_v6 *payload; + + payload = (struct nfp_tun_neigh_v6 *)nn_entry->payload; + payload->src_ipv6 = flowi6->saddr; + payload->dst_ipv6 = flowi6->daddr; + common = &payload->common; + ext = &payload->ext; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6; + } else { + struct flowi4 *flowi4 = (struct flowi4 *)flow; + struct nfp_tun_neigh_v4 *payload; + + payload = (struct nfp_tun_neigh_v4 *)nn_entry->payload; + payload->src_ipv4 = flowi4->saddr; + payload->dst_ipv4 = flowi4->daddr; + common = &payload->common; + ext = &payload->ext; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + } + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + ether_addr_copy(common->src_addr, netdev->dev_addr); + neigh_ha_snapshot(common->dst_addr, neigh, netdev); + common->port_id = cpu_to_be32(port_id); + + if (rhashtable_insert_fast(&priv->neigh_table, + &nn_entry->ht_node, + neigh_table_params)) + goto err; + + nfp_tun_link_predt_entries(app, nn_entry); + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + } else if (nn_entry && neigh_invalid) { + if (is_ipv6) { + struct flowi6 *flowi6 = (struct flowi6 *)flow; + struct nfp_tun_neigh_v6 *payload; + + payload = (struct nfp_tun_neigh_v6 *)nn_entry->payload; + memset(payload, 0, sizeof(struct nfp_tun_neigh_v6)); + payload->dst_ipv6 = flowi6->daddr; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6; + } else { + struct flowi4 *flowi4 = (struct flowi4 *)flow; + struct nfp_tun_neigh_v4 *payload; - /* If entry has expired send dst IP with all other fields 0. */ - if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { - nfp_tun_del_route_from_cache_v4(app, &payload.dst_ipv4); + payload = (struct nfp_tun_neigh_v4 *)nn_entry->payload; + memset(payload, 0, sizeof(struct nfp_tun_neigh_v4)); + payload->dst_ipv4 = flowi4->daddr; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + } /* Trigger ARP to verify invalid neighbour state. */ neigh_event_send(neigh, NULL); - goto send_msg; - } + rhashtable_remove_fast(&priv->neigh_table, + &nn_entry->ht_node, + neigh_table_params); - /* Have a valid neighbour so populate rest of entry. */ - payload.src_ipv4 = flow->saddr; - ether_addr_copy(payload.src_addr, netdev->dev_addr); - neigh_ha_snapshot(payload.dst_addr, neigh, netdev); - payload.port_id = cpu_to_be32(port_id); - /* Add destination of new route to NFP cache. */ - nfp_tun_add_route_to_cache_v4(app, &payload.dst_ipv4); - -send_msg: - nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, - sizeof(struct nfp_tun_neigh), - (unsigned char *)&payload, flag); -} + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); -static void -nfp_tun_write_neigh_v6(struct net_device *netdev, struct nfp_app *app, - struct flowi6 *flow, struct neighbour *neigh, gfp_t flag) -{ - struct nfp_tun_neigh_v6 payload; - u32 port_id; - - port_id = nfp_flower_get_port_id_from_netdev(app, netdev); - if (!port_id) - return; - - memset(&payload, 0, sizeof(struct nfp_tun_neigh_v6)); - payload.dst_ipv6 = flow->daddr; - - /* If entry has expired send dst IP with all other fields 0. */ - if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { - nfp_tun_del_route_from_cache_v6(app, &payload.dst_ipv6); - /* Trigger probe to verify invalid neighbour state. */ - neigh_event_send(neigh, NULL); - goto send_msg; + if (nn_entry->flow) + list_del(&nn_entry->list_head); + kfree(nn_entry); } - /* Have a valid neighbour so populate rest of entry. */ - payload.src_ipv6 = flow->saddr; - ether_addr_copy(payload.src_addr, netdev->dev_addr); - neigh_ha_snapshot(payload.dst_addr, neigh, netdev); - payload.port_id = cpu_to_be32(port_id); - /* Add destination of new route to NFP cache. */ - nfp_tun_add_route_to_cache_v6(app, &payload.dst_ipv6); - -send_msg: - nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, - sizeof(struct nfp_tun_neigh_v6), - (unsigned char *)&payload, flag); + spin_unlock_bh(&priv->predt_lock); + return; + +err: + kfree(nn_entry); + spin_unlock_bh(&priv->predt_lock); + nfp_flower_cmsg_warn(app, "Neighbour configuration failed.\n"); } static int @@ -526,7 +567,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, struct flowi6 flow6 = {}; struct neighbour *n; struct nfp_app *app; - struct rtable *rt; + bool neigh_invalid; bool ipv6 = false; int err; @@ -545,6 +586,8 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, if (n->tbl->family == AF_INET6) ipv6 = true; + neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead; + if (ipv6) flow6.daddr = *(struct in6_addr *)n->primary_key; else @@ -557,38 +600,45 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, !nfp_flower_internal_port_can_offload(app, n->dev)) return NOTIFY_DONE; - /* Only concerned with changes to routes already added to NFP. */ - if ((ipv6 && !nfp_tun_has_route_v6(app, &flow6.daddr)) || - (!ipv6 && !nfp_tun_has_route_v4(app, &flow4.daddr))) - return NOTIFY_DONE; - #if IS_ENABLED(CONFIG_INET) if (ipv6) { #if IS_ENABLED(CONFIG_IPV6) - struct dst_entry *dst; - - dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(n->dev), NULL, - &flow6, NULL); - if (IS_ERR(dst)) - return NOTIFY_DONE; - - dst_release(dst); - flow6.flowi6_proto = IPPROTO_UDP; - nfp_tun_write_neigh_v6(n->dev, app, &flow6, n, GFP_ATOMIC); + if (!neigh_invalid) { + struct dst_entry *dst; + /* Use ipv6_dst_lookup_flow to populate flow6->saddr + * and other fields. This information is only needed + * for new entries, lookup can be skipped when an entry + * gets invalidated - as only the daddr is needed for + * deleting. + */ + dst = ip6_dst_lookup_flow(dev_net(n->dev), NULL, + &flow6, NULL); + if (IS_ERR(dst)) + return NOTIFY_DONE; + + dst_release(dst); + } + nfp_tun_write_neigh(n->dev, app, &flow6, n, true); #else return NOTIFY_DONE; #endif /* CONFIG_IPV6 */ } else { - /* Do a route lookup to populate flow data. */ - rt = ip_route_output_key(dev_net(n->dev), &flow4); - err = PTR_ERR_OR_ZERO(rt); - if (err) - return NOTIFY_DONE; - - ip_rt_put(rt); - - flow4.flowi4_proto = IPPROTO_UDP; - nfp_tun_write_neigh_v4(n->dev, app, &flow4, n, GFP_ATOMIC); + if (!neigh_invalid) { + struct rtable *rt; + /* Use ip_route_output_key to populate flow4->saddr and + * other fields. This information is only needed for + * new entries, lookup can be skipped when an entry + * gets invalidated - as only the daddr is needed for + * deleting. + */ + rt = ip_route_output_key(dev_net(n->dev), &flow4); + err = PTR_ERR_OR_ZERO(rt); + if (err) + return NOTIFY_DONE; + + ip_rt_put(rt); + } + nfp_tun_write_neigh(n->dev, app, &flow4, n, false); } #else return NOTIFY_DONE; @@ -631,7 +681,7 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh_v4(n->dev, app, &flow, n, GFP_ATOMIC); + nfp_tun_write_neigh(n->dev, app, &flow, n, false); neigh_release(n); rcu_read_unlock(); return; @@ -673,7 +723,7 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh_v6(n->dev, app, &flow, n, GFP_ATOMIC); + nfp_tun_write_neigh(n->dev, app, &flow, n, true); neigh_release(n); rcu_read_unlock(); return; @@ -1368,10 +1418,6 @@ int nfp_tunnel_config_start(struct nfp_app *app) INIT_LIST_HEAD(&priv->tun.ipv6_off_list); /* Initialise priv data for neighbour offloading. */ - spin_lock_init(&priv->tun.neigh_off_lock_v4); - INIT_LIST_HEAD(&priv->tun.neigh_off_list_v4); - spin_lock_init(&priv->tun.neigh_off_lock_v6); - INIT_LIST_HEAD(&priv->tun.neigh_off_list_v6); priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler; err = register_netevent_notifier(&priv->tun.neigh_nb); @@ -1386,11 +1432,8 @@ int nfp_tunnel_config_start(struct nfp_app *app) void nfp_tunnel_config_stop(struct nfp_app *app) { - struct nfp_offloaded_route *route_entry, *temp; struct nfp_flower_priv *priv = app->priv; struct nfp_ipv4_addr_entry *ip_entry; - struct nfp_tun_neigh_v6 ipv6_route; - struct nfp_tun_neigh ipv4_route; struct list_head *ptr, *storage; unregister_netevent_notifier(&priv->tun.neigh_nb); @@ -1406,36 +1449,9 @@ void nfp_tunnel_config_stop(struct nfp_app *app) mutex_destroy(&priv->tun.ipv6_off_lock); - /* Free memory in the route list and remove entries from fw cache. */ - list_for_each_entry_safe(route_entry, temp, - &priv->tun.neigh_off_list_v4, list) { - memset(&ipv4_route, 0, sizeof(ipv4_route)); - memcpy(&ipv4_route.dst_ipv4, &route_entry->ip_add, - sizeof(ipv4_route.dst_ipv4)); - list_del(&route_entry->list); - kfree(route_entry); - - nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, - sizeof(struct nfp_tun_neigh), - (unsigned char *)&ipv4_route, - GFP_KERNEL); - } - - list_for_each_entry_safe(route_entry, temp, - &priv->tun.neigh_off_list_v6, list) { - memset(&ipv6_route, 0, sizeof(ipv6_route)); - memcpy(&ipv6_route.dst_ipv6, &route_entry->ip_add, - sizeof(ipv6_route.dst_ipv6)); - list_del(&route_entry->list); - kfree(route_entry); - - nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, - sizeof(struct nfp_tun_neigh), - (unsigned char *)&ipv6_route, - GFP_KERNEL); - } - /* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */ rhashtable_free_and_destroy(&priv->tun.offloaded_macs, nfp_check_rhashtable_empty, NULL); + + nfp_tun_cleanup_nn_entries(app); } |