author     Jakub Kicinski <kuba@kernel.org>   2023-04-17 18:55:27 -0700
committer  Jakub Kicinski <kuba@kernel.org>   2023-04-17 18:55:27 -0700
commit     6c829efed5e8163dc13fa0d90c1e535e212fcc25
tree       9c28bc5d692a8f2f2b0c987dfec650c2a8bfa0ee
parent     99676a5766412f3936c55b9d18565d248e5463ee
parent     c941da23aaf065a893248943cedb27b0f492b292
Merge branch 'support-tunnel-mode-in-mlx5-ipsec-packet-offload'
Leon Romanovsky says:
====================
Support tunnel mode in mlx5 IPsec packet offload
This series extends mlx5 to support tunnel mode in its IPsec packet
offload implementation.
v0: https://lore.kernel.org/all/cover.1681106636.git.leonro@nvidia.com
====================
Link: https://lore.kernel.org/r/cover.1681388425.git.leonro@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
7 files changed, 481 insertions(+), 47 deletions(-)
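For context, a tunnel-mode SA with packet offload is requested from user space through the regular xfrm interface; the driver changes below only decide whether the request can be honored. A minimal iproute2 sketch (addresses, key, and device name are illustrative, not taken from this series, and the "offload packet" keyword needs a recent iproute2):

    ip xfrm state add src 192.168.1.1 dst 192.168.1.2 \
        proto esp spi 0x00000001 reqid 1 mode tunnel \
        aead 'rfc4106(gcm(aes))' 0x0123456789abcdef0123456789abcdef01234567 128 \
        offload packet dev eth2 dir out

If the device does not report the new MLX5_IPSEC_CAP_TUNNEL capability, or encap is already enabled on the eswitch, the state add is rejected with the extack messages introduced below.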
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index def01bfde610..5fd609d1120e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -35,12 +35,14 @@
 #include <crypto/aead.h>
 #include <linux/inetdevice.h>
 #include <linux/netdevice.h>
+#include <net/netevent.h>
 
 #include "en.h"
 #include "ipsec.h"
 #include "ipsec_rxtx.h"
 
 #define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
+#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
 
 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
 {
@@ -242,6 +244,57 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
        attrs->lft.numb_rounds_soft = (u64)n;
 }
 
+static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
+                                  struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+       struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+       struct xfrm_state *x = sa_entry->x;
+       struct net_device *netdev;
+       struct neighbour *n;
+       u8 addr[ETH_ALEN];
+
+       if (attrs->mode != XFRM_MODE_TUNNEL ||
+           attrs->type != XFRM_DEV_OFFLOAD_PACKET)
+               return;
+
+       netdev = x->xso.real_dev;
+
+       mlx5_query_mac_address(mdev, addr);
+       switch (attrs->dir) {
+       case XFRM_DEV_OFFLOAD_IN:
+               ether_addr_copy(attrs->dmac, addr);
+               n = neigh_lookup(&arp_tbl, &attrs->saddr.a4, netdev);
+               if (!n) {
+                       n = neigh_create(&arp_tbl, &attrs->saddr.a4, netdev);
+                       if (IS_ERR(n))
+                               return;
+                       neigh_event_send(n, NULL);
+                       attrs->drop = true;
+                       break;
+               }
+               neigh_ha_snapshot(addr, n, netdev);
+               ether_addr_copy(attrs->smac, addr);
+               break;
+       case XFRM_DEV_OFFLOAD_OUT:
+               ether_addr_copy(attrs->smac, addr);
+               n = neigh_lookup(&arp_tbl, &attrs->daddr.a4, netdev);
+               if (!n) {
+                       n = neigh_create(&arp_tbl, &attrs->daddr.a4, netdev);
+                       if (IS_ERR(n))
+                               return;
+                       neigh_event_send(n, NULL);
+                       attrs->drop = true;
+                       break;
+               }
+               neigh_ha_snapshot(addr, n, netdev);
+               ether_addr_copy(attrs->dmac, addr);
+               break;
+       default:
+               return;
+       }
+       neigh_release(n);
+}
+
 void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
                                         struct mlx5_accel_esp_xfrm_attrs *attrs)
 {
@@ -297,8 +350,10 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
        attrs->upspec.sport = ntohs(x->sel.sport);
        attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
        attrs->upspec.proto = x->sel.proto;
+       attrs->mode = x->props.mode;
 
        mlx5e_ipsec_init_limits(sa_entry, attrs);
+       mlx5e_ipsec_init_macs(sa_entry, attrs);
 }
 
 static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
@@ -367,6 +422,11 @@
                return -EINVAL;
        }
 
+       if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
+               NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
+               return -EINVAL;
+       }
+
        switch (x->xso.type) {
        case XFRM_DEV_OFFLOAD_CRYPTO:
                if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
@@ -374,11 +434,6 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
                        return -EINVAL;
                }
 
-               if (x->props.mode != XFRM_MODE_TRANSPORT &&
-                   x->props.mode != XFRM_MODE_TUNNEL) {
-                       NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
-                       return -EINVAL;
-               }
                break;
        case XFRM_DEV_OFFLOAD_PACKET:
                if (!(mlx5_ipsec_device_caps(mdev) &
@@ -387,8 +442,9 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
                        return -EINVAL;
                }
 
-               if (x->props.mode != XFRM_MODE_TRANSPORT) {
-                       NL_SET_ERR_MSG_MOD(extack, "Only transport xfrm states may be offloaded in packet mode");
+               if (x->props.mode == XFRM_MODE_TUNNEL &&
+                   !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
+                       NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
                        return -EINVAL;
                }
 
@@ -458,34 +514,81 @@ static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
                sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
 }
 
+static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
+{
+       struct mlx5e_ipsec_work *work =
+               container_of(_work, struct mlx5e_ipsec_work, work);
+       struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
+       struct mlx5e_ipsec_netevent_data *data = work->data;
+       struct mlx5_accel_esp_xfrm_attrs *attrs;
+
+       attrs = &sa_entry->attrs;
+
+       switch (attrs->dir) {
+       case XFRM_DEV_OFFLOAD_IN:
+               ether_addr_copy(attrs->smac, data->addr);
+               break;
+       case XFRM_DEV_OFFLOAD_OUT:
+               ether_addr_copy(attrs->dmac, data->addr);
+               break;
+       default:
+               WARN_ON_ONCE(true);
+       }
+       attrs->drop = false;
+       mlx5e_accel_ipsec_fs_modify(sa_entry);
+}
+
 static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
        struct xfrm_state *x = sa_entry->x;
        struct mlx5e_ipsec_work *work;
+       void *data = NULL;
 
        switch (x->xso.type) {
        case XFRM_DEV_OFFLOAD_CRYPTO:
                if (!(x->props.flags & XFRM_STATE_ESN))
                        return 0;
                break;
+       case XFRM_DEV_OFFLOAD_PACKET:
+               if (x->props.mode != XFRM_MODE_TUNNEL)
+                       return 0;
+               break;
        default:
-               return 0;
+               break;
        }
 
        work = kzalloc(sizeof(*work), GFP_KERNEL);
        if (!work)
                return -ENOMEM;
 
-       work->data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
-       if (!work->data) {
-               kfree(work);
-               return -ENOMEM;
+       switch (x->xso.type) {
+       case XFRM_DEV_OFFLOAD_CRYPTO:
+               data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+               if (!data)
+                       goto free_work;
+
+               INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
+               break;
+       case XFRM_DEV_OFFLOAD_PACKET:
+               data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
+                              GFP_KERNEL);
+               if (!data)
+                       goto free_work;
+
+               INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
+               break;
+       default:
+               break;
        }
 
-       INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
+       work->data = data;
        work->sa_entry = sa_entry;
        sa_entry->work = work;
        return 0;
+
+free_work:
+       kfree(work);
+       return -ENOMEM;
 }
 
 static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
@@ -566,6 +669,14 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
        if (err)
                goto err_hw_ctx;
 
+       if (x->props.mode == XFRM_MODE_TUNNEL &&
+           x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
+           !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) {
+               NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings");
+               err = -EINVAL;
+               goto err_add_rule;
+       }
+
        /* We use *_bh() variant because xfrm_timer_handler(), which runs
         * in softirq context, can reach our state delete logic and we need
         * xa_erase_bh() there.
@@ -580,6 +691,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
        if (sa_entry->dwork)
                queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
                                   MLX5_IPSEC_RESCHED);
+
+       if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
+           x->props.mode == XFRM_MODE_TUNNEL)
+               xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
+                           MLX5E_IPSEC_TUNNEL_SA);
+
 out:
        x->xso.offload_handle = (unsigned long)sa_entry;
        return 0;
@@ -602,6 +719,7 @@ err_xfrm:
 static void mlx5e_xfrm_del_state(struct xfrm_state *x)
 {
        struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+       struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
        struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
        struct mlx5e_ipsec_sa_entry *old;
 
@@ -610,6 +728,12 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
        old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
        WARN_ON(old != sa_entry);
+
+       if (attrs->mode == XFRM_MODE_TUNNEL &&
+           attrs->type == XFRM_DEV_OFFLOAD_PACKET)
+               /* Make sure that no ARP requests are running in parallel */
+               flush_workqueue(ipsec->wq);
+
 }
 
 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
@@ -634,6 +758,46 @@ sa_entry_free:
        kfree(sa_entry);
 }
 
+static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
+                                     unsigned long event, void *ptr)
+{
+       struct mlx5_accel_esp_xfrm_attrs *attrs;
+       struct mlx5e_ipsec_netevent_data *data;
+       struct mlx5e_ipsec_sa_entry *sa_entry;
+       struct mlx5e_ipsec *ipsec;
+       struct neighbour *n = ptr;
+       struct net_device *netdev;
+       struct xfrm_state *x;
+       unsigned long idx;
+
+       if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
+               return NOTIFY_DONE;
+
+       ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
+       xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
+               attrs = &sa_entry->attrs;
+
+               if (attrs->family == AF_INET) {
+                       if (!neigh_key_eq32(n, &attrs->saddr.a4) &&
+                           !neigh_key_eq32(n, &attrs->daddr.a4))
+                               continue;
+               } else {
+                       if (!neigh_key_eq128(n, &attrs->saddr.a4) &&
+                           !neigh_key_eq128(n, &attrs->daddr.a4))
+                               continue;
+               }
+
+               x = sa_entry->x;
+               netdev = x->xso.real_dev;
+               data = sa_entry->work->data;
+
+               neigh_ha_snapshot(data->addr, n, netdev);
+               queue_work(ipsec->wq, &sa_entry->work->work);
+       }
+
+       return NOTIFY_DONE;
+}
+
 void mlx5e_ipsec_init(struct mlx5e_priv *priv)
 {
        struct mlx5e_ipsec *ipsec;
@@ -662,6 +826,13 @@
                goto err_aso;
        }
 
+       if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
+               ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
+               ret = register_netevent_notifier(&ipsec->netevent_nb);
+               if (ret)
+                       goto clear_aso;
+       }
+
        ret = mlx5e_accel_ipsec_fs_init(ipsec);
        if (ret)
                goto err_fs_init;
@@ -672,6 +843,9 @@
        return;
 
 err_fs_init:
+       if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
+               unregister_netevent_notifier(&ipsec->netevent_nb);
+clear_aso:
        if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
                mlx5e_ipsec_aso_cleanup(ipsec);
 err_aso:
@@ -690,6 +864,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
                return;
 
        mlx5e_accel_ipsec_fs_cleanup(ipsec);
+       if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
+               unregister_netevent_notifier(&ipsec->netevent_nb);
        if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
                mlx5e_ipsec_aso_cleanup(ipsec);
        destroy_workqueue(ipsec->wq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 52890d7dce6b..f7f7c09d2b32 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -77,7 +77,7 @@ struct mlx5_replay_esn {
 
 struct mlx5_accel_esp_xfrm_attrs {
        u32   spi;
-       u32   flags;
+       u32   mode;
        struct aes_gcm_keymat aes_gcm;
 
        union {
@@ -99,6 +99,8 @@ struct mlx5_accel_esp_xfrm_attrs {
        u32 authsize;
        u32 reqid;
        struct mlx5_ipsec_lft lft;
+       u8 smac[ETH_ALEN];
+       u8 dmac[ETH_ALEN];
 };
 
 enum mlx5_ipsec_cap {
@@ -107,6 +109,7 @@ enum mlx5_ipsec_cap {
        MLX5_IPSEC_CAP_PACKET_OFFLOAD = 1 << 2,
        MLX5_IPSEC_CAP_ROCE = 1 << 3,
        MLX5_IPSEC_CAP_PRIO = 1 << 4,
+       MLX5_IPSEC_CAP_TUNNEL = 1 << 5,
 };
 
 struct mlx5e_priv;
@@ -141,6 +144,10 @@ struct mlx5e_ipsec_work {
        void *data;
 };
 
+struct mlx5e_ipsec_netevent_data {
+       u8 addr[ETH_ALEN];
+};
+
 struct mlx5e_ipsec_dwork {
        struct delayed_work dwork;
        struct mlx5e_ipsec_sa_entry *sa_entry;
@@ -166,6 +173,7 @@ struct mlx5e_ipsec {
        struct mlx5e_ipsec_tx *tx;
        struct mlx5e_ipsec_aso *aso;
        struct notifier_block nb;
+       struct notifier_block netevent_nb;
        struct mlx5_ipsec_fs *roce;
 };
 
@@ -243,6 +251,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
 int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
 void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
 void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry);
+bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry);
 
 int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
 void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index b47794d4146e..5a8fcd30fcb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -4,12 +4,15 @@
 #include <linux/netdevice.h>
 #include "en.h"
 #include "en/fs.h"
+#include "eswitch.h"
 #include "ipsec.h"
 #include "fs_core.h"
 #include "lib/ipsec_fs_roce.h"
 #include "lib/fs_chains.h"
 
 #define NUM_IPSEC_FTE BIT(15)
+#define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16
+#define IPSEC_TUNNEL_DEFAULT_TTL 0x40
 
 struct mlx5e_ipsec_fc {
        struct mlx5_fc *cnt;
@@ -36,6 +39,7 @@ struct mlx5e_ipsec_rx {
        struct mlx5e_ipsec_rule status;
        struct mlx5e_ipsec_fc *fc;
        struct mlx5_fs_chains *chains;
+       u8 allow_tunnel_mode : 1;
 };
 
 struct mlx5e_ipsec_tx {
@@ -45,6 +49,7 @@
        struct mlx5_flow_namespace *ns;
        struct mlx5e_ipsec_fc *fc;
        struct mlx5_fs_chains *chains;
+       u8 allow_tunnel_mode : 1;
 };
 
 /* IPsec RX flow steering */
@@ -118,7 +123,7 @@ static void ipsec_chains_put_table(struct mlx5_fs_chains *chains, u32 prio)
 
 static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
                                                int level, int prio,
-                                               int max_num_groups)
+                                               int max_num_groups, u32 flags)
 {
        struct mlx5_flow_table_attr ft_attr = {};
 
@@ -127,6 +132,7 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
        ft_attr.max_fte = NUM_IPSEC_FTE;
        ft_attr.level = level;
        ft_attr.prio = prio;
+       ft_attr.flags = flags;
 
        return mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
 }
@@ -251,7 +257,8 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
        mlx5_del_flow_rules(rx->sa.rule);
        mlx5_destroy_flow_group(rx->sa.group);
        mlx5_destroy_flow_table(rx->ft.sa);
-
+       if (rx->allow_tunnel_mode)
+               mlx5_eswitch_unblock_encap(mdev);
        mlx5_del_flow_rules(rx->status.rule);
        mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
        mlx5_destroy_flow_table(rx->ft.status);
@@ -267,6 +274,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
        struct mlx5_flow_destination default_dest;
        struct mlx5_flow_destination dest[2];
        struct mlx5_flow_table *ft;
+       u32 flags = 0;
        int err;
 
        default_dest = mlx5_ttc_get_default_dest(ttc, family2tt(family));
@@ -277,7 +285,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
                return err;
 
        ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
-                            MLX5E_NIC_PRIO, 1);
+                            MLX5E_NIC_PRIO, 1, 0);
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                goto err_fs_ft_status;
@@ -300,8 +308,12 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
                goto err_add;
 
        /* Create FT */
-       ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO,
-                            2);
+       if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
+               rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
+       if (rx->allow_tunnel_mode)
+               flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+       ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO, 2,
+                            flags);
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                goto err_fs_ft;
@@ -327,7 +339,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
        }
 
        ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_POL_FT_LEVEL, MLX5E_NIC_PRIO,
-                            2);
+                            2, 0);
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                goto err_pol_ft;
@@ -356,6 +368,8 @@ err_pol_ft:
 err_fs:
        mlx5_destroy_flow_table(rx->ft.sa);
 err_fs_ft:
+       if (rx->allow_tunnel_mode)
+               mlx5_eswitch_unblock_encap(mdev);
        mlx5_del_flow_rules(rx->status.rule);
        mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
 err_add:
@@ -490,7 +504,8 @@ err_rule:
 }
 
 /* IPsec TX flow steering */
-static void tx_destroy(struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce)
+static void tx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
+                      struct mlx5_ipsec_fs *roce)
 {
        mlx5_ipsec_fs_roce_tx_destroy(roce);
        if (tx->chains) {
@@ -502,6 +517,8 @@ static void tx_destroy(struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce)
        }
 
        mlx5_destroy_flow_table(tx->ft.sa);
+       if (tx->allow_tunnel_mode)
+               mlx5_eswitch_unblock_encap(mdev);
        mlx5_del_flow_rules(tx->status.rule);
        mlx5_destroy_flow_table(tx->ft.status);
 }
@@ -511,9 +528,10 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
 {
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_table *ft;
+       u32 flags = 0;
        int err;
 
-       ft = ipsec_ft_create(tx->ns, 2, 0, 1);
+       ft = ipsec_ft_create(tx->ns, 2, 0, 1, 0);
        if (IS_ERR(ft))
                return PTR_ERR(ft);
        tx->ft.status = ft;
@@ -522,7 +540,11 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
        if (err)
                goto err_status_rule;
 
-       ft = ipsec_ft_create(tx->ns, 1, 0, 4);
+       if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
+               tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
+       if (tx->allow_tunnel_mode)
+               flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+       ft = ipsec_ft_create(tx->ns, 1, 0, 4, flags);
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                goto err_sa_ft;
@@ -541,7 +563,7 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
                goto connect_roce;
        }
 
-       ft = ipsec_ft_create(tx->ns, 0, 0, 2);
+       ft = ipsec_ft_create(tx->ns, 0, 0, 2, 0);
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                goto err_pol_ft;
@@ -572,6 +594,8 @@ err_roce:
 err_pol_ft:
        mlx5_destroy_flow_table(tx->ft.sa);
 err_sa_ft:
+       if (tx->allow_tunnel_mode)
+               mlx5_eswitch_unblock_encap(mdev);
        mlx5_del_flow_rules(tx->status.rule);
 err_status_rule:
        mlx5_destroy_flow_table(tx->ft.status);
@@ -600,7 +624,7 @@ static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx)
        if (--tx->ft.refcnt)
                return;
 
-       tx_destroy(tx, ipsec->roce);
+       tx_destroy(ipsec->mdev, tx, ipsec->roce);
 }
 
 static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev,
@@ -829,40 +853,181 @@ static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
        return 0;
 }
 
+static int
+setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev,
+                         struct mlx5_accel_esp_xfrm_attrs *attrs,
+                         struct mlx5_pkt_reformat_params *reformat_params)
+{
+       struct ip_esp_hdr *esp_hdr;
+       struct ipv6hdr *ipv6hdr;
+       struct ethhdr *eth_hdr;
+       struct iphdr *iphdr;
+       char *reformatbf;
+       size_t bfflen;
+       void *hdr;
+
+       bfflen = sizeof(*eth_hdr);
+
+       if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) {
+               bfflen += sizeof(*esp_hdr) + 8;
+
+               switch (attrs->family) {
+               case AF_INET:
+                       bfflen += sizeof(*iphdr);
+                       break;
+               case AF_INET6:
+                       bfflen += sizeof(*ipv6hdr);
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       reformatbf = kzalloc(bfflen, GFP_KERNEL);
+       if (!reformatbf)
+               return -ENOMEM;
+
+       eth_hdr = (struct ethhdr *)reformatbf;
+       switch (attrs->family) {
+       case AF_INET:
+               eth_hdr->h_proto = htons(ETH_P_IP);
+               break;
+       case AF_INET6:
+               eth_hdr->h_proto = htons(ETH_P_IPV6);
+               break;
+       default:
+               goto free_reformatbf;
+       }
+
+       ether_addr_copy(eth_hdr->h_dest, attrs->dmac);
+       ether_addr_copy(eth_hdr->h_source, attrs->smac);
+
+       switch (attrs->dir) {
+       case XFRM_DEV_OFFLOAD_IN:
+               reformat_params->type = MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2;
+               break;
+       case XFRM_DEV_OFFLOAD_OUT:
+               reformat_params->type = MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL;
+               reformat_params->param_0 = attrs->authsize;
+
+               hdr = reformatbf + sizeof(*eth_hdr);
+               switch (attrs->family) {
+               case AF_INET:
+                       iphdr = (struct iphdr *)hdr;
+                       memcpy(&iphdr->saddr, &attrs->saddr.a4, 4);
+                       memcpy(&iphdr->daddr, &attrs->daddr.a4, 4);
+                       iphdr->version = 4;
+                       iphdr->ihl = 5;
+                       iphdr->ttl = IPSEC_TUNNEL_DEFAULT_TTL;
+                       iphdr->protocol = IPPROTO_ESP;
+                       hdr += sizeof(*iphdr);
+                       break;
+               case AF_INET6:
+                       ipv6hdr = (struct ipv6hdr *)hdr;
+                       memcpy(&ipv6hdr->saddr, &attrs->saddr.a6, 16);
+                       memcpy(&ipv6hdr->daddr, &attrs->daddr.a6, 16);
+                       ipv6hdr->nexthdr = IPPROTO_ESP;
+                       ipv6hdr->version = 6;
+                       ipv6hdr->hop_limit = IPSEC_TUNNEL_DEFAULT_TTL;
+                       hdr += sizeof(*ipv6hdr);
+                       break;
+               default:
+                       goto free_reformatbf;
+               }
+
+               esp_hdr = (struct ip_esp_hdr *)hdr;
+               esp_hdr->spi = htonl(attrs->spi);
+               break;
+       default:
+               goto free_reformatbf;
+       }
+
+       reformat_params->size = bfflen;
+       reformat_params->data = reformatbf;
+       return 0;
+
+free_reformatbf:
+       kfree(reformatbf);
+       return -EINVAL;
+}
+
+static int
+setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs,
+                            struct mlx5_pkt_reformat_params *reformat_params)
+{
+       u8 *reformatbf;
+       __be32 spi;
+
+       switch (attrs->dir) {
+       case XFRM_DEV_OFFLOAD_IN:
+               reformat_params->type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
+               break;
+       case XFRM_DEV_OFFLOAD_OUT:
+               if (attrs->family == AF_INET)
+                       reformat_params->type =
+                               MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
+               else
+                       reformat_params->type =
+                               MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
+
+               reformatbf = kzalloc(MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE,
+                                    GFP_KERNEL);
+               if (!reformatbf)
+                       return -ENOMEM;
+
+               /* convert to network format */
+               spi = htonl(attrs->spi);
+               memcpy(reformatbf, &spi, sizeof(spi));
+
+               reformat_params->param_0 = attrs->authsize;
+               reformat_params->size =
+                       MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE;
+               reformat_params->data = reformatbf;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int setup_pkt_reformat(struct mlx5_core_dev *mdev,
                               struct mlx5_accel_esp_xfrm_attrs *attrs,
                               struct mlx5_flow_act *flow_act)
 {
-       enum mlx5_flow_namespace_type ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
        struct mlx5_pkt_reformat_params reformat_params = {};
        struct mlx5_pkt_reformat *pkt_reformat;
-       u8 reformatbf[16] = {};
-       __be32 spi;
+       enum mlx5_flow_namespace_type ns_type;
+       int ret;
 
-       if (attrs->dir == XFRM_DEV_OFFLOAD_IN) {
-               reformat_params.type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
+       switch (attrs->dir) {
+       case XFRM_DEV_OFFLOAD_IN:
                ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
-               goto cmd;
+               break;
+       case XFRM_DEV_OFFLOAD_OUT:
+               ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
+               break;
+       default:
+               return -EINVAL;
        }
 
-       if (attrs->family == AF_INET)
-               reformat_params.type =
-                       MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
-       else
-               reformat_params.type =
-                       MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
-
-       /* convert to network format */
-       spi = htonl(attrs->spi);
-       memcpy(reformatbf, &spi, 4);
+       switch (attrs->mode) {
+       case XFRM_MODE_TRANSPORT:
+               ret = setup_pkt_transport_reformat(attrs, &reformat_params);
+               break;
+       case XFRM_MODE_TUNNEL:
+               ret = setup_pkt_tunnel_reformat(mdev, attrs, &reformat_params);
+               break;
+       default:
+               ret = -EINVAL;
+       }
 
-       reformat_params.param_0 = attrs->authsize;
-       reformat_params.size = sizeof(reformatbf);
-       reformat_params.data = &reformatbf;
+       if (ret)
+               return ret;
 
-cmd:
        pkt_reformat =
                mlx5_packet_reformat_alloc(mdev, &reformat_params, ns_type);
+       kfree(reformat_params.data);
        if (IS_ERR(pkt_reformat))
                return PTR_ERR(pkt_reformat);
@@ -1453,3 +1618,15 @@ void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry)
        mlx5e_accel_ipsec_fs_del_rule(sa_entry);
        memcpy(sa_entry, &sa_entry_shadow, sizeof(*sa_entry));
 }
+
+bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+       struct mlx5e_ipsec_rx *rx =
+               ipsec_rx(sa_entry->ipsec, sa_entry->attrs.family);
+       struct mlx5e_ipsec_tx *tx = sa_entry->ipsec->tx;
+
+       if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT)
+               return tx->allow_tunnel_mode;
+
+       return rx->allow_tunnel_mode;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index 5fddb86bb35e..df90e19066bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -48,6 +48,12 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
                if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) &&
                    MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level))
                        caps |= MLX5_IPSEC_CAP_PRIO;
+
+               if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
+                                             reformat_l2_to_l3_esp_tunnel) &&
+                   MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                             reformat_l3_esp_tunnel_to_l2))
+                       caps |= MLX5_IPSEC_CAP_TUNNEL;
        }
 
        if (mlx5_get_roce_state(mdev) &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 19e9a77c4633..e9d68fdf68f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -263,6 +263,7 @@ struct mlx5_esw_offload {
        const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
        u8 inline_mode;
        atomic64_t num_flows;
+       u64 num_block_encap;
        enum devlink_eswitch_encap_mode encap;
        struct ida vport_metadata_ida;
        unsigned int host_number; /* ECPF supports one external host */
@@ -748,6 +749,9 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
                                              struct mlx5_eswitch *slave_esw);
 int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
 
+bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
+void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
+
 static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
 {
        if (mlx5_esw_allowed(esw))
@@ -761,6 +765,7 @@ mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw)
 {
        return esw->fdb_table.offloads.slow_fdb;
 }
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -805,6 +810,15 @@ mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
 {
        return 0;
 }
+
+static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
+{
+       return true;
+}
+
+static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
+{
+}
 #endif /* CONFIG_MLX5_ESWITCH */
 
 #endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 48036dfddd5e..b6e2709c1371 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3586,6 +3586,47 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
        return err;
 }
 
+bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
+{
+       struct devlink *devlink = priv_to_devlink(dev);
+       struct mlx5_eswitch *esw;
+
+       devl_lock(devlink);
+       esw = mlx5_devlink_eswitch_get(devlink);
+       if (IS_ERR(esw)) {
+               devl_unlock(devlink);
+               /* Failure means no eswitch => not possible to change encap */
+               return true;
+       }
+
+       down_write(&esw->mode_lock);
+       if (esw->mode != MLX5_ESWITCH_LEGACY &&
+           esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
+               up_write(&esw->mode_lock);
+               devl_unlock(devlink);
+               return false;
+       }
+
+       esw->offloads.num_block_encap++;
+       up_write(&esw->mode_lock);
+       devl_unlock(devlink);
+       return true;
+}
+
+void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
+{
+       struct devlink *devlink = priv_to_devlink(dev);
+       struct mlx5_eswitch *esw;
+
+       esw = mlx5_devlink_eswitch_get(devlink);
+       if (IS_ERR(esw))
+               return;
+
+       down_write(&esw->mode_lock);
+       esw->offloads.num_block_encap--;
+       up_write(&esw->mode_lock);
+}
+
 int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
                                         enum devlink_eswitch_encap_mode encap,
                                         struct netlink_ext_ack *extack)
@@ -3627,6 +3668,13 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
                goto unlock;
        }
 
+       if (esw->offloads.num_block_encap) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Can't set encapsulation when IPsec SA and/or policies are configured");
+               err = -EOPNOTSUPP;
+               goto unlock;
+       }
+
        esw_destroy_offloads_fdb_tables(esw);
 
        esw->offloads.encap = encap;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6c84bf6eec85..20d00e09b168 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -463,9 +463,11 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
        u8         max_ft_level[0x8];
 
        u8         reformat_add_esp_trasport[0x1];
-       u8         reserved_at_41[0x2];
+       u8         reformat_l2_to_l3_esp_tunnel[0x1];
+       u8         reserved_at_42[0x1];
        u8         reformat_del_esp_trasport[0x1];
-       u8         reserved_at_44[0x2];
+       u8         reformat_l3_esp_tunnel_to_l2[0x1];
+       u8         reserved_at_45[0x1];
        u8         execute_aso[0x1];
        u8         reserved_at_47[0x19];
 
@@ -6630,7 +6632,9 @@ enum mlx5_reformat_ctx_type {
        MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
        MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
        MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5,
+       MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL = 0x6,
        MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8,
+       MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2 = 0x9,
        MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb,
        MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
        MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,
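The eswitch side of the series can be exercised through devlink: while any offloaded tunnel-mode SA holds the encap block (offloads.num_block_encap > 0), switching the eswitch encapsulation mode is refused with the extack message added above. A hypothetical session (the PCI address is illustrative):

    $ devlink dev eswitch set pci/0000:08:00.0 encap-mode none
    Error: mlx5_core: Can't set encapsulation when IPsec SA and/or policies are configured.

Deleting the offloaded tunnel states releases the block and makes encap-mode changes possible again.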