summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jakub Kicinski <kuba@kernel.org> 2023-04-17 18:55:27 -0700
committer Jakub Kicinski <kuba@kernel.org> 2023-04-17 18:55:27 -0700
commit 6c829efed5e8163dc13fa0d90c1e535e212fcc25 (patch)
tree 9c28bc5d692a8f2f2b0c987dfec650c2a8bfa0ee
parent 99676a5766412f3936c55b9d18565d248e5463ee (diff)
parent c941da23aaf065a893248943cedb27b0f492b292 (diff)
Merge branch 'support-tunnel-mode-in-mlx5-ipsec-packet-offload'
Leon Romanovsky says:

====================
Support tunnel mode in mlx5 IPsec packet offload

This series extends mlx5 to support tunnel mode in its IPsec packet
offload implementation.

v0: https://lore.kernel.org/all/cover.1681106636.git.leonro@nvidia.com
====================

Link: https://lore.kernel.org/r/cover.1681388425.git.leonro@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c 202
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h 11
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c 239
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c 6
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 14
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 48
-rw-r--r-- include/linux/mlx5/mlx5_ifc.h 8
7 files changed, 481 insertions, 47 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index def01bfde610..5fd609d1120e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -35,12 +35,14 @@
#include <crypto/aead.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
+#include <net/netevent.h>
#include "en.h"
#include "ipsec.h"
#include "ipsec_rxtx.h"
#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
+#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
{
@@ -242,6 +244,57 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
attrs->lft.numb_rounds_soft = (u64)n;
}
+static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
+ struct xfrm_state *x = sa_entry->x;
+ struct net_device *netdev;
+ struct neighbour *n;
+ u8 addr[ETH_ALEN];
+
+ if (attrs->mode != XFRM_MODE_TUNNEL ||
+ attrs->type != XFRM_DEV_OFFLOAD_PACKET)
+ return;
+
+ netdev = x->xso.real_dev;
+
+ mlx5_query_mac_address(mdev, addr);
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ ether_addr_copy(attrs->dmac, addr);
+ n = neigh_lookup(&arp_tbl, &attrs->saddr.a4, netdev);
+ if (!n) {
+ n = neigh_create(&arp_tbl, &attrs->saddr.a4, netdev);
+ if (IS_ERR(n))
+ return;
+ neigh_event_send(n, NULL);
+ attrs->drop = true;
+ break;
+ }
+ neigh_ha_snapshot(addr, n, netdev);
+ ether_addr_copy(attrs->smac, addr);
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ ether_addr_copy(attrs->smac, addr);
+ n = neigh_lookup(&arp_tbl, &attrs->daddr.a4, netdev);
+ if (!n) {
+ n = neigh_create(&arp_tbl, &attrs->daddr.a4, netdev);
+ if (IS_ERR(n))
+ return;
+ neigh_event_send(n, NULL);
+ attrs->drop = true;
+ break;
+ }
+ neigh_ha_snapshot(addr, n, netdev);
+ ether_addr_copy(attrs->dmac, addr);
+ break;
+ default:
+ return;
+ }
+ neigh_release(n);
+}
+
void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_accel_esp_xfrm_attrs *attrs)
{
@@ -297,8 +350,10 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
attrs->upspec.sport = ntohs(x->sel.sport);
attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
attrs->upspec.proto = x->sel.proto;
+ attrs->mode = x->props.mode;
mlx5e_ipsec_init_limits(sa_entry, attrs);
+ mlx5e_ipsec_init_macs(sa_entry, attrs);
}
static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
@@ -367,6 +422,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
+ if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
+ NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
+ return -EINVAL;
+ }
+
switch (x->xso.type) {
case XFRM_DEV_OFFLOAD_CRYPTO:
if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
@@ -374,11 +434,6 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
- if (x->props.mode != XFRM_MODE_TRANSPORT &&
- x->props.mode != XFRM_MODE_TUNNEL) {
- NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
- return -EINVAL;
- }
break;
case XFRM_DEV_OFFLOAD_PACKET:
if (!(mlx5_ipsec_device_caps(mdev) &
@@ -387,8 +442,9 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
return -EINVAL;
}
- if (x->props.mode != XFRM_MODE_TRANSPORT) {
- NL_SET_ERR_MSG_MOD(extack, "Only transport xfrm states may be offloaded in packet mode");
+ if (x->props.mode == XFRM_MODE_TUNNEL &&
+ !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
+ NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
return -EINVAL;
}
@@ -458,34 +514,81 @@ static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
}
+static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
+{
+ struct mlx5e_ipsec_work *work =
+ container_of(_work, struct mlx5e_ipsec_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
+ struct mlx5e_ipsec_netevent_data *data = work->data;
+ struct mlx5_accel_esp_xfrm_attrs *attrs;
+
+ attrs = &sa_entry->attrs;
+
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ ether_addr_copy(attrs->smac, data->addr);
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ ether_addr_copy(attrs->dmac, data->addr);
+ break;
+ default:
+ WARN_ON_ONCE(true);
+ }
+ attrs->drop = false;
+ mlx5e_accel_ipsec_fs_modify(sa_entry);
+}
+
static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
{
struct xfrm_state *x = sa_entry->x;
struct mlx5e_ipsec_work *work;
+ void *data = NULL;
switch (x->xso.type) {
case XFRM_DEV_OFFLOAD_CRYPTO:
if (!(x->props.flags & XFRM_STATE_ESN))
return 0;
break;
+ case XFRM_DEV_OFFLOAD_PACKET:
+ if (x->props.mode != XFRM_MODE_TUNNEL)
+ return 0;
+ break;
default:
- return 0;
+ break;
}
work = kzalloc(sizeof(*work), GFP_KERNEL);
if (!work)
return -ENOMEM;
- work->data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
- if (!work->data) {
- kfree(work);
- return -ENOMEM;
+ switch (x->xso.type) {
+ case XFRM_DEV_OFFLOAD_CRYPTO:
+ data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+ if (!data)
+ goto free_work;
+
+ INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
+ break;
+ case XFRM_DEV_OFFLOAD_PACKET:
+ data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
+ GFP_KERNEL);
+ if (!data)
+ goto free_work;
+
+ INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
+ break;
+ default:
+ break;
}
- INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
+ work->data = data;
work->sa_entry = sa_entry;
sa_entry->work = work;
return 0;
+
+free_work:
+ kfree(work);
+ return -ENOMEM;
}
static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
@@ -566,6 +669,14 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
if (err)
goto err_hw_ctx;
+ if (x->props.mode == XFRM_MODE_TUNNEL &&
+ x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
+ !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) {
+ NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings");
+ err = -EINVAL;
+ goto err_add_rule;
+ }
+
/* We use *_bh() variant because xfrm_timer_handler(), which runs
* in softirq context, can reach our state delete logic and we need
* xa_erase_bh() there.
@@ -580,6 +691,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x,
if (sa_entry->dwork)
queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
MLX5_IPSEC_RESCHED);
+
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
+ x->props.mode == XFRM_MODE_TUNNEL)
+ xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
+ MLX5E_IPSEC_TUNNEL_SA);
+
out:
x->xso.offload_handle = (unsigned long)sa_entry;
return 0;
@@ -602,6 +719,7 @@ err_xfrm:
static void mlx5e_xfrm_del_state(struct xfrm_state *x)
{
struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+ struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
struct mlx5e_ipsec_sa_entry *old;
@@ -610,6 +728,12 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
WARN_ON(old != sa_entry);
+
+ if (attrs->mode == XFRM_MODE_TUNNEL &&
+ attrs->type == XFRM_DEV_OFFLOAD_PACKET)
+ /* Make sure that no ARP requests are running in parallel */
+ flush_workqueue(ipsec->wq);
+
}
static void mlx5e_xfrm_free_state(struct xfrm_state *x)
@@ -634,6 +758,46 @@ sa_entry_free:
kfree(sa_entry);
}
+static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5_accel_esp_xfrm_attrs *attrs;
+ struct mlx5e_ipsec_netevent_data *data;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct mlx5e_ipsec *ipsec;
+ struct neighbour *n = ptr;
+ struct net_device *netdev;
+ struct xfrm_state *x;
+ unsigned long idx;
+
+ if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
+ return NOTIFY_DONE;
+
+ ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
+ xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
+ attrs = &sa_entry->attrs;
+
+ if (attrs->family == AF_INET) {
+ if (!neigh_key_eq32(n, &attrs->saddr.a4) &&
+ !neigh_key_eq32(n, &attrs->daddr.a4))
+ continue;
+ } else {
+ if (!neigh_key_eq128(n, &attrs->saddr.a4) &&
+ !neigh_key_eq128(n, &attrs->daddr.a4))
+ continue;
+ }
+
+ x = sa_entry->x;
+ netdev = x->xso.real_dev;
+ data = sa_entry->work->data;
+
+ neigh_ha_snapshot(data->addr, n, netdev);
+ queue_work(ipsec->wq, &sa_entry->work->work);
+ }
+
+ return NOTIFY_DONE;
+}
+
void mlx5e_ipsec_init(struct mlx5e_priv *priv)
{
struct mlx5e_ipsec *ipsec;
@@ -662,6 +826,13 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
goto err_aso;
}
+ if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
+ ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
+ ret = register_netevent_notifier(&ipsec->netevent_nb);
+ if (ret)
+ goto clear_aso;
+ }
+
ret = mlx5e_accel_ipsec_fs_init(ipsec);
if (ret)
goto err_fs_init;
@@ -672,6 +843,9 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
return;
err_fs_init:
+ if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ unregister_netevent_notifier(&ipsec->netevent_nb);
+clear_aso:
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
mlx5e_ipsec_aso_cleanup(ipsec);
err_aso:
@@ -690,6 +864,8 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
return;
mlx5e_accel_ipsec_fs_cleanup(ipsec);
+ if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ unregister_netevent_notifier(&ipsec->netevent_nb);
if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
mlx5e_ipsec_aso_cleanup(ipsec);
destroy_workqueue(ipsec->wq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 52890d7dce6b..f7f7c09d2b32 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -77,7 +77,7 @@ struct mlx5_replay_esn {
struct mlx5_accel_esp_xfrm_attrs {
u32 spi;
- u32 flags;
+ u32 mode;
struct aes_gcm_keymat aes_gcm;
union {
@@ -99,6 +99,8 @@ struct mlx5_accel_esp_xfrm_attrs {
u32 authsize;
u32 reqid;
struct mlx5_ipsec_lft lft;
+ u8 smac[ETH_ALEN];
+ u8 dmac[ETH_ALEN];
};
enum mlx5_ipsec_cap {
@@ -107,6 +109,7 @@ enum mlx5_ipsec_cap {
MLX5_IPSEC_CAP_PACKET_OFFLOAD = 1 << 2,
MLX5_IPSEC_CAP_ROCE = 1 << 3,
MLX5_IPSEC_CAP_PRIO = 1 << 4,
+ MLX5_IPSEC_CAP_TUNNEL = 1 << 5,
};
struct mlx5e_priv;
@@ -141,6 +144,10 @@ struct mlx5e_ipsec_work {
void *data;
};
+struct mlx5e_ipsec_netevent_data {
+ u8 addr[ETH_ALEN];
+};
+
struct mlx5e_ipsec_dwork {
struct delayed_work dwork;
struct mlx5e_ipsec_sa_entry *sa_entry;
@@ -166,6 +173,7 @@ struct mlx5e_ipsec {
struct mlx5e_ipsec_tx *tx;
struct mlx5e_ipsec_aso *aso;
struct notifier_block nb;
+ struct notifier_block netevent_nb;
struct mlx5_ipsec_fs *roce;
};
@@ -243,6 +251,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry);
int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry);
void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry);
+bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry);
int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index b47794d4146e..5a8fcd30fcb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -4,12 +4,15 @@
#include <linux/netdevice.h>
#include "en.h"
#include "en/fs.h"
+#include "eswitch.h"
#include "ipsec.h"
#include "fs_core.h"
#include "lib/ipsec_fs_roce.h"
#include "lib/fs_chains.h"
#define NUM_IPSEC_FTE BIT(15)
+#define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16
+#define IPSEC_TUNNEL_DEFAULT_TTL 0x40
struct mlx5e_ipsec_fc {
struct mlx5_fc *cnt;
@@ -36,6 +39,7 @@ struct mlx5e_ipsec_rx {
struct mlx5e_ipsec_rule status;
struct mlx5e_ipsec_fc *fc;
struct mlx5_fs_chains *chains;
+ u8 allow_tunnel_mode : 1;
};
struct mlx5e_ipsec_tx {
@@ -45,6 +49,7 @@ struct mlx5e_ipsec_tx {
struct mlx5_flow_namespace *ns;
struct mlx5e_ipsec_fc *fc;
struct mlx5_fs_chains *chains;
+ u8 allow_tunnel_mode : 1;
};
/* IPsec RX flow steering */
@@ -118,7 +123,7 @@ static void ipsec_chains_put_table(struct mlx5_fs_chains *chains, u32 prio)
static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
int level, int prio,
- int max_num_groups)
+ int max_num_groups, u32 flags)
{
struct mlx5_flow_table_attr ft_attr = {};
@@ -127,6 +132,7 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns,
ft_attr.max_fte = NUM_IPSEC_FTE;
ft_attr.level = level;
ft_attr.prio = prio;
+ ft_attr.flags = flags;
return mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
}
@@ -251,7 +257,8 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
mlx5_del_flow_rules(rx->sa.rule);
mlx5_destroy_flow_group(rx->sa.group);
mlx5_destroy_flow_table(rx->ft.sa);
-
+ if (rx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
mlx5_del_flow_rules(rx->status.rule);
mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
mlx5_destroy_flow_table(rx->ft.status);
@@ -267,6 +274,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
struct mlx5_flow_destination default_dest;
struct mlx5_flow_destination dest[2];
struct mlx5_flow_table *ft;
+ u32 flags = 0;
int err;
default_dest = mlx5_ttc_get_default_dest(ttc, family2tt(family));
@@ -277,7 +285,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
return err;
ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
- MLX5E_NIC_PRIO, 1);
+ MLX5E_NIC_PRIO, 1, 0);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_fs_ft_status;
@@ -300,8 +308,12 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
goto err_add;
/* Create FT */
- ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO,
- 2);
+ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
+ if (rx->allow_tunnel_mode)
+ flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_NIC_PRIO, 2,
+ flags);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_fs_ft;
@@ -327,7 +339,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
}
ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_POL_FT_LEVEL, MLX5E_NIC_PRIO,
- 2);
+ 2, 0);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_pol_ft;
@@ -356,6 +368,8 @@ err_pol_ft:
err_fs:
mlx5_destroy_flow_table(rx->ft.sa);
err_fs_ft:
+ if (rx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
mlx5_del_flow_rules(rx->status.rule);
mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
err_add:
@@ -490,7 +504,8 @@ err_rule:
}
/* IPsec TX flow steering */
-static void tx_destroy(struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce)
+static void tx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
+ struct mlx5_ipsec_fs *roce)
{
mlx5_ipsec_fs_roce_tx_destroy(roce);
if (tx->chains) {
@@ -502,6 +517,8 @@ static void tx_destroy(struct mlx5e_ipsec_tx *tx, struct mlx5_ipsec_fs *roce)
}
mlx5_destroy_flow_table(tx->ft.sa);
+ if (tx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
mlx5_del_flow_rules(tx->status.rule);
mlx5_destroy_flow_table(tx->ft.status);
}
@@ -511,9 +528,10 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
{
struct mlx5_flow_destination dest = {};
struct mlx5_flow_table *ft;
+ u32 flags = 0;
int err;
- ft = ipsec_ft_create(tx->ns, 2, 0, 1);
+ ft = ipsec_ft_create(tx->ns, 2, 0, 1, 0);
if (IS_ERR(ft))
return PTR_ERR(ft);
tx->ft.status = ft;
@@ -522,7 +540,11 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
if (err)
goto err_status_rule;
- ft = ipsec_ft_create(tx->ns, 1, 0, 4);
+ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)
+ tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev);
+ if (tx->allow_tunnel_mode)
+ flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ ft = ipsec_ft_create(tx->ns, 1, 0, 4, flags);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_sa_ft;
@@ -541,7 +563,7 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
goto connect_roce;
}
- ft = ipsec_ft_create(tx->ns, 0, 0, 2);
+ ft = ipsec_ft_create(tx->ns, 0, 0, 2, 0);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
goto err_pol_ft;
@@ -572,6 +594,8 @@ err_roce:
err_pol_ft:
mlx5_destroy_flow_table(tx->ft.sa);
err_sa_ft:
+ if (tx->allow_tunnel_mode)
+ mlx5_eswitch_unblock_encap(mdev);
mlx5_del_flow_rules(tx->status.rule);
err_status_rule:
mlx5_destroy_flow_table(tx->ft.status);
@@ -600,7 +624,7 @@ static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx)
if (--tx->ft.refcnt)
return;
- tx_destroy(tx, ipsec->roce);
+ tx_destroy(ipsec->mdev, tx, ipsec->roce);
}
static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev,
@@ -829,40 +853,181 @@ static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir,
return 0;
}
+static int
+setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm_attrs *attrs,
+ struct mlx5_pkt_reformat_params *reformat_params)
+{
+ struct ip_esp_hdr *esp_hdr;
+ struct ipv6hdr *ipv6hdr;
+ struct ethhdr *eth_hdr;
+ struct iphdr *iphdr;
+ char *reformatbf;
+ size_t bfflen;
+ void *hdr;
+
+ bfflen = sizeof(*eth_hdr);
+
+ if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) {
+ bfflen += sizeof(*esp_hdr) + 8;
+
+ switch (attrs->family) {
+ case AF_INET:
+ bfflen += sizeof(*iphdr);
+ break;
+ case AF_INET6:
+ bfflen += sizeof(*ipv6hdr);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ reformatbf = kzalloc(bfflen, GFP_KERNEL);
+ if (!reformatbf)
+ return -ENOMEM;
+
+ eth_hdr = (struct ethhdr *)reformatbf;
+ switch (attrs->family) {
+ case AF_INET:
+ eth_hdr->h_proto = htons(ETH_P_IP);
+ break;
+ case AF_INET6:
+ eth_hdr->h_proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ goto free_reformatbf;
+ }
+
+ ether_addr_copy(eth_hdr->h_dest, attrs->dmac);
+ ether_addr_copy(eth_hdr->h_source, attrs->smac);
+
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ reformat_params->type = MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ reformat_params->type = MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL;
+ reformat_params->param_0 = attrs->authsize;
+
+ hdr = reformatbf + sizeof(*eth_hdr);
+ switch (attrs->family) {
+ case AF_INET:
+ iphdr = (struct iphdr *)hdr;
+ memcpy(&iphdr->saddr, &attrs->saddr.a4, 4);
+ memcpy(&iphdr->daddr, &attrs->daddr.a4, 4);
+ iphdr->version = 4;
+ iphdr->ihl = 5;
+ iphdr->ttl = IPSEC_TUNNEL_DEFAULT_TTL;
+ iphdr->protocol = IPPROTO_ESP;
+ hdr += sizeof(*iphdr);
+ break;
+ case AF_INET6:
+ ipv6hdr = (struct ipv6hdr *)hdr;
+ memcpy(&ipv6hdr->saddr, &attrs->saddr.a6, 16);
+ memcpy(&ipv6hdr->daddr, &attrs->daddr.a6, 16);
+ ipv6hdr->nexthdr = IPPROTO_ESP;
+ ipv6hdr->version = 6;
+ ipv6hdr->hop_limit = IPSEC_TUNNEL_DEFAULT_TTL;
+ hdr += sizeof(*ipv6hdr);
+ break;
+ default:
+ goto free_reformatbf;
+ }
+
+ esp_hdr = (struct ip_esp_hdr *)hdr;
+ esp_hdr->spi = htonl(attrs->spi);
+ break;
+ default:
+ goto free_reformatbf;
+ }
+
+ reformat_params->size = bfflen;
+ reformat_params->data = reformatbf;
+ return 0;
+
+free_reformatbf:
+ kfree(reformatbf);
+ return -EINVAL;
+}
+
+static int
+setup_pkt_transport_reformat(struct mlx5_accel_esp_xfrm_attrs *attrs,
+ struct mlx5_pkt_reformat_params *reformat_params)
+{
+ u8 *reformatbf;
+ __be32 spi;
+
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
+ reformat_params->type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ if (attrs->family == AF_INET)
+ reformat_params->type =
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
+ else
+ reformat_params->type =
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
+
+ reformatbf = kzalloc(MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE,
+ GFP_KERNEL);
+ if (!reformatbf)
+ return -ENOMEM;
+
+ /* convert to network format */
+ spi = htonl(attrs->spi);
+ memcpy(reformatbf, &spi, sizeof(spi));
+
+ reformat_params->param_0 = attrs->authsize;
+ reformat_params->size =
+ MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE;
+ reformat_params->data = reformatbf;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int setup_pkt_reformat(struct mlx5_core_dev *mdev,
struct mlx5_accel_esp_xfrm_attrs *attrs,
struct mlx5_flow_act *flow_act)
{
- enum mlx5_flow_namespace_type ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
struct mlx5_pkt_reformat_params reformat_params = {};
struct mlx5_pkt_reformat *pkt_reformat;
- u8 reformatbf[16] = {};
- __be32 spi;
+ enum mlx5_flow_namespace_type ns_type;
+ int ret;
- if (attrs->dir == XFRM_DEV_OFFLOAD_IN) {
- reformat_params.type = MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT;
+ switch (attrs->dir) {
+ case XFRM_DEV_OFFLOAD_IN:
ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
- goto cmd;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ ns_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ default:
+ return -EINVAL;
}
- if (attrs->family == AF_INET)
- reformat_params.type =
- MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4;
- else
- reformat_params.type =
- MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6;
-
- /* convert to network format */
- spi = htonl(attrs->spi);
- memcpy(reformatbf, &spi, 4);
+ switch (attrs->mode) {
+ case XFRM_MODE_TRANSPORT:
+ ret = setup_pkt_transport_reformat(attrs, &reformat_params);
+ break;
+ case XFRM_MODE_TUNNEL:
+ ret = setup_pkt_tunnel_reformat(mdev, attrs, &reformat_params);
+ break;
+ default:
+ ret = -EINVAL;
+ }
- reformat_params.param_0 = attrs->authsize;
- reformat_params.size = sizeof(reformatbf);
- reformat_params.data = &reformatbf;
+ if (ret)
+ return ret;
-cmd:
pkt_reformat =
mlx5_packet_reformat_alloc(mdev, &reformat_params, ns_type);
+ kfree(reformat_params.data);
if (IS_ERR(pkt_reformat))
return PTR_ERR(pkt_reformat);
@@ -1453,3 +1618,15 @@ void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry)
mlx5e_accel_ipsec_fs_del_rule(sa_entry);
memcpy(sa_entry, &sa_entry_shadow, sizeof(*sa_entry));
}
+
+bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec_rx *rx =
+ ipsec_rx(sa_entry->ipsec, sa_entry->attrs.family);
+ struct mlx5e_ipsec_tx *tx = sa_entry->ipsec->tx;
+
+ if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT)
+ return tx->allow_tunnel_mode;
+
+ return rx->allow_tunnel_mode;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index 5fddb86bb35e..df90e19066bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -48,6 +48,12 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) &&
MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level))
caps |= MLX5_IPSEC_CAP_PRIO;
+
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
+ reformat_l2_to_l3_esp_tunnel) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ reformat_l3_esp_tunnel_to_l2))
+ caps |= MLX5_IPSEC_CAP_TUNNEL;
}
if (mlx5_get_roce_state(mdev) &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 19e9a77c4633..e9d68fdf68f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -263,6 +263,7 @@ struct mlx5_esw_offload {
const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
u8 inline_mode;
atomic64_t num_flows;
+ u64 num_block_encap;
enum devlink_eswitch_encap_mode encap;
struct ida vport_metadata_ida;
unsigned int host_number; /* ECPF supports one external host */
@@ -748,6 +749,9 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
struct mlx5_eswitch *slave_esw);
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
+void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
+
static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
{
if (mlx5_esw_allowed(esw))
@@ -761,6 +765,7 @@ mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw)
{
return esw->fdb_table.offloads.slow_fdb;
}
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -805,6 +810,15 @@ mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
{
return 0;
}
+
+static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
+{
+ return true;
+}
+
+static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
+{
+}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 48036dfddd5e..b6e2709c1371 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3586,6 +3586,47 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
return err;
}
+bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_eswitch *esw;
+
+ devl_lock(devlink);
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw)) {
+ devl_unlock(devlink);
+ /* Failure means no eswitch => not possible to change encap */
+ return true;
+ }
+
+ down_write(&esw->mode_lock);
+ if (esw->mode != MLX5_ESWITCH_LEGACY &&
+ esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
+ up_write(&esw->mode_lock);
+ devl_unlock(devlink);
+ return false;
+ }
+
+ esw->offloads.num_block_encap++;
+ up_write(&esw->mode_lock);
+ devl_unlock(devlink);
+ return true;
+}
+
+void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_eswitch *esw;
+
+ esw = mlx5_devlink_eswitch_get(devlink);
+ if (IS_ERR(esw))
+ return;
+
+ down_write(&esw->mode_lock);
+ esw->offloads.num_block_encap--;
+ up_write(&esw->mode_lock);
+}
+
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
enum devlink_eswitch_encap_mode encap,
struct netlink_ext_ack *extack)
@@ -3627,6 +3668,13 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
goto unlock;
}
+ if (esw->offloads.num_block_encap) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't set encapsulation when IPsec SA and/or policies are configured");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
esw_destroy_offloads_fdb_tables(esw);
esw->offloads.encap = encap;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6c84bf6eec85..20d00e09b168 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -463,9 +463,11 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 max_ft_level[0x8];
u8 reformat_add_esp_trasport[0x1];
- u8 reserved_at_41[0x2];
+ u8 reformat_l2_to_l3_esp_tunnel[0x1];
+ u8 reserved_at_42[0x1];
u8 reformat_del_esp_trasport[0x1];
- u8 reserved_at_44[0x2];
+ u8 reformat_l3_esp_tunnel_to_l2[0x1];
+ u8 reserved_at_45[0x1];
u8 execute_aso[0x1];
u8 reserved_at_47[0x19];
@@ -6630,7 +6632,9 @@ enum mlx5_reformat_ctx_type {
MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5,
+ MLX5_REFORMAT_TYPE_L2_TO_L3_ESP_TUNNEL = 0x6,
MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8,
+ MLX5_REFORMAT_TYPE_L3_ESP_TUNNEL_TO_L2 = 0x9,
MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb,
MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,