summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorHeng Qi <hengqi@linux.alibaba.com>2024-06-21 18:13:51 +0800
committerJakub Kicinski <kuba@kernel.org>2024-06-25 17:15:06 -0700
commitf750dfe825b904164688adeb147950e0e0c4d262 (patch)
tree3ca12a3a1b3b7cc0dc6370a4b2eac33bd1b00859 /lib
parentb65e697a7c9e0ae28a6255257d8b9b3271960426 (diff)
ethtool: provide customized dim profile management
The NetDIM library, currently leveraged by an array of NICs, delivers excellent acceleration benefits. Nevertheless, NICs vary significantly in their dim profile list prerequisites. Specifically, virtio-net backends may present diverse sw or hw device implementation, making a one-size-fits-all parameter list impractical. On Alibaba Cloud, the virtio DPU's performance under the default DIM profile falls short of expectations, partly due to a mismatch in parameter configuration. I also noticed that ice/idpf/ena and other NICs have customized profilelist or placed some restrictions on dim capabilities. Motivated by this, I tried adding new params for "ethtool -C" that provides a per-device control to modify and access a device's interrupt parameters. Usage ======== The target NIC is named ethx. Assume that ethx only declares support for rx profile setting (with DIM_PROFILE_RX flag set in profile_flags) and supports modification of usec and pkt fields. 1. Query the currently customized list of the device $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 256, .comps = n/a,}, {.usec = 8, .pkts = 256, .comps = n/a,}, {.usec = 64, .pkts = 256, .comps = n/a,}, {.usec = 128, .pkts = 256, .comps = n/a,}, {.usec = 256, .pkts = 256, .comps = n/a,} tx-profile: n/a 2. Tune $ ethtool -C ethx rx-profile 1,1,n_2,n,n_3,3,n_4,4,n_n,5,n "n" means do not modify this field. $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 1, .comps = n/a,}, {.usec = 2, .pkts = 256, .comps = n/a,}, {.usec = 3, .pkts = 3, .comps = n/a,}, {.usec = 4, .pkts = 4, .comps = n/a,}, {.usec = 256, .pkts = 5, .comps = n/a,} tx-profile: n/a 3. Hint If the device does not support some type of customized dim profiles, the corresponding "n/a" will display. If the "n/a" field is being modified, -EOPNOTSUPP will be reported. Signed-off-by: Heng Qi <hengqi@linux.alibaba.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://patch.msgid.link/20240621101353.107425-4-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/dim/net_dim.c70
1 files changed, 70 insertions, 0 deletions
diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c
index 67d5beb34dc3..0cd41277c7a3 100644
--- a/lib/dim/net_dim.c
+++ b/lib/dim/net_dim.c
@@ -4,6 +4,7 @@
*/
#include <linux/dim.h>
+#include <linux/rtnetlink.h>
/*
* Net DIM profiles:
@@ -95,6 +96,75 @@ net_dim_get_def_tx_moderation(u8 cq_period_mode)
}
EXPORT_SYMBOL(net_dim_get_def_tx_moderation);
+int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags,
+ u8 coal_flags, u8 rx_mode, u8 tx_mode,
+ void (*rx_dim_work)(struct work_struct *work),
+ void (*tx_dim_work)(struct work_struct *work))
+{
+ struct dim_cq_moder *rxp = NULL, *txp;
+ struct dim_irq_moder *moder;
+ int len;
+
+ dev->irq_moder = kzalloc(sizeof(*dev->irq_moder), GFP_KERNEL);
+ if (!dev->irq_moder)
+ return -ENOMEM;
+
+ moder = dev->irq_moder;
+ len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_profile);
+
+ moder->coal_flags = coal_flags;
+ moder->profile_flags = profile_flags;
+
+ if (profile_flags & DIM_PROFILE_RX) {
+ moder->rx_dim_work = rx_dim_work;
+ moder->dim_rx_mode = rx_mode;
+ rxp = kmemdup(rx_profile[rx_mode], len, GFP_KERNEL);
+ if (!rxp)
+ goto free_moder;
+
+ rcu_assign_pointer(moder->rx_profile, rxp);
+ }
+
+ if (profile_flags & DIM_PROFILE_TX) {
+ moder->tx_dim_work = tx_dim_work;
+ moder->dim_tx_mode = tx_mode;
+ txp = kmemdup(tx_profile[tx_mode], len, GFP_KERNEL);
+ if (!txp)
+ goto free_rxp;
+
+ rcu_assign_pointer(moder->tx_profile, txp);
+ }
+
+ return 0;
+
+free_rxp:
+ kfree(rxp);
+free_moder:
+ kfree(moder);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(net_dim_init_irq_moder);
+
+/* RTNL lock is held. */
+void net_dim_free_irq_moder(struct net_device *dev)
+{
+ struct dim_cq_moder *rxp, *txp;
+
+ if (!dev->irq_moder)
+ return;
+
+ rxp = rtnl_dereference(dev->irq_moder->rx_profile);
+ txp = rtnl_dereference(dev->irq_moder->tx_profile);
+
+ rcu_assign_pointer(dev->irq_moder->rx_profile, NULL);
+ rcu_assign_pointer(dev->irq_moder->tx_profile, NULL);
+
+ kfree_rcu(rxp, rcu);
+ kfree_rcu(txp, rcu);
+ kfree(dev->irq_moder);
+}
+EXPORT_SYMBOL(net_dim_free_irq_moder);
+
static int net_dim_step(struct dim *dim)
{
if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))