summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/sfc/siena
diff options
context:
space:
mode:
authorMartin Habets <martinh@xilinx.com>2022-05-09 16:31:43 +0100
committerJakub Kicinski <kuba@kernel.org>2022-05-10 15:38:14 -0700
commitd48523cb88e0703055c1b33e61eb644a7976f92b (patch)
tree2c7a176c21ecf2d6305bd1ef8581475c0ea7c228 /drivers/net/ethernet/sfc/siena
parent6e173d3b4af9e8804ebdbdb7a4afd7ed8f96220b (diff)
sfc: Copy shared files needed for Siena (part 2)
These are the files starting with m through w. No changes are done, those will be done with subsequent commits. Signed-off-by: Martin Habets <habetsm.xilinx@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/net/ethernet/sfc/siena')
-rw-r--r--drivers/net/ethernet/sfc/siena/mcdi.c2375
-rw-r--r--drivers/net/ethernet/sfc/siena/mcdi.h388
-rw-r--r--drivers/net/ethernet/sfc/siena/mcdi_mon.c531
-rw-r--r--drivers/net/ethernet/sfc/siena/mcdi_port.c117
-rw-r--r--drivers/net/ethernet/sfc/siena/mcdi_port.h18
-rw-r--r--drivers/net/ethernet/sfc/siena/mcdi_port_common.c1301
-rw-r--r--drivers/net/ethernet/sfc/siena/mcdi_port_common.h67
-rw-r--r--drivers/net/ethernet/sfc/siena/mtd.c124
-rw-r--r--drivers/net/ethernet/sfc/siena/net_driver.h1716
-rw-r--r--drivers/net/ethernet/sfc/siena/nic.c580
-rw-r--r--drivers/net/ethernet/sfc/siena/nic.h392
-rw-r--r--drivers/net/ethernet/sfc/siena/nic_common.h262
-rw-r--r--drivers/net/ethernet/sfc/siena/ptp.c2210
-rw-r--r--drivers/net/ethernet/sfc/siena/ptp.h45
-rw-r--r--drivers/net/ethernet/sfc/siena/rx.c399
-rw-r--r--drivers/net/ethernet/sfc/siena/rx_common.c1086
-rw-r--r--drivers/net/ethernet/sfc/siena/rx_common.h116
-rw-r--r--drivers/net/ethernet/sfc/siena/selftest.c807
-rw-r--r--drivers/net/ethernet/sfc/siena/selftest.h52
-rw-r--r--drivers/net/ethernet/sfc/siena/sriov.c72
-rw-r--r--drivers/net/ethernet/sfc/siena/sriov.h25
-rw-r--r--drivers/net/ethernet/sfc/siena/tx.c643
-rw-r--r--drivers/net/ethernet/sfc/siena/tx.h47
-rw-r--r--drivers/net/ethernet/sfc/siena/tx_common.c449
-rw-r--r--drivers/net/ethernet/sfc/siena/tx_common.h45
-rw-r--r--drivers/net/ethernet/sfc/siena/vfdi.h252
-rw-r--r--drivers/net/ethernet/sfc/siena/workarounds.h34
27 files changed, 14153 insertions, 0 deletions
diff --git a/drivers/net/ethernet/sfc/siena/mcdi.c b/drivers/net/ethernet/sfc/siena/mcdi.c
new file mode 100644
index 000000000000..50baf62b2cbc
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/mcdi.c
@@ -0,0 +1,2375 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2008-2013 Solarflare Communications Inc.
+ */
+
+#include <linux/delay.h>
+#include <linux/moduleparam.h>
+#include <linux/atomic.h>
+#include "net_driver.h"
+#include "nic.h"
+#include "io.h"
+#include "farch_regs.h"
+#include "mcdi_pcol.h"
+
+/**************************************************************************
+ *
+ * Management-Controller-to-Driver Interface
+ *
+ **************************************************************************
+ */
+
+#define MCDI_RPC_TIMEOUT (10 * HZ)
+
+/* A reboot/assertion causes the MCDI status word to be set after the
+ * command word is set or a REBOOT event is sent. If we notice a reboot
+ * via these mechanisms then wait 250ms for the status word to be set.
+ */
+#define MCDI_STATUS_DELAY_US 100
+#define MCDI_STATUS_DELAY_COUNT 2500
+#define MCDI_STATUS_SLEEP_MS \
+ (MCDI_STATUS_DELAY_US * MCDI_STATUS_DELAY_COUNT / 1000)
+
+#define SEQ_MASK \
+ EFX_MASK32(EFX_WIDTH(MCDI_HEADER_SEQ))
+
+struct efx_mcdi_async_param {
+ struct list_head list;
+ unsigned int cmd;
+ size_t inlen;
+ size_t outlen;
+ bool quiet;
+ efx_mcdi_async_completer *complete;
+ unsigned long cookie;
+ /* followed by request/response buffer */
+};
+
+static void efx_mcdi_timeout_async(struct timer_list *t);
+static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating,
+ bool *was_attached_out);
+static bool efx_mcdi_poll_once(struct efx_nic *efx);
+static void efx_mcdi_abandon(struct efx_nic *efx);
+
+#ifdef CONFIG_SFC_MCDI_LOGGING
+static bool mcdi_logging_default;
+module_param(mcdi_logging_default, bool, 0644);
+MODULE_PARM_DESC(mcdi_logging_default,
+ "Enable MCDI logging on newly-probed functions");
+#endif
+
+int efx_mcdi_init(struct efx_nic *efx)
+{
+ struct efx_mcdi_iface *mcdi;
+ bool already_attached;
+ int rc = -ENOMEM;
+
+ efx->mcdi = kzalloc(sizeof(*efx->mcdi), GFP_KERNEL);
+ if (!efx->mcdi)
+ goto fail;
+
+ mcdi = efx_mcdi(efx);
+ mcdi->efx = efx;
+#ifdef CONFIG_SFC_MCDI_LOGGING
+ /* consuming code assumes buffer is page-sized */
+ mcdi->logging_buffer = (char *)__get_free_page(GFP_KERNEL);
+ if (!mcdi->logging_buffer)
+ goto fail1;
+ mcdi->logging_enabled = mcdi_logging_default;
+#endif
+ init_waitqueue_head(&mcdi->wq);
+ init_waitqueue_head(&mcdi->proxy_rx_wq);
+ spin_lock_init(&mcdi->iface_lock);
+ mcdi->state = MCDI_STATE_QUIESCENT;
+ mcdi->mode = MCDI_MODE_POLL;
+ spin_lock_init(&mcdi->async_lock);
+ INIT_LIST_HEAD(&mcdi->async_list);
+ timer_setup(&mcdi->async_timer, efx_mcdi_timeout_async, 0);
+
+ (void) efx_mcdi_poll_reboot(efx);
+ mcdi->new_epoch = true;
+
+ /* Recover from a failed assertion before probing */
+ rc = efx_mcdi_handle_assertion(efx);
+ if (rc)
+ goto fail2;
+
+ /* Let the MC (and BMC, if this is a LOM) know that the driver
+ * is loaded. We should do this before we reset the NIC.
+ */
+ rc = efx_mcdi_drv_attach(efx, true, &already_attached);
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev,
+ "Unable to register driver with MCPU\n");
+ goto fail2;
+ }
+ if (already_attached)
+ /* Not a fatal error */
+ netif_err(efx, probe, efx->net_dev,
+ "Host already registered with MCPU\n");
+
+ if (efx->mcdi->fn_flags &
+ (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_PRIMARY))
+ efx->primary = efx;
+
+ return 0;
+fail2:
+#ifdef CONFIG_SFC_MCDI_LOGGING
+ free_page((unsigned long)mcdi->logging_buffer);
+fail1:
+#endif
+ kfree(efx->mcdi);
+ efx->mcdi = NULL;
+fail:
+ return rc;
+}
+
+void efx_mcdi_detach(struct efx_nic *efx)
+{
+ if (!efx->mcdi)
+ return;
+
+ BUG_ON(efx->mcdi->iface.state != MCDI_STATE_QUIESCENT);
+
+ /* Relinquish the device (back to the BMC, if this is a LOM) */
+ efx_mcdi_drv_attach(efx, false, NULL);
+}
+
+void efx_mcdi_fini(struct efx_nic *efx)
+{
+ if (!efx->mcdi)
+ return;
+
+#ifdef CONFIG_SFC_MCDI_LOGGING
+ free_page((unsigned long)efx->mcdi->iface.logging_buffer);
+#endif
+
+ kfree(efx->mcdi);
+}
+
+static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd,
+ const efx_dword_t *inbuf, size_t inlen)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+#ifdef CONFIG_SFC_MCDI_LOGGING
+ char *buf = mcdi->logging_buffer; /* page-sized */
+#endif
+ efx_dword_t hdr[2];
+ size_t hdr_len;
+ u32 xflags, seqno;
+
+ BUG_ON(mcdi->state == MCDI_STATE_QUIESCENT);
+
+ /* Serialise with efx_mcdi_ev_cpl() and efx_mcdi_ev_death() */
+ spin_lock_bh(&mcdi->iface_lock);
+ ++mcdi->seqno;
+ seqno = mcdi->seqno & SEQ_MASK;
+ spin_unlock_bh(&mcdi->iface_lock);
+
+ xflags = 0;
+ if (mcdi->mode == MCDI_MODE_EVENTS)
+ xflags |= MCDI_HEADER_XFLAGS_EVREQ;
+
+ if (efx->type->mcdi_max_ver == 1) {
+ /* MCDI v1 */
+ EFX_POPULATE_DWORD_7(hdr[0],
+ MCDI_HEADER_RESPONSE, 0,
+ MCDI_HEADER_RESYNC, 1,
+ MCDI_HEADER_CODE, cmd,
+ MCDI_HEADER_DATALEN, inlen,
+ MCDI_HEADER_SEQ, seqno,
+ MCDI_HEADER_XFLAGS, xflags,
+ MCDI_HEADER_NOT_EPOCH, !mcdi->new_epoch);
+ hdr_len = 4;
+ } else {
+ /* MCDI v2 */
+ BUG_ON(inlen > MCDI_CTL_SDU_LEN_MAX_V2);
+ EFX_POPULATE_DWORD_7(hdr[0],
+ MCDI_HEADER_RESPONSE, 0,
+ MCDI_HEADER_RESYNC, 1,
+ MCDI_HEADER_CODE, MC_CMD_V2_EXTN,
+ MCDI_HEADER_DATALEN, 0,
+ MCDI_HEADER_SEQ, seqno,
+ MCDI_HEADER_XFLAGS, xflags,
+ MCDI_HEADER_NOT_EPOCH, !mcdi->new_epoch);
+ EFX_POPULATE_DWORD_2(hdr[1],
+ MC_CMD_V2_EXTN_IN_EXTENDED_CMD, cmd,
+ MC_CMD_V2_EXTN_IN_ACTUAL_LEN, inlen);
+ hdr_len = 8;
+ }
+
+#ifdef CONFIG_SFC_MCDI_LOGGING
+ if (mcdi->logging_enabled && !WARN_ON_ONCE(!buf)) {
+ int bytes = 0;
+ int i;
+ /* Lengths should always be a whole number of dwords, so scream
+ * if they're not.
+ */
+ WARN_ON_ONCE(hdr_len % 4);
+ WARN_ON_ONCE(inlen % 4);
+
+ /* We own the logging buffer, as only one MCDI can be in
+ * progress on a NIC at any one time. So no need for locking.
+ */
+ for (i = 0; i < hdr_len / 4 && bytes < PAGE_SIZE; i++)
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x",
+ le32_to_cpu(hdr[i].u32[0]));
+
+ for (i = 0; i < inlen / 4 && bytes < PAGE_SIZE; i++)
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x",
+ le32_to_cpu(inbuf[i].u32[0]));
+
+ netif_info(efx, hw, efx->net_dev, "MCDI RPC REQ:%s\n", buf);
+ }
+#endif
+
+ efx->type->mcdi_request(efx, hdr, hdr_len, inbuf, inlen);
+
+ mcdi->new_epoch = false;
+}
+
+static int efx_mcdi_errno(unsigned int mcdi_err)
+{
+ switch (mcdi_err) {
+ case 0:
+ return 0;
+#define TRANSLATE_ERROR(name) \
+ case MC_CMD_ERR_ ## name: \
+ return -name;
+ TRANSLATE_ERROR(EPERM);
+ TRANSLATE_ERROR(ENOENT);
+ TRANSLATE_ERROR(EINTR);
+ TRANSLATE_ERROR(EAGAIN);
+ TRANSLATE_ERROR(EACCES);
+ TRANSLATE_ERROR(EBUSY);
+ TRANSLATE_ERROR(EINVAL);
+ TRANSLATE_ERROR(EDEADLK);
+ TRANSLATE_ERROR(ENOSYS);
+ TRANSLATE_ERROR(ETIME);
+ TRANSLATE_ERROR(EALREADY);
+ TRANSLATE_ERROR(ENOSPC);
+#undef TRANSLATE_ERROR
+ case MC_CMD_ERR_ENOTSUP:
+ return -EOPNOTSUPP;
+ case MC_CMD_ERR_ALLOC_FAIL:
+ return -ENOBUFS;
+ case MC_CMD_ERR_MAC_EXIST:
+ return -EADDRINUSE;
+ default:
+ return -EPROTO;
+ }
+}
+
+static void efx_mcdi_read_response_header(struct efx_nic *efx)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ unsigned int respseq, respcmd, error;
+#ifdef CONFIG_SFC_MCDI_LOGGING
+ char *buf = mcdi->logging_buffer; /* page-sized */
+#endif
+ efx_dword_t hdr;
+
+ efx->type->mcdi_read_response(efx, &hdr, 0, 4);
+ respseq = EFX_DWORD_FIELD(hdr, MCDI_HEADER_SEQ);
+ respcmd = EFX_DWORD_FIELD(hdr, MCDI_HEADER_CODE);
+ error = EFX_DWORD_FIELD(hdr, MCDI_HEADER_ERROR);
+
+ if (respcmd != MC_CMD_V2_EXTN) {
+ mcdi->resp_hdr_len = 4;
+ mcdi->resp_data_len = EFX_DWORD_FIELD(hdr, MCDI_HEADER_DATALEN);
+ } else {
+ efx->type->mcdi_read_response(efx, &hdr, 4, 4);
+ mcdi->resp_hdr_len = 8;
+ mcdi->resp_data_len =
+ EFX_DWORD_FIELD(hdr, MC_CMD_V2_EXTN_IN_ACTUAL_LEN);
+ }
+
+#ifdef CONFIG_SFC_MCDI_LOGGING
+ if (mcdi->logging_enabled && !WARN_ON_ONCE(!buf)) {
+ size_t hdr_len, data_len;
+ int bytes = 0;
+ int i;
+
+ WARN_ON_ONCE(mcdi->resp_hdr_len % 4);
+ hdr_len = mcdi->resp_hdr_len / 4;
+ /* MCDI_DECLARE_BUF ensures that underlying buffer is padded
+ * to dword size, and the MCDI buffer is always dword size
+ */
+ data_len = DIV_ROUND_UP(mcdi->resp_data_len, 4);
+
+ /* We own the logging buffer, as only one MCDI can be in
+ * progress on a NIC at any one time. So no need for locking.
+ */
+ for (i = 0; i < hdr_len && bytes < PAGE_SIZE; i++) {
+ efx->type->mcdi_read_response(efx, &hdr, (i * 4), 4);
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x", le32_to_cpu(hdr.u32[0]));
+ }
+
+ for (i = 0; i < data_len && bytes < PAGE_SIZE; i++) {
+ efx->type->mcdi_read_response(efx, &hdr,
+ mcdi->resp_hdr_len + (i * 4), 4);
+ bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes,
+ " %08x", le32_to_cpu(hdr.u32[0]));
+ }
+
+ netif_info(efx, hw, efx->net_dev, "MCDI RPC RESP:%s\n", buf);
+ }
+#endif
+
+ mcdi->resprc_raw = 0;
+ if (error && mcdi->resp_data_len == 0) {
+ netif_err(efx, hw, efx->net_dev, "MC rebooted\n");
+ mcdi->resprc = -EIO;
+ } else if ((respseq ^ mcdi->seqno) & SEQ_MASK) {
+ netif_err(efx, hw, efx->net_dev,
+ "MC response mismatch tx seq 0x%x rx seq 0x%x\n",
+ respseq, mcdi->seqno);
+ mcdi->resprc = -EIO;
+ } else if (error) {
+ efx->type->mcdi_read_response(efx, &hdr, mcdi->resp_hdr_len, 4);
+ mcdi->resprc_raw = EFX_DWORD_FIELD(hdr, EFX_DWORD_0);
+ mcdi->resprc = efx_mcdi_errno(mcdi->resprc_raw);
+ } else {
+ mcdi->resprc = 0;
+ }
+}
+
+static bool efx_mcdi_poll_once(struct efx_nic *efx)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+
+ rmb();
+ if (!efx->type->mcdi_poll_response(efx))
+ return false;
+
+ spin_lock_bh(&mcdi->iface_lock);
+ efx_mcdi_read_response_header(efx);
+ spin_unlock_bh(&mcdi->iface_lock);
+
+ return true;
+}
+
+static int efx_mcdi_poll(struct efx_nic *efx)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ unsigned long time, finish;
+ unsigned int spins;
+ int rc;
+
+ /* Check for a reboot atomically with respect to efx_mcdi_copyout() */
+ rc = efx_mcdi_poll_reboot(efx);
+ if (rc) {
+ spin_lock_bh(&mcdi->iface_lock);
+ mcdi->resprc = rc;
+ mcdi->resp_hdr_len = 0;
+ mcdi->resp_data_len = 0;
+ spin_unlock_bh(&mcdi->iface_lock);
+ return 0;
+ }
+
+ /* Poll for completion. Poll quickly (once a us) for the 1st jiffy,
+ * because generally mcdi responses are fast. After that, back off
+ * and poll once a jiffy (approximately)
+ */
+ spins = USER_TICK_USEC;
+ finish = jiffies + MCDI_RPC_TIMEOUT;
+
+ while (1) {
+ if (spins != 0) {
+ --spins;
+ udelay(1);
+ } else {
+ schedule_timeout_uninterruptible(1);
+ }
+
+ time = jiffies;
+
+ if (efx_mcdi_poll_once(efx))
+ break;
+
+ if (time_after(time, finish))
+ return -ETIMEDOUT;
+ }
+
+ /* Return rc=0 like wait_event_timeout() */
+ return 0;
+}
+
+/* Test and clear MC-rebooted flag for this port/function; reset
+ * software state as necessary.
+ */
+int efx_mcdi_poll_reboot(struct efx_nic *efx)
+{
+ if (!efx->mcdi)
+ return 0;
+
+ return efx->type->mcdi_poll_reboot(efx);
+}
+
+static bool efx_mcdi_acquire_async(struct efx_mcdi_iface *mcdi)
+{
+ return cmpxchg(&mcdi->state,
+ MCDI_STATE_QUIESCENT, MCDI_STATE_RUNNING_ASYNC) ==
+ MCDI_STATE_QUIESCENT;
+}
+
+static void efx_mcdi_acquire_sync(struct efx_mcdi_iface *mcdi)
+{
+ /* Wait until the interface becomes QUIESCENT and we win the race
+ * to mark it RUNNING_SYNC.
+ */
+ wait_event(mcdi->wq,
+ cmpxchg(&mcdi->state,
+ MCDI_STATE_QUIESCENT, MCDI_STATE_RUNNING_SYNC) ==
+ MCDI_STATE_QUIESCENT);
+}
+
+static int efx_mcdi_await_completion(struct efx_nic *efx)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+
+ if (wait_event_timeout(mcdi->wq, mcdi->state == MCDI_STATE_COMPLETED,
+ MCDI_RPC_TIMEOUT) == 0)
+ return -ETIMEDOUT;
+
+ /* Check if efx_mcdi_set_mode() switched us back to polled completions.
+ * In which case, poll for completions directly. If efx_mcdi_ev_cpl()
+ * completed the request first, then we'll just end up completing the
+ * request again, which is safe.
+ *
+ * We need an smp_rmb() to synchronise with efx_mcdi_mode_poll(), which
+ * wait_event_timeout() implicitly provides.
+ */
+ if (mcdi->mode == MCDI_MODE_POLL)
+ return efx_mcdi_poll(efx);
+
+ return 0;
+}
+
+/* If the interface is RUNNING_SYNC, switch to COMPLETED and wake the
+ * requester. Return whether this was done. Does not take any locks.
+ */
+static bool efx_mcdi_complete_sync(struct efx_mcdi_iface *mcdi)
+{
+ if (cmpxchg(&mcdi->state,
+ MCDI_STATE_RUNNING_SYNC, MCDI_STATE_COMPLETED) ==
+ MCDI_STATE_RUNNING_SYNC) {
+ wake_up(&mcdi->wq);
+ return true;
+ }
+
+ return false;
+}
+
+static void efx_mcdi_release(struct efx_mcdi_iface *mcdi)
+{
+ if (mcdi->mode == MCDI_MODE_EVENTS) {
+ struct efx_mcdi_async_param *async;
+ struct efx_nic *efx = mcdi->efx;
+
+ /* Process the asynchronous request queue */
+ spin_lock_bh(&mcdi->async_lock);
+ async = list_first_entry_or_null(
+ &mcdi->async_list, struct efx_mcdi_async_param, list);
+ if (async) {
+ mcdi->state = MCDI_STATE_RUNNING_ASYNC;
+ efx_mcdi_send_request(efx, async->cmd,
+ (const efx_dword_t *)(async + 1),
+ async->inlen);
+ mod_timer(&mcdi->async_timer,
+ jiffies + MCDI_RPC_TIMEOUT);
+ }
+ spin_unlock_bh(&mcdi->async_lock);
+
+ if (async)
+ return;
+ }
+
+ mcdi->state = MCDI_STATE_QUIESCENT;
+ wake_up(&mcdi->wq);
+}
+
+/* If the interface is RUNNING_ASYNC, switch to COMPLETED, call the
+ * asynchronous completion function, and release the interface.
+ * Return whether this was done. Must be called in bh-disabled
+ * context. Will take iface_lock and async_lock.
+ */
+static bool efx_mcdi_complete_async(struct efx_mcdi_iface *mcdi, bool timeout)
+{
+ struct efx_nic *efx = mcdi->efx;
+ struct efx_mcdi_async_param *async;
+ size_t hdr_len, data_len, err_len;
+ efx_dword_t *outbuf;
+ MCDI_DECLARE_BUF_ERR(errbuf);
+ int rc;
+
+ if (cmpxchg(&mcdi->state,
+ MCDI_STATE_RUNNING_ASYNC, MCDI_STATE_COMPLETED) !=
+ MCDI_STATE_RUNNING_ASYNC)
+ return false;
+
+ spin_lock(&mcdi->iface_lock);
+ if (timeout) {
+ /* Ensure that if the completion event arrives later,
+ * the seqno check in efx_mcdi_ev_cpl() will fail
+ */
+ ++mcdi->seqno;
+ ++mcdi->credits;
+ rc = -ETIMEDOUT;
+ hdr_len = 0;
+ data_len = 0;
+ } else {
+ rc = mcdi->resprc;
+ hdr_len = mcdi->resp_hdr_len;
+ data_len = mcdi->resp_data_len;
+ }
+ spin_unlock(&mcdi->iface_lock);
+
+ /* Stop the timer. In case the timer function is running, we
+ * must wait for it to return so that there is no possibility
+ * of it aborting the next request.
+ */
+ if (!timeout)
+ del_timer_sync(&mcdi->async_timer);
+
+ spin_lock(&mcdi->async_lock);
+ async = list_first_entry(&mcdi->async_list,
+ struct efx_mcdi_async_param, list);
+ list_del(&async->list);
+ spin_unlock(&mcdi->async_lock);
+
+ outbuf = (efx_dword_t *)(async + 1);
+ efx->type->mcdi_read_response(efx, outbuf, hdr_len,
+ min(async->outlen, data_len));
+ if (!timeout && rc && !async->quiet) {
+ err_len = min(sizeof(errbuf), data_len);
+ efx->type->mcdi_read_response(efx, errbuf, hdr_len,
+ sizeof(errbuf));
+ efx_mcdi_display_error(efx, async->cmd, async->inlen, errbuf,
+ err_len, rc);
+ }
+
+ if (async->complete)
+ async->complete(efx, async->cookie, rc, outbuf,
+ min(async->outlen, data_len));
+ kfree(async);
+
+ efx_mcdi_release(mcdi);
+
+ return true;
+}
+
+static void efx_mcdi_ev_cpl(struct efx_nic *efx, unsigned int seqno,
+ unsigned int datalen, unsigned int mcdi_err)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ bool wake = false;
+
+ spin_lock(&mcdi->iface_lock);
+
+ if ((seqno ^ mcdi->seqno) & SEQ_MASK) {
+ if (mcdi->credits)
+ /* The request has been cancelled */
+ --mcdi->credits;
+ else
+ netif_err(efx, hw, efx->net_dev,
+ "MC response mismatch tx seq 0x%x rx "
+ "seq 0x%x\n", seqno, mcdi->seqno);
+ } else {
+ if (efx->type->mcdi_max_ver >= 2) {
+ /* MCDI v2 responses don't fit in an event */
+ efx_mcdi_read_response_header(efx);
+ } else {
+ mcdi->resprc = efx_mcdi_errno(mcdi_err);
+ mcdi->resp_hdr_len = 4;
+ mcdi->resp_data_len = datalen;
+ }
+
+ wake = true;
+ }
+
+ spin_unlock(&mcdi->iface_lock);
+
+ if (wake) {
+ if (!efx_mcdi_complete_async(mcdi, false))
+ (void) efx_mcdi_complete_sync(mcdi);
+
+ /* If the interface isn't RUNNING_ASYNC or
+ * RUNNING_SYNC then we've received a duplicate
+ * completion after we've already transitioned back to
+ * QUIESCENT. [A subsequent invocation would increment
+ * seqno, so would have failed the seqno check].
+ */
+ }
+}
+
+static void efx_mcdi_timeout_async(struct timer_list *t)
+{
+ struct efx_mcdi_iface *mcdi = from_timer(mcdi, t, async_timer);
+
+ efx_mcdi_complete_async(mcdi, true);
+}
+
+static int
+efx_mcdi_check_supported(struct efx_nic *efx, unsigned int cmd, size_t inlen)
+{
+ if (efx->type->mcdi_max_ver < 0 ||
+ (efx->type->mcdi_max_ver < 2 &&
+ cmd > MC_CMD_CMD_SPACE_ESCAPE_7))
+ return -EINVAL;
+
+ if (inlen > MCDI_CTL_SDU_LEN_MAX_V2 ||
+ (efx->type->mcdi_max_ver < 2 &&
+ inlen > MCDI_CTL_SDU_LEN_MAX_V1))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static bool efx_mcdi_get_proxy_handle(struct efx_nic *efx,
+ size_t hdr_len, size_t data_len,
+ u32 *proxy_handle)
+{
+ MCDI_DECLARE_BUF_ERR(testbuf);
+ const size_t buflen = sizeof(testbuf);
+
+ if (!proxy_handle || data_len < buflen)
+ return false;
+
+ efx->type->mcdi_read_response(efx, testbuf, hdr_len, buflen);
+ if (MCDI_DWORD(testbuf, ERR_CODE) == MC_CMD_ERR_PROXY_PENDING) {
+ *proxy_handle = MCDI_DWORD(testbuf, ERR_PROXY_PENDING_HANDLE);
+ return true;
+ }
+
+ return false;
+}
+
+static int _efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned int cmd,
+ size_t inlen,
+ efx_dword_t *outbuf, size_t outlen,
+ size_t *outlen_actual, bool quiet,
+ u32 *proxy_handle, int *raw_rc)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ MCDI_DECLARE_BUF_ERR(errbuf);
+ int rc;
+
+ if (mcdi->mode == MCDI_MODE_POLL)
+ rc = efx_mcdi_poll(efx);
+ else
+ rc = efx_mcdi_await_completion(efx);
+
+ if (rc != 0) {
+ netif_err(efx, hw, efx->net_dev,
+ "MC command 0x%x inlen %d mode %d timed out\n",
+ cmd, (int)inlen, mcdi->mode);
+
+ if (mcdi->mode == MCDI_MODE_EVENTS && efx_mcdi_poll_once(efx)) {
+ netif_err(efx, hw, efx->net_dev,
+ "MCDI request was completed without an event\n");
+ rc = 0;
+ }
+
+ efx_mcdi_abandon(efx);
+
+ /* Close the race with efx_mcdi_ev_cpl() executing just too late
+ * and completing a request we've just cancelled, by ensuring
+ * that the seqno check therein fails.
+ */
+ spin_lock_bh(&mcdi->iface_lock);
+ ++mcdi->seqno;
+ ++mcdi->credits;
+ spin_unlock_bh(&mcdi->iface_lock);
+ }
+
+ if (proxy_handle)
+ *proxy_handle = 0;
+
+ if (rc != 0) {
+ if (outlen_actual)
+ *outlen_actual = 0;
+ } else {
+ size_t hdr_len, data_len, err_len;
+
+ /* At the very least we need a memory barrier here to ensure
+ * we pick up changes from efx_mcdi_ev_cpl(). Protect against
+ * a spurious efx_mcdi_ev_cpl() running concurrently by
+ * acquiring the iface_lock. */
+ spin_lock_bh(&mcdi->iface_lock);
+ rc = mcdi->resprc;
+ if (raw_rc)
+ *raw_rc = mcdi->resprc_raw;
+ hdr_len = mcdi->resp_hdr_len;
+ data_len = mcdi->resp_data_len;
+ err_len = min(sizeof(errbuf), data_len);
+ spin_unlock_bh(&mcdi->iface_lock);
+
+ BUG_ON(rc > 0);
+
+ efx->type->mcdi_read_response(efx, outbuf, hdr_len,
+ min(outlen, data_len));
+ if (outlen_actual)
+ *outlen_actual = data_len;
+
+ efx->type->mcdi_read_response(efx, errbuf, hdr_len, err_len);
+
+ if (cmd == MC_CMD_REBOOT && rc == -EIO) {
+ /* Don't reset if MC_CMD_REBOOT returns EIO */
+ } else if (rc == -EIO || rc == -EINTR) {
+ netif_err(efx, hw, efx->net_dev, "MC reboot detected\n");
+ netif_dbg(efx, hw, efx->net_dev, "MC rebooted during command %d rc %d\n",
+ cmd, -rc);
+ if (efx->type->mcdi_reboot_detected)
+ efx->type->mcdi_reboot_detected(efx);
+ efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE);
+ } else if (proxy_handle && (rc == -EPROTO) &&
+ efx_mcdi_get_proxy_handle(efx, hdr_len, data_len,
+ proxy_handle)) {
+ mcdi->proxy_rx_status = 0;
+ mcdi->proxy_rx_handle = 0;
+ mcdi->state = MCDI_STATE_PROXY_WAIT;
+ } else if (rc && !quiet) {
+ efx_mcdi_display_error(efx, cmd, inlen, errbuf, err_len,
+ rc);
+ }
+
+ if (rc == -EIO || rc == -EINTR) {
+ msleep(MCDI_STATUS_SLEEP_MS);
+ efx_mcdi_poll_reboot(efx);
+ mcdi->new_epoch = true;
+ }
+ }
+
+ if (!proxy_handle || !*proxy_handle)
+ efx_mcdi_release(mcdi);
+ return rc;
+}
+
+static void efx_mcdi_proxy_abort(struct efx_mcdi_iface *mcdi)
+{
+ if (mcdi->state == MCDI_STATE_PROXY_WAIT) {
+ /* Interrupt the proxy wait. */
+ mcdi->proxy_rx_status = -EINTR;
+ wake_up(&mcdi->proxy_rx_wq);
+ }
+}
+
+static void efx_mcdi_ev_proxy_response(struct efx_nic *efx,
+ u32 handle, int status)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+
+ WARN_ON(mcdi->state != MCDI_STATE_PROXY_WAIT);
+
+ mcdi->proxy_rx_status = efx_mcdi_errno(status);
+ /* Ensure the status is written before we update the handle, since the
+ * latter is used to check if we've finished.
+ */
+ wmb();
+ mcdi->proxy_rx_handle = handle;
+ wake_up(&mcdi->proxy_rx_wq);
+}
+
+static int efx_mcdi_proxy_wait(struct efx_nic *efx, u32 handle, bool quiet)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ int rc;
+
+ /* Wait for a proxy event, or timeout. */
+ rc = wait_event_timeout(mcdi->proxy_rx_wq,
+ mcdi->proxy_rx_handle != 0 ||
+ mcdi->proxy_rx_status == -EINTR,
+ MCDI_RPC_TIMEOUT);
+
+ if (rc <= 0) {
+ netif_dbg(efx, hw, efx->net_dev,
+ "MCDI proxy timeout %d\n", handle);
+ return -ETIMEDOUT;
+ } else if (mcdi->proxy_rx_handle != handle) {
+ netif_warn(efx, hw, efx->net_dev,
+ "MCDI proxy unexpected handle %d (expected %d)\n",
+ mcdi->proxy_rx_handle, handle);
+ return -EINVAL;
+ }
+
+ return mcdi->proxy_rx_status;
+}
+
+static int _efx_mcdi_rpc(struct efx_nic *efx, unsigned int cmd,
+ const efx_dword_t *inbuf, size_t inlen,
+ efx_dword_t *outbuf, size_t outlen,
+ size_t *outlen_actual, bool quiet, int *raw_rc)
+{
+ u32 proxy_handle = 0; /* Zero is an invalid proxy handle. */
+ int rc;
+
+ if (inbuf && inlen && (inbuf == outbuf)) {
+ /* The input buffer can't be aliased with the output. */
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ rc = efx_mcdi_rpc_start(efx, cmd, inbuf, inlen);
+ if (rc)
+ return rc;
+
+ rc = _efx_mcdi_rpc_finish(efx, cmd, inlen, outbuf, outlen,
+ outlen_actual, quiet, &proxy_handle, raw_rc);
+
+ if (proxy_handle) {
+ /* Handle proxy authorisation. This allows approval of MCDI
+ * operations to be delegated to the admin function, allowing
+ * fine control over (eg) multicast subscriptions.
+ */
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+
+ netif_dbg(efx, hw, efx->net_dev,
+ "MCDI waiting for proxy auth %d\n",
+ proxy_handle);
+ rc = efx_mcdi_proxy_wait(efx, proxy_handle, quiet);
+
+ if (rc == 0) {
+ netif_dbg(efx, hw, efx->net_dev,
+ "MCDI proxy retry %d\n", proxy_handle);
+
+ /* We now retry the original request. */
+ mcdi->state = MCDI_STATE_RUNNING_SYNC;
+ efx_mcdi_send_request(efx, cmd, inbuf, inlen);
+
+ rc = _efx_mcdi_rpc_finish(efx, cmd, inlen,
+ outbuf, outlen, outlen_actual,
+ quiet, NULL, raw_rc);
+ } else {
+ netif_cond_dbg(efx, hw, efx->net_dev, rc == -EPERM, err,
+ "MC command 0x%x failed after proxy auth rc=%d\n",
+ cmd, rc);
+
+ if (rc == -EINTR || rc == -EIO)
+ efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE);
+ efx_mcdi_release(mcdi);
+ }
+ }
+
+ return rc;
+}
+
+static int _efx_mcdi_rpc_evb_retry(struct efx_nic *efx, unsigned cmd,
+ const efx_dword_t *inbuf, size_t inlen,
+ efx_dword_t *outbuf, size_t outlen,
+ size_t *outlen_actual, bool quiet)
+{
+ int raw_rc = 0;
+ int rc;
+
+ rc = _efx_mcdi_rpc(efx, cmd, inbuf, inlen,
+ outbuf, outlen, outlen_actual, true, &raw_rc);
+
+ if ((rc == -EPROTO) && (raw_rc == MC_CMD_ERR_NO_EVB_PORT) &&
+ efx->type->is_vf) {
+ /* If the EVB port isn't available within a VF this may
+ * mean the PF is still bringing the switch up. We should
+ * retry our request shortly.
+ */
+ unsigned long abort_time = jiffies + MCDI_RPC_TIMEOUT;
+ unsigned int delay_us = 10000;
+
+ netif_dbg(efx, hw, efx->net_dev,
+ "%s: NO_EVB_PORT; will retry request\n",
+ __func__);
+
+ do {
+ usleep_range(delay_us, delay_us + 10000);
+ rc = _efx_mcdi_rpc(efx, cmd, inbuf, inlen,
+ outbuf, outlen, outlen_actual,
+ true, &raw_rc);
+ if (delay_us < 100000)
+ delay_us <<= 1;
+ } while ((rc == -EPROTO) &&
+ (raw_rc == MC_CMD_ERR_NO_EVB_PORT) &&
+ time_before(jiffies, abort_time));
+ }
+
+ if (rc && !quiet && !(cmd == MC_CMD_REBOOT && rc == -EIO))
+ efx_mcdi_display_error(efx, cmd, inlen,
+ outbuf, outlen, rc);
+
+ return rc;
+}
+
+/**
+ * efx_mcdi_rpc - Issue an MCDI command and wait for completion
+ * @efx: NIC through which to issue the command
+ * @cmd: Command type number
+ * @inbuf: Command parameters
+ * @inlen: Length of command parameters, in bytes. Must be a multiple
+ * of 4 and no greater than %MCDI_CTL_SDU_LEN_MAX_V1.
+ * @outbuf: Response buffer. May be %NULL if @outlen is 0.
+ * @outlen: Length of response buffer, in bytes. If the actual
+ * response is longer than @outlen & ~3, it will be truncated
+ * to that length.
+ * @outlen_actual: Pointer through which to return the actual response
+ * length. May be %NULL if this is not needed.
+ *
+ * This function may sleep and therefore must be called in an appropriate
+ * context.
+ *
+ * Return: A negative error code, or zero if successful. The error
+ * code may come from the MCDI response or may indicate a failure
+ * to communicate with the MC. In the former case, the response
+ * will still be copied to @outbuf and *@outlen_actual will be
+ * set accordingly. In the latter case, *@outlen_actual will be
+ * set to zero.
+ */
+int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd,
+ const efx_dword_t *inbuf, size_t inlen,
+ efx_dword_t *outbuf, size_t outlen,
+ size_t *outlen_actual)
+{
+ return _efx_mcdi_rpc_evb_retry(efx, cmd, inbuf, inlen, outbuf, outlen,
+ outlen_actual, false);
+}
+
+/* Normally, on receiving an error code in the MCDI response,
+ * efx_mcdi_rpc will log an error message containing (among other
+ * things) the raw error code, by means of efx_mcdi_display_error.
+ * This _quiet version suppresses that; if the caller wishes to log
+ * the error conditionally on the return code, it should call this
+ * function and is then responsible for calling efx_mcdi_display_error
+ * as needed.
+ */
+int efx_mcdi_rpc_quiet(struct efx_nic *efx, unsigned cmd,
+ const efx_dword_t *inbuf, size_t inlen,
+ efx_dword_t *outbuf, size_t outlen,
+ size_t *outlen_actual)
+{
+ return _efx_mcdi_rpc_evb_retry(efx, cmd, inbuf, inlen, outbuf, outlen,
+ outlen_actual, true);
+}
+
+int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd,
+ const efx_dword_t *inbuf, size_t inlen)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ int rc;
+
+ rc = efx_mcdi_check_supported(efx, cmd, inlen);
+ if (rc)
+ return rc;
+
+ if (efx->mc_bist_for_other_fn)
+ return -ENETDOWN;
+
+ if (mcdi->mode == MCDI_MODE_FAIL)
+ return -ENETDOWN;
+
+ efx_mcdi_acquire_sync(mcdi);
+ efx_mcdi_send_request(efx, cmd, inbuf, inlen);
+ return 0;
+}
+
+static int _efx_mcdi_rpc_async(struct efx_nic *efx, unsigned int cmd,
+ const efx_dword_t *inbuf, size_t inlen,
+ size_t outlen,
+ efx_mcdi_async_completer *complete,
+ unsigned long cookie, bool quiet)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ struct efx_mcdi_async_param *async;
+ int rc;
+
+ rc = efx_mcdi_check_supported(efx, cmd, inlen);
+ if (rc)
+ return rc;
+
+ if (efx->mc_bist_for_other_fn)
+ return -ENETDOWN;
+
+ async = kmalloc(sizeof(*async) + ALIGN(max(inlen, outlen), 4),
+ GFP_ATOMIC);
+ if (!async)
+ return -ENOMEM;
+
+ async->cmd = cmd;
+ async->inlen = inlen;
+ async->outlen = outlen;
+ async->quiet = quiet;
+ async->complete = complete;
+ async->cookie = cookie;
+ memcpy(async + 1, inbuf, inlen);
+
+ spin_lock_bh(&mcdi->async_lock);
+
+ if (mcdi->mode == MCDI_MODE_EVENTS) {
+ list_add_tail(&async->list, &mcdi->async_list);
+
+ /* If this is at the front of the queue, try to start it
+ * immediately
+ */
+ if (mcdi->async_list.next == &async->list &&
+ efx_mcdi_acquire_async(mcdi)) {
+ efx_mcdi_send_request(efx, cmd, inbuf, inlen);
+ mod_timer(&mcdi->async_timer,
+ jiffies + MCDI_RPC_TIMEOUT);
+ }
+ } else {
+ kfree(async);
+ rc = -ENETDOWN;
+ }
+
+ spin_unlock_bh(&mcdi->async_lock);
+
+ return rc;
+}
+
+/**
+ * efx_mcdi_rpc_async - Schedule an MCDI command to run asynchronously
+ * @efx: NIC through which to issue the command
+ * @cmd: Command type number
+ * @inbuf: Command parameters
+ * @inlen: Length of command parameters, in bytes
+ * @outlen: Length to allocate for response buffer, in bytes
+ * @complete: Function to be called on completion or cancellation.
+ * @cookie: Arbitrary value to be passed to @complete.
+ *
+ * This function does not sleep and therefore may be called in atomic
+ * context. It will fail if event queues are disabled or if MCDI
+ * event completions have been disabled due to an error.
+ *
+ * If it succeeds, the @complete function will be called exactly once
+ * in atomic context, when one of the following occurs:
+ * (a) the completion event is received (in NAPI context)
+ * (b) event queues are disabled (in the process that disables them)
+ * (c) the request times-out (in timer context)
+ */
+int
+efx_mcdi_rpc_async(struct efx_nic *efx, unsigned int cmd,
+ const efx_dword_t *inbuf, size_t inlen, size_t outlen,
+ efx_mcdi_async_completer *complete, unsigned long cookie)
+{
+ return _efx_mcdi_rpc_async(efx, cmd, inbuf, inlen, outlen, complete,
+ cookie, false);
+}
+
+int efx_mcdi_rpc_async_quiet(struct efx_nic *efx, unsigned int cmd,
+ const efx_dword_t *inbuf, size_t inlen,
+ size_t outlen, efx_mcdi_async_completer *complete,
+ unsigned long cookie)
+{
+ return _efx_mcdi_rpc_async(efx, cmd, inbuf, inlen, outlen, complete,
+ cookie, true);
+}
+
+int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen,
+ efx_dword_t *outbuf, size_t outlen,
+ size_t *outlen_actual)
+{
+ return _efx_mcdi_rpc_finish(efx, cmd, inlen, outbuf, outlen,
+ outlen_actual, false, NULL, NULL);
+}
+
+int efx_mcdi_rpc_finish_quiet(struct efx_nic *efx, unsigned cmd, size_t inlen,
+ efx_dword_t *outbuf, size_t outlen,
+ size_t *outlen_actual)
+{
+ return _efx_mcdi_rpc_finish(efx, cmd, inlen, outbuf, outlen,
+ outlen_actual, true, NULL, NULL);
+}
+
+void efx_mcdi_display_error(struct efx_nic *efx, unsigned cmd,
+ size_t inlen, efx_dword_t *outbuf,
+ size_t outlen, int rc)
+{
+ int code = 0, err_arg = 0;
+
+ if (outlen >= MC_CMD_ERR_CODE_OFST + 4)
+ code = MCDI_DWORD(outbuf, ERR_CODE);
+ if (outlen >= MC_CMD_ERR_ARG_OFST + 4)
+ err_arg = MCDI_DWORD(outbuf, ERR_ARG);
+ netif_cond_dbg(efx, hw, efx->net_dev, rc == -EPERM, err,
+ "MC command 0x%x inlen %zu failed rc=%d (raw=%d) arg=%d\n",
+ cmd, inlen, rc, code, err_arg);
+}
+
+/* Switch to polled MCDI completions. This can be called in various
+ * error conditions with various locks held, so it must be lockless.
+ * Caller is responsible for flushing asynchronous requests later.
+ */
+void efx_mcdi_mode_poll(struct efx_nic *efx)
+{
+ struct efx_mcdi_iface *mcdi;
+
+ if (!efx->mcdi)
+ return;
+
+ mcdi = efx_mcdi(efx);
+ /* If already in polling mode, nothing to do.
+ * If in fail-fast state, don't switch to polled completion.
+ * FLR recovery will do that later.
+ */
+ if (mcdi->mode == MCDI_MODE_POLL || mcdi->mode == MCDI_MODE_FAIL)
+ return;
+
+ /* We can switch from event completion to polled completion, because
+ * mcdi requests are always completed in shared memory. We do this by
+ * switching the mode to POLL'd then completing the request.
+ * efx_mcdi_await_completion() will then call efx_mcdi_poll().
+ *
+ * We need an smp_wmb() to synchronise with efx_mcdi_await_completion(),
+ * which efx_mcdi_complete_sync() provides for us.
+ */
+ mcdi->mode = MCDI_MODE_POLL;
+
+ efx_mcdi_complete_sync(mcdi);
+}
+
+/* Flush any running or queued asynchronous requests, after event processing
+ * is stopped
+ */
+void efx_mcdi_flush_async(struct efx_nic *efx)
+{
+ struct efx_mcdi_async_param *async, *next;
+ struct efx_mcdi_iface *mcdi;
+
+ if (!efx->mcdi)
+ return;
+
+ mcdi = efx_mcdi(efx);
+
+ /* We must be in poll or fail mode so no more requests can be queued */
+ BUG_ON(mcdi->mode == MCDI_MODE_EVENTS);
+
+ del_timer_sync(&mcdi->async_timer);
+
+ /* If a request is still running, make sure we give the MC
+ * time to complete it so that the response won't overwrite our
+ * next request.
+ */
+ if (mcdi->state == MCDI_STATE_RUNNING_ASYNC) {
+ efx_mcdi_poll(efx);
+ mcdi->state = MCDI_STATE_QUIESCENT;
+ }
+
+ /* Nothing else will access the async list now, so it is safe
+ * to walk it without holding async_lock. If we hold it while
+ * calling a completer then lockdep may warn that we have
+ * acquired locks in the wrong order.
+ */
+ list_for_each_entry_safe(async, next, &mcdi->async_list, list) {
+ if (async->complete)
+ async->complete(efx, async->cookie, -ENETDOWN, NULL, 0);
+ list_del(&async->list);
+ kfree(async);
+ }
+}
+
+void efx_mcdi_mode_event(struct efx_nic *efx)
+{
+ struct efx_mcdi_iface *mcdi;
+
+ if (!efx->mcdi)
+ return;
+
+ mcdi = efx_mcdi(efx);
+ /* If already in event completion mode, nothing to do.
+ * If in fail-fast state, don't switch to event completion. FLR
+ * recovery will do that later.
+ */
+ if (mcdi->mode == MCDI_MODE_EVENTS || mcdi->mode == MCDI_MODE_FAIL)
+ return;
+
+ /* We can't switch from polled to event completion in the middle of a
+ * request, because the completion method is specified in the request.
+ * So acquire the interface to serialise the requestors. We don't need
+ * to acquire the iface_lock to change the mode here, but we do need a
+ * write memory barrier ensure that efx_mcdi_rpc() sees it, which
+ * efx_mcdi_acquire() provides.
+ */
+ efx_mcdi_acquire_sync(mcdi);
+ mcdi->mode = MCDI_MODE_EVENTS;
+ efx_mcdi_release(mcdi);
+}
+
+static void efx_mcdi_ev_death(struct efx_nic *efx, int rc)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+
+ /* If there is an outstanding MCDI request, it has been terminated
+ * either by a BADASSERT or REBOOT event. If the mcdi interface is
+ * in polled mode, then do nothing because the MC reboot handler will
+ * set the header correctly. However, if the mcdi interface is waiting
+ * for a CMDDONE event it won't receive it [and since all MCDI events
+ * are sent to the same queue, we can't be racing with
+ * efx_mcdi_ev_cpl()]
+ *
+ * If there is an outstanding asynchronous request, we can't
+ * complete it now (efx_mcdi_complete() would deadlock). The
+ * reset process will take care of this.
+ *
+ * There's a race here with efx_mcdi_send_request(), because
+ * we might receive a REBOOT event *before* the request has
+ * been copied out. In polled mode (during startup) this is
+ * irrelevant, because efx_mcdi_complete_sync() is ignored. In
+ * event mode, this condition is just an edge-case of
+ * receiving a REBOOT event after posting the MCDI
+ * request. Did the mc reboot before or after the copyout? The
+ * best we can do always is just return failure.
+ *
+ * If there is an outstanding proxy response expected it is not going
+ * to arrive. We should thus abort it.
+ */
+ spin_lock(&mcdi->iface_lock);
+ efx_mcdi_proxy_abort(mcdi);
+
+ if (efx_mcdi_complete_sync(mcdi)) {
+ if (mcdi->mode == MCDI_MODE_EVENTS) {
+ mcdi->resprc = rc;
+ mcdi->resp_hdr_len = 0;
+ mcdi->resp_data_len = 0;
+ ++mcdi->credits;
+ }
+ } else {
+ int count;
+
+ /* Consume the status word since efx_mcdi_rpc_finish() won't */
+ for (count = 0; count < MCDI_STATUS_DELAY_COUNT; ++count) {
+ rc = efx_mcdi_poll_reboot(efx);
+ if (rc)
+ break;
+ udelay(MCDI_STATUS_DELAY_US);
+ }
+
+ /* On EF10, a CODE_MC_REBOOT event can be received without the
+ * reboot detection in efx_mcdi_poll_reboot() being triggered.
+ * If zero was returned from the final call to
+ * efx_mcdi_poll_reboot(), the MC reboot wasn't noticed but the
+ * MC has definitely rebooted so prepare for the reset.
+ */
+ if (!rc && efx->type->mcdi_reboot_detected)
+ efx->type->mcdi_reboot_detected(efx);
+
+ mcdi->new_epoch = true;
+
+ /* Nobody was waiting for an MCDI request, so trigger a reset */
+ efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE);
+ }
+
+ spin_unlock(&mcdi->iface_lock);
+}
+
+/* The MC is going down in to BIST mode. set the BIST flag to block
+ * new MCDI, cancel any outstanding MCDI and and schedule a BIST-type reset
+ * (which doesn't actually execute a reset, it waits for the controlling
+ * function to reset it).
+ */
+static void efx_mcdi_ev_bist(struct efx_nic *efx)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+
+ spin_lock(&mcdi->iface_lock);
+ efx->mc_bist_for_other_fn = true;
+ efx_mcdi_proxy_abort(mcdi);
+
+ if (efx_mcdi_complete_sync(mcdi)) {
+ if (mcdi->mode == MCDI_MODE_EVENTS) {
+ mcdi->resprc = -EIO;
+ mcdi->resp_hdr_len = 0;
+ mcdi->resp_data_len = 0;
+ ++mcdi->credits;
+ }
+ }
+ mcdi->new_epoch = true;
+ efx_schedule_reset(efx, RESET_TYPE_MC_BIST);
+ spin_unlock(&mcdi->iface_lock);
+}
+
+/* MCDI timeouts seen, so make all MCDI calls fail-fast and issue an FLR to try
+ * to recover.
+ */
+static void efx_mcdi_abandon(struct efx_nic *efx)
+{
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+
+ if (xchg(&mcdi->mode, MCDI_MODE_FAIL) == MCDI_MODE_FAIL)
+ return; /* it had already been done */
+ netif_dbg(efx, hw, efx->net_dev, "MCDI is timing out; trying to recover\n");
+ efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT);
+}
+
+static void efx_handle_drain_event(struct efx_nic *efx)
+{
+ if (atomic_dec_and_test(&efx->active_queues))
+ wake_up(&efx->flush_wq);
+
+ WARN_ON(atomic_read(&efx->active_queues) < 0);
+}
+
+/* Called from efx_farch_ev_process and efx_ef10_ev_process for MCDI events */
+void efx_mcdi_process_event(struct efx_channel *channel,
+ efx_qword_t *event)
+{
+ struct efx_nic *efx = channel->efx;
+ int code = EFX_QWORD_FIELD(*event, MCDI_EVENT_CODE);
+ u32 data = EFX_QWORD_FIELD(*event, MCDI_EVENT_DATA);
+
+ switch (code) {
+ case MCDI_EVENT_CODE_BADSSERT:
+ netif_err(efx, hw, efx->net_dev,
+ "MC watchdog or assertion failure at 0x%x\n", data);
+ efx_mcdi_ev_death(efx, -EINTR);
+ break;
+
+ case MCDI_EVENT_CODE_PMNOTICE:
+ netif_info(efx, wol, efx->net_dev, "MCDI PM event.\n");
+ break;
+
+ case MCDI_EVENT_CODE_CMDDONE:
+ efx_mcdi_ev_cpl(efx,
+ MCDI_EVENT_FIELD(*event, CMDDONE_SEQ),
+ MCDI_EVENT_FIELD(*event, CMDDONE_DATALEN),
+ MCDI_EVENT_FIELD(*event, CMDDONE_ERRNO));
+ break;
+
+ case MCDI_EVENT_CODE_LINKCHANGE:
+ efx_mcdi_process_link_change(efx, event);
+ break;
+ case MCDI_EVENT_CODE_SENSOREVT:
+ efx_sensor_event(efx, event);
+ break;
+ case MCDI_EVENT_CODE_SCHEDERR:
+ netif_dbg(efx, hw, efx->net_dev,
+ "MC Scheduler alert (0x%x)\n", data);
+ break;
+ case MCDI_EVENT_CODE_REBOOT:
+ case MCDI_EVENT_CODE_MC_REBOOT:
+ netif_info(efx, hw, efx->net_dev, "MC Reboot\n");
+ efx_mcdi_ev_death(efx, -EIO);
+ break;
+ case MCDI_EVENT_CODE_MC_BIST:
+ netif_info(efx, hw, efx->net_dev, "MC entered BIST mode\n");
+ efx_mcdi_ev_bist(efx);
+ break;
+ case MCDI_EVENT_CODE_MAC_STATS_DMA:
+ /* MAC stats are gather lazily. We can ignore this. */
+ break;
+ case MCDI_EVENT_CODE_FLR:
+ if (efx->type->sriov_flr)
+ efx->type->sriov_flr(efx,
+ MCDI_EVENT_FIELD(*event, FLR_VF));
+ break;
+ case MCDI_EVENT_CODE_PTP_RX:
+ case MCDI_EVENT_CODE_PTP_FAULT:
+ case MCDI_EVENT_CODE_PTP_PPS:
+ efx_ptp_event(efx, event);
+ break;
+ case MCDI_EVENT_CODE_PTP_TIME:
+ efx_time_sync_event(channel, event);
+ break;
+ case MCDI_EVENT_CODE_TX_FLUSH:
+ case MCDI_EVENT_CODE_RX_FLUSH:
+ /* Two flush events will be sent: one to the same event
+ * queue as completions, and one to event queue 0.
+ * In the latter case the {RX,TX}_FLUSH_TO_DRIVER
+ * flag will be set, and we should ignore the event
+ * because we want to wait for all completions.
+ */
+ BUILD_BUG_ON(MCDI_EVENT_TX_FLUSH_TO_DRIVER_LBN !=
+ MCDI_EVENT_RX_FLUSH_TO_DRIVER_LBN);
+ if (!MCDI_EVENT_FIELD(*event, TX_FLUSH_TO_DRIVER))
+ efx_handle_drain_event(efx);
+ break;
+ case MCDI_EVENT_CODE_TX_ERR:
+ case MCDI_EVENT_CODE_RX_ERR:
+ netif_err(efx, hw, efx->net_dev,
+ "%s DMA error (event: "EFX_QWORD_FMT")\n",
+ code == MCDI_EVENT_CODE_TX_ERR ? "TX" : "RX",
+ EFX_QWORD_VAL(*event));
+ efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
+ break;
+ case MCDI_EVENT_CODE_PROXY_RESPONSE:
+ efx_mcdi_ev_proxy_response(efx,
+ MCDI_EVENT_FIELD(*event, PROXY_RESPONSE_HANDLE),
+ MCDI_EVENT_FIELD(*event, PROXY_RESPONSE_RC));
+ break;
+ default:
+ netif_err(efx, hw, efx->net_dev,
+ "Unknown MCDI event " EFX_QWORD_FMT "\n",
+ EFX_QWORD_VAL(*event));
+ }
+}
+
+/**************************************************************************
+ *
+ * Specific request functions
+ *
+ **************************************************************************
+ */
+
+void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_VERSION_OUT_LEN);
+ size_t outlength;
+ const __le16 *ver_words;
+ size_t offset;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_VERSION_IN_LEN != 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_VERSION, NULL, 0,
+ outbuf, sizeof(outbuf), &outlength);
+ if (rc)
+ goto fail;
+ if (outlength < MC_CMD_GET_VERSION_OUT_LEN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ ver_words = (__le16 *)MCDI_PTR(outbuf, GET_VERSION_OUT_VERSION);
+ offset = scnprintf(buf, len, "%u.%u.%u.%u",
+ le16_to_cpu(ver_words[0]),
+ le16_to_cpu(ver_words[1]),
+ le16_to_cpu(ver_words[2]),
+ le16_to_cpu(ver_words[3]));
+
+ if (efx->type->print_additional_fwver)
+ offset += efx->type->print_additional_fwver(efx, buf + offset,
+ len - offset);
+
+ /* It's theoretically possible for the string to exceed 31
+ * characters, though in practice the first three version
+ * components are short enough that this doesn't happen.
+ */
+ if (WARN_ON(offset >= len))
+ buf[0] = 0;
+
+ return;
+
+fail:
+ netif_err(efx, probe, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ buf[0] = 0;
+}
+
+static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating,
+ bool *was_attached)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_DRV_ATTACH_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_DRV_ATTACH_EXT_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_NEW_STATE,
+ driver_operating ? 1 : 0);
+ MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_UPDATE, 1);
+ MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_FIRMWARE_ID, MC_CMD_FW_LOW_LATENCY);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_DRV_ATTACH, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ /* If we're not the primary PF, trying to ATTACH with a FIRMWARE_ID
+ * specified will fail with EPERM, and we have to tell the MC we don't
+ * care what firmware we get.
+ */
+ if (rc == -EPERM) {
+ netif_dbg(efx, probe, efx->net_dev,
+ "efx_mcdi_drv_attach with fw-variant setting failed EPERM, trying without it\n");
+ MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_FIRMWARE_ID,
+ MC_CMD_FW_DONT_CARE);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_DRV_ATTACH, inbuf,
+ sizeof(inbuf), outbuf, sizeof(outbuf),
+ &outlen);
+ }
+ if (rc) {
+ efx_mcdi_display_error(efx, MC_CMD_DRV_ATTACH, sizeof(inbuf),
+ outbuf, outlen, rc);
+ goto fail;
+ }
+ if (outlen < MC_CMD_DRV_ATTACH_OUT_LEN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ if (driver_operating) {
+ if (outlen >= MC_CMD_DRV_ATTACH_EXT_OUT_LEN) {
+ efx->mcdi->fn_flags =
+ MCDI_DWORD(outbuf,
+ DRV_ATTACH_EXT_OUT_FUNC_FLAGS);
+ } else {
+ /* Synthesise flags for Siena */
+ efx->mcdi->fn_flags =
+ 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL |
+ 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED |
+ (efx_port_num(efx) == 0) <<
+ MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_PRIMARY;
+ }
+ }
+
+ /* We currently assume we have control of the external link
+ * and are completely trusted by firmware. Abort probing
+ * if that's not true for this function.
+ */
+
+ if (was_attached != NULL)
+ *was_attached = MCDI_DWORD(outbuf, DRV_ATTACH_OUT_OLD_STATE);
+ return 0;
+
+fail:
+ netif_err(efx, probe, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address,
+ u16 *fw_subtype_list, u32 *capabilities)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_BOARD_CFG_OUT_LENMAX);
+ size_t outlen, i;
+ int port_num = efx_port_num(efx);
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_BOARD_CFG_IN_LEN != 0);
+ /* we need __aligned(2) for ether_addr_copy */
+ BUILD_BUG_ON(MC_CMD_GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT0_OFST & 1);
+ BUILD_BUG_ON(MC_CMD_GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT1_OFST & 1);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_BOARD_CFG, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ if (outlen < MC_CMD_GET_BOARD_CFG_OUT_LENMIN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ if (mac_address)
+ ether_addr_copy(mac_address,
+ port_num ?
+ MCDI_PTR(outbuf, GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT1) :
+ MCDI_PTR(outbuf, GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT0));
+ if (fw_subtype_list) {
+ for (i = 0;
+ i < MCDI_VAR_ARRAY_LEN(outlen,
+ GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST);
+ i++)
+ fw_subtype_list[i] = MCDI_ARRAY_WORD(
+ outbuf, GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST, i);
+ for (; i < MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MAXNUM; i++)
+ fw_subtype_list[i] = 0;
+ }
+ if (capabilities) {
+ if (port_num)
+ *capabilities = MCDI_DWORD(outbuf,
+ GET_BOARD_CFG_OUT_CAPABILITIES_PORT1);
+ else
+ *capabilities = MCDI_DWORD(outbuf,
+ GET_BOARD_CFG_OUT_CAPABILITIES_PORT0);
+ }
+
+ return 0;
+
+fail:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d len=%d\n",
+ __func__, rc, (int)outlen);
+
+ return rc;
+}
+
+int efx_mcdi_log_ctrl(struct efx_nic *efx, bool evq, bool uart, u32 dest_evq)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_LOG_CTRL_IN_LEN);
+ u32 dest = 0;
+ int rc;
+
+ if (uart)
+ dest |= MC_CMD_LOG_CTRL_IN_LOG_DEST_UART;
+ if (evq)
+ dest |= MC_CMD_LOG_CTRL_IN_LOG_DEST_EVQ;
+
+ MCDI_SET_DWORD(inbuf, LOG_CTRL_IN_LOG_DEST, dest);
+ MCDI_SET_DWORD(inbuf, LOG_CTRL_IN_LOG_DEST_EVQ, dest_evq);
+
+ BUILD_BUG_ON(MC_CMD_LOG_CTRL_OUT_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_LOG_CTRL, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+ return rc;
+}
+
+int efx_mcdi_nvram_types(struct efx_nic *efx, u32 *nvram_types_out)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_TYPES_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_NVRAM_TYPES_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_TYPES, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+ if (outlen < MC_CMD_NVRAM_TYPES_OUT_LEN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ *nvram_types_out = MCDI_DWORD(outbuf, NVRAM_TYPES_OUT_TYPES);
+ return 0;
+
+fail:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n",
+ __func__, rc);
+ return rc;
+}
+
+/* This function finds types using the new NVRAM_PARTITIONS mcdi. */
+static int efx_new_mcdi_nvram_types(struct efx_nic *efx, u32 *number,
+ u32 *nvram_types)
+{
+ efx_dword_t *outbuf = kzalloc(MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX_MCDI2,
+ GFP_KERNEL);
+ size_t outlen;
+ int rc;
+
+ if (!outbuf)
+ return -ENOMEM;
+
+ BUILD_BUG_ON(MC_CMD_NVRAM_PARTITIONS_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_PARTITIONS, NULL, 0,
+ outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX_MCDI2, &outlen);
+ if (rc)
+ goto fail;
+
+ *number = MCDI_DWORD(outbuf, NVRAM_PARTITIONS_OUT_NUM_PARTITIONS);
+
+ memcpy(nvram_types, MCDI_PTR(outbuf, NVRAM_PARTITIONS_OUT_TYPE_ID),
+ *number * sizeof(u32));
+
+fail:
+ kfree(outbuf);
+ return rc;
+}
+
+int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type,
+ size_t *size_out, size_t *erase_size_out,
+ bool *protected_out)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_INFO_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_INFO_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, NVRAM_INFO_IN_TYPE, type);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_INFO, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+ if (outlen < MC_CMD_NVRAM_INFO_OUT_LEN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ *size_out = MCDI_DWORD(outbuf, NVRAM_INFO_OUT_SIZE);
+ *erase_size_out = MCDI_DWORD(outbuf, NVRAM_INFO_OUT_ERASESIZE);
+ *protected_out = !!(MCDI_DWORD(outbuf, NVRAM_INFO_OUT_FLAGS) &
+ (1 << MC_CMD_NVRAM_INFO_OUT_PROTECTED_LBN));
+ return 0;
+
+fail:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+static int efx_mcdi_nvram_test(struct efx_nic *efx, unsigned int type)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_TEST_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_TEST_OUT_LEN);
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, NVRAM_TEST_IN_TYPE, type);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_TEST, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ return rc;
+
+ switch (MCDI_DWORD(outbuf, NVRAM_TEST_OUT_RESULT)) {
+ case MC_CMD_NVRAM_TEST_PASS:
+ case MC_CMD_NVRAM_TEST_NOTSUPP:
+ return 0;
+ default:
+ return -EIO;
+ }
+}
+
+/* This function tests nvram partitions using the new mcdi partition lookup scheme */
+int efx_new_mcdi_nvram_test_all(struct efx_nic *efx)
+{
+ u32 *nvram_types = kzalloc(MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX_MCDI2,
+ GFP_KERNEL);
+ unsigned int number;
+ int rc, i;
+
+ if (!nvram_types)
+ return -ENOMEM;
+
+ rc = efx_new_mcdi_nvram_types(efx, &number, nvram_types);
+ if (rc)
+ goto fail;
+
+ /* Require at least one check */
+ rc = -EAGAIN;
+
+ for (i = 0; i < number; i++) {
+ if (nvram_types[i] == NVRAM_PARTITION_TYPE_PARTITION_MAP ||
+ nvram_types[i] == NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG)
+ continue;
+
+ rc = efx_mcdi_nvram_test(efx, nvram_types[i]);
+ if (rc)
+ goto fail;
+ }
+
+fail:
+ kfree(nvram_types);
+ return rc;
+}
+
+int efx_mcdi_nvram_test_all(struct efx_nic *efx)
+{
+ u32 nvram_types;
+ unsigned int type;
+ int rc;
+
+ rc = efx_mcdi_nvram_types(efx, &nvram_types);
+ if (rc)
+ goto fail1;
+
+ type = 0;
+ while (nvram_types != 0) {
+ if (nvram_types & 1) {
+ rc = efx_mcdi_nvram_test(efx, type);
+ if (rc)
+ goto fail2;
+ }
+ type++;
+ nvram_types >>= 1;
+ }
+
+ return 0;
+
+fail2:
+ netif_err(efx, hw, efx->net_dev, "%s: failed type=%u\n",
+ __func__, type);
+fail1:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+/* Returns 1 if an assertion was read, 0 if no assertion had fired,
+ * negative on error.
+ */
+static int efx_mcdi_read_assertion(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_ASSERTS_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_ASSERTS_OUT_LEN);
+ unsigned int flags, index;
+ const char *reason;
+ size_t outlen;
+ int retry;
+ int rc;
+
+ /* Attempt to read any stored assertion state before we reboot
+ * the mcfw out of the assertion handler. Retry twice, once
+ * because a boot-time assertion might cause this command to fail
+ * with EINTR. And once again because GET_ASSERTS can race with
+ * MC_CMD_REBOOT running on the other port. */
+ retry = 2;
+ do {
+ MCDI_SET_DWORD(inbuf, GET_ASSERTS_IN_CLEAR, 1);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_ASSERTS,
+ inbuf, MC_CMD_GET_ASSERTS_IN_LEN,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc == -EPERM)
+ return 0;
+ } while ((rc == -EINTR || rc == -EIO) && retry-- > 0);
+
+ if (rc) {
+ efx_mcdi_display_error(efx, MC_CMD_GET_ASSERTS,
+ MC_CMD_GET_ASSERTS_IN_LEN, outbuf,
+ outlen, rc);
+ return rc;
+ }
+ if (outlen < MC_CMD_GET_ASSERTS_OUT_LEN)
+ return -EIO;
+
+ /* Print out any recorded assertion state */
+ flags = MCDI_DWORD(outbuf, GET_ASSERTS_OUT_GLOBAL_FLAGS);
+ if (flags == MC_CMD_GET_ASSERTS_FLAGS_NO_FAILS)
+ return 0;
+
+ reason = (flags == MC_CMD_GET_ASSERTS_FLAGS_SYS_FAIL)
+ ? "system-level assertion"
+ : (flags == MC_CMD_GET_ASSERTS_FLAGS_THR_FAIL)
+ ? "thread-level assertion"
+ : (flags == MC_CMD_GET_ASSERTS_FLAGS_WDOG_FIRED)
+ ? "watchdog reset"
+ : "unknown assertion";
+ netif_err(efx, hw, efx->net_dev,
+ "MCPU %s at PC = 0x%.8x in thread 0x%.8x\n", reason,
+ MCDI_DWORD(outbuf, GET_ASSERTS_OUT_SAVED_PC_OFFS),
+ MCDI_DWORD(outbuf, GET_ASSERTS_OUT_THREAD_OFFS));
+
+ /* Print out the registers */
+ for (index = 0;
+ index < MC_CMD_GET_ASSERTS_OUT_GP_REGS_OFFS_NUM;
+ index++)
+ netif_err(efx, hw, efx->net_dev, "R%.2d (?): 0x%.8x\n",
+ 1 + index,
+ MCDI_ARRAY_DWORD(outbuf, GET_ASSERTS_OUT_GP_REGS_OFFS,
+ index));
+
+ return 1;
+}
+
+static int efx_mcdi_exit_assertion(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_REBOOT_IN_LEN);
+ int rc;
+
+ /* If the MC is running debug firmware, it might now be
+ * waiting for a debugger to attach, but we just want it to
+ * reboot. We set a flag that makes the command a no-op if it
+ * has already done so.
+ * The MCDI will thus return either 0 or -EIO.
+ */
+ BUILD_BUG_ON(MC_CMD_REBOOT_OUT_LEN != 0);
+ MCDI_SET_DWORD(inbuf, REBOOT_IN_FLAGS,
+ MC_CMD_REBOOT_FLAGS_AFTER_ASSERTION);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_REBOOT, inbuf, MC_CMD_REBOOT_IN_LEN,
+ NULL, 0, NULL);
+ if (rc == -EIO)
+ rc = 0;
+ if (rc)
+ efx_mcdi_display_error(efx, MC_CMD_REBOOT, MC_CMD_REBOOT_IN_LEN,
+ NULL, 0, rc);
+ return rc;
+}
+
+int efx_mcdi_handle_assertion(struct efx_nic *efx)
+{
+ int rc;
+
+ rc = efx_mcdi_read_assertion(efx);
+ if (rc <= 0)
+ return rc;
+
+ return efx_mcdi_exit_assertion(efx);
+}
+
+int efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_ID_LED_IN_LEN);
+
+ BUILD_BUG_ON(EFX_LED_OFF != MC_CMD_LED_OFF);
+ BUILD_BUG_ON(EFX_LED_ON != MC_CMD_LED_ON);
+ BUILD_BUG_ON(EFX_LED_DEFAULT != MC_CMD_LED_DEFAULT);
+
+ BUILD_BUG_ON(MC_CMD_SET_ID_LED_OUT_LEN != 0);
+
+ MCDI_SET_DWORD(inbuf, SET_ID_LED_IN_STATE, mode);
+
+ return efx_mcdi_rpc(efx, MC_CMD_SET_ID_LED, inbuf, sizeof(inbuf), NULL, 0, NULL);
+}
+
+static int efx_mcdi_reset_func(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_ENTITY_RESET_IN_LEN);
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_ENTITY_RESET_OUT_LEN != 0);
+ MCDI_POPULATE_DWORD_1(inbuf, ENTITY_RESET_IN_FLAG,
+ ENTITY_RESET_IN_FUNCTION_RESOURCE_RESET, 1);
+ rc = efx_mcdi_rpc(efx, MC_CMD_ENTITY_RESET, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+ return rc;
+}
+
+static int efx_mcdi_reset_mc(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_REBOOT_IN_LEN);
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_REBOOT_OUT_LEN != 0);
+ MCDI_SET_DWORD(inbuf, REBOOT_IN_FLAGS, 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_REBOOT, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+ /* White is black, and up is down */
+ if (rc == -EIO)
+ return 0;
+ if (rc == 0)
+ rc = -EIO;
+ return rc;
+}
+
+enum reset_type efx_mcdi_map_reset_reason(enum reset_type reason)
+{
+ return RESET_TYPE_RECOVER_OR_ALL;
+}
+
+int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method)
+{
+ int rc;
+
+ /* If MCDI is down, we can't handle_assertion */
+ if (method == RESET_TYPE_MCDI_TIMEOUT) {
+ rc = pci_reset_function(efx->pci_dev);
+ if (rc)
+ return rc;
+ /* Re-enable polled MCDI completion */
+ if (efx->mcdi) {
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ mcdi->mode = MCDI_MODE_POLL;
+ }
+ return 0;
+ }
+
+ /* Recover from a failed assertion pre-reset */
+ rc = efx_mcdi_handle_assertion(efx);
+ if (rc)
+ return rc;
+
+ if (method == RESET_TYPE_DATAPATH)
+ return 0;
+ else if (method == RESET_TYPE_WORLD)
+ return efx_mcdi_reset_mc(efx);
+ else
+ return efx_mcdi_reset_func(efx);
+}
+
+static int efx_mcdi_wol_filter_set(struct efx_nic *efx, u32 type,
+ const u8 *mac, int *id_out)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_WOL_FILTER_SET_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_WOL_FILTER_SET_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, WOL_FILTER_SET_IN_WOL_TYPE, type);
+ MCDI_SET_DWORD(inbuf, WOL_FILTER_SET_IN_FILTER_MODE,
+ MC_CMD_FILTER_MODE_SIMPLE);
+ ether_addr_copy(MCDI_PTR(inbuf, WOL_FILTER_SET_IN_MAGIC_MAC), mac);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_WOL_FILTER_SET, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ if (outlen < MC_CMD_WOL_FILTER_SET_OUT_LEN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ *id_out = (int)MCDI_DWORD(outbuf, WOL_FILTER_SET_OUT_FILTER_ID);
+
+ return 0;
+
+fail:
+ *id_out = -1;
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+
+}
+
+
+int
+efx_mcdi_wol_filter_set_magic(struct efx_nic *efx, const u8 *mac, int *id_out)
+{
+ return efx_mcdi_wol_filter_set(efx, MC_CMD_WOL_TYPE_MAGIC, mac, id_out);
+}
+
+
+int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_WOL_FILTER_GET_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_WOL_FILTER_GET, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ if (outlen < MC_CMD_WOL_FILTER_GET_OUT_LEN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ *id_out = (int)MCDI_DWORD(outbuf, WOL_FILTER_GET_OUT_FILTER_ID);
+
+ return 0;
+
+fail:
+ *id_out = -1;
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+
+int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_WOL_FILTER_REMOVE_IN_LEN);
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, WOL_FILTER_REMOVE_IN_FILTER_ID, (u32)id);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_WOL_FILTER_REMOVE, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+ return rc;
+}
+
+int efx_mcdi_flush_rxqs(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ struct efx_rx_queue *rx_queue;
+ MCDI_DECLARE_BUF(inbuf,
+ MC_CMD_FLUSH_RX_QUEUES_IN_LEN(EFX_MAX_CHANNELS));
+ int rc, count;
+
+ BUILD_BUG_ON(EFX_MAX_CHANNELS >
+ MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM);
+
+ count = 0;
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ if (rx_queue->flush_pending) {
+ rx_queue->flush_pending = false;
+ atomic_dec(&efx->rxq_flush_pending);
+ MCDI_SET_ARRAY_DWORD(
+ inbuf, FLUSH_RX_QUEUES_IN_QID_OFST,
+ count, efx_rx_queue_index(rx_queue));
+ count++;
+ }
+ }
+ }
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, inbuf,
+ MC_CMD_FLUSH_RX_QUEUES_IN_LEN(count), NULL, 0, NULL);
+ WARN_ON(rc < 0);
+
+ return rc;
+}
+
+int efx_mcdi_wol_filter_reset(struct efx_nic *efx)
+{
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_WOL_FILTER_RESET, NULL, 0, NULL, 0, NULL);
+ return rc;
+}
+
+int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled,
+ unsigned int *flags)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_WORKAROUND_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_WORKAROUND_EXT_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_WORKAROUND_OUT_LEN != 0);
+ MCDI_SET_DWORD(inbuf, WORKAROUND_IN_TYPE, type);
+ MCDI_SET_DWORD(inbuf, WORKAROUND_IN_ENABLED, enabled);
+ rc = efx_mcdi_rpc(efx, MC_CMD_WORKAROUND, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+
+ if (!flags)
+ return 0;
+
+ if (outlen >= MC_CMD_WORKAROUND_EXT_OUT_LEN)
+ *flags = MCDI_DWORD(outbuf, WORKAROUND_EXT_OUT_FLAGS);
+ else
+ *flags = 0;
+
+ return 0;
+}
+
+int efx_mcdi_get_workarounds(struct efx_nic *efx, unsigned int *impl_out,
+ unsigned int *enabled_out)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_WORKAROUNDS_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_WORKAROUNDS, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ if (outlen < MC_CMD_GET_WORKAROUNDS_OUT_LEN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ if (impl_out)
+ *impl_out = MCDI_DWORD(outbuf, GET_WORKAROUNDS_OUT_IMPLEMENTED);
+
+ if (enabled_out)
+ *enabled_out = MCDI_DWORD(outbuf, GET_WORKAROUNDS_OUT_ENABLED);
+
+ return 0;
+
+fail:
+ /* Older firmware lacks GET_WORKAROUNDS and this isn't especially
+ * terrifying. The call site will have to deal with it though.
+ */
+ netif_cond_dbg(efx, hw, efx->net_dev, rc == -ENOSYS, err,
+ "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+#ifdef CONFIG_SFC_MTD
+
+#define EFX_MCDI_NVRAM_LEN_MAX 128
+
+static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN);
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_START_IN_TYPE, type);
+ MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_START_V2_IN_FLAGS,
+ NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT,
+ 1);
+
+ BUILD_BUG_ON(MC_CMD_NVRAM_UPDATE_START_OUT_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_START, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+
+ return rc;
+}
+
+static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type,
+ loff_t offset, u8 *buffer, size_t length)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_READ_IN_V2_LEN);
+ MCDI_DECLARE_BUF(outbuf,
+ MC_CMD_NVRAM_READ_OUT_LEN(EFX_MCDI_NVRAM_LEN_MAX));
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_TYPE, type);
+ MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_OFFSET, offset);
+ MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_LENGTH, length);
+ MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_V2_MODE,
+ MC_CMD_NVRAM_READ_IN_V2_DEFAULT);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_READ, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+
+ memcpy(buffer, MCDI_PTR(outbuf, NVRAM_READ_OUT_READ_BUFFER), length);
+ return 0;
+}
+
+static int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type,
+ loff_t offset, const u8 *buffer, size_t length)
+{
+ MCDI_DECLARE_BUF(inbuf,
+ MC_CMD_NVRAM_WRITE_IN_LEN(EFX_MCDI_NVRAM_LEN_MAX));
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_TYPE, type);
+ MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_OFFSET, offset);
+ MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_LENGTH, length);
+ memcpy(MCDI_PTR(inbuf, NVRAM_WRITE_IN_WRITE_BUFFER), buffer, length);
+
+ BUILD_BUG_ON(MC_CMD_NVRAM_WRITE_OUT_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_WRITE, inbuf,
+ ALIGN(MC_CMD_NVRAM_WRITE_IN_LEN(length), 4),
+ NULL, 0, NULL);
+ return rc;
+}
+
+static int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type,
+ loff_t offset, size_t length)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_ERASE_IN_LEN);
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, NVRAM_ERASE_IN_TYPE, type);
+ MCDI_SET_DWORD(inbuf, NVRAM_ERASE_IN_OFFSET, offset);
+ MCDI_SET_DWORD(inbuf, NVRAM_ERASE_IN_LENGTH, length);
+
+ BUILD_BUG_ON(MC_CMD_NVRAM_ERASE_OUT_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_ERASE, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+ return rc;
+}
+
+static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN);
+ size_t outlen;
+ int rc, rc2;
+
+ MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_FINISH_IN_TYPE, type);
+ /* Always set this flag. Old firmware ignores it */
+ MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS,
+ NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT,
+ 1);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_FINISH, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (!rc && outlen >= MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN) {
+ rc2 = MCDI_DWORD(outbuf, NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE);
+ if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS)
+ netif_err(efx, drv, efx->net_dev,
+ "NVRAM update failed verification with code 0x%x\n",
+ rc2);
+ switch (rc2) {
+ case MC_CMD_NVRAM_VERIFY_RC_SUCCESS:
+ break;
+ case MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED:
+ case MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED:
+ case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED:
+ case MC_CMD_NVRAM_VERIFY_RC_TRUSTED_APPROVERS_CHECK_FAILED:
+ case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHAIN_CHECK_FAILED:
+ rc = -EIO;
+ break;
+ case MC_CMD_NVRAM_VERIFY_RC_INVALID_CMS_FORMAT:
+ case MC_CMD_NVRAM_VERIFY_RC_BAD_MESSAGE_DIGEST:
+ rc = -EINVAL;
+ break;
+ case MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES:
+ case MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS:
+ case MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH:
+ rc = -EPERM;
+ break;
+ default:
+ netif_err(efx, drv, efx->net_dev,
+ "Unknown response to NVRAM_UPDATE_FINISH\n");
+ rc = -EIO;
+ }
+ }
+
+ return rc;
+}
+
+int efx_mcdi_mtd_read(struct mtd_info *mtd, loff_t start,
+ size_t len, size_t *retlen, u8 *buffer)
+{
+ struct efx_mcdi_mtd_partition *part = to_efx_mcdi_mtd_partition(mtd);
+ struct efx_nic *efx = mtd->priv;
+ loff_t offset = start;
+ loff_t end = min_t(loff_t, start + len, mtd->size);
+ size_t chunk;
+ int rc = 0;
+
+ while (offset < end) {
+ chunk = min_t(size_t, end - offset, EFX_MCDI_NVRAM_LEN_MAX);
+ rc = efx_mcdi_nvram_read(efx, part->nvram_type, offset,
+ buffer, chunk);
+ if (rc)
+ goto out;
+ offset += chunk;
+ buffer += chunk;
+ }
+out:
+ *retlen = offset - start;
+ return rc;
+}
+
+int efx_mcdi_mtd_erase(struct mtd_info *mtd, loff_t start, size_t len)
+{
+ struct efx_mcdi_mtd_partition *part = to_efx_mcdi_mtd_partition(mtd);
+ struct efx_nic *efx = mtd->priv;
+ loff_t offset = start & ~((loff_t)(mtd->erasesize - 1));
+ loff_t end = min_t(loff_t, start + len, mtd->size);
+ size_t chunk = part->common.mtd.erasesize;
+ int rc = 0;
+
+ if (!part->updating) {
+ rc = efx_mcdi_nvram_update_start(efx, part->nvram_type);
+ if (rc)
+ goto out;
+ part->updating = true;
+ }
+
+ /* The MCDI interface can in fact do multiple erase blocks at once;
+ * but erasing may be slow, so we make multiple calls here to avoid
+ * tripping the MCDI RPC timeout. */
+ while (offset < end) {
+ rc = efx_mcdi_nvram_erase(efx, part->nvram_type, offset,
+ chunk);
+ if (rc)
+ goto out;
+ offset += chunk;
+ }
+out:
+ return rc;
+}
+
+int efx_mcdi_mtd_write(struct mtd_info *mtd, loff_t start,
+ size_t len, size_t *retlen, const u8 *buffer)
+{
+ struct efx_mcdi_mtd_partition *part = to_efx_mcdi_mtd_partition(mtd);
+ struct efx_nic *efx = mtd->priv;
+ loff_t offset = start;
+ loff_t end = min_t(loff_t, start + len, mtd->size);
+ size_t chunk;
+ int rc = 0;
+
+ if (!part->updating) {
+ rc = efx_mcdi_nvram_update_start(efx, part->nvram_type);
+ if (rc)
+ goto out;
+ part->updating = true;
+ }
+
+ while (offset < end) {
+ chunk = min_t(size_t, end - offset, EFX_MCDI_NVRAM_LEN_MAX);
+ rc = efx_mcdi_nvram_write(efx, part->nvram_type, offset,
+ buffer, chunk);
+ if (rc)
+ goto out;
+ offset += chunk;
+ buffer += chunk;
+ }
+out:
+ *retlen = offset - start;
+ return rc;
+}
+
+int efx_mcdi_mtd_sync(struct mtd_info *mtd)
+{
+ struct efx_mcdi_mtd_partition *part = to_efx_mcdi_mtd_partition(mtd);
+ struct efx_nic *efx = mtd->priv;
+ int rc = 0;
+
+ if (part->updating) {
+ part->updating = false;
+ rc = efx_mcdi_nvram_update_finish(efx, part->nvram_type);
+ }
+
+ return rc;
+}
+
+void efx_mcdi_mtd_rename(struct efx_mtd_partition *part)
+{
+ struct efx_mcdi_mtd_partition *mcdi_part =
+ container_of(part, struct efx_mcdi_mtd_partition, common);
+ struct efx_nic *efx = part->mtd.priv;
+
+ snprintf(part->name, sizeof(part->name), "%s %s:%02x",
+ efx->name, part->type_name, mcdi_part->fw_subtype);
+}
+
+#endif /* CONFIG_SFC_MTD */
diff --git a/drivers/net/ethernet/sfc/siena/mcdi.h b/drivers/net/ethernet/sfc/siena/mcdi.h
new file mode 100644
index 000000000000..69c2924a147c
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/mcdi.h
@@ -0,0 +1,388 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2008-2013 Solarflare Communications Inc.
+ */
+
+#ifndef EFX_MCDI_H
+#define EFX_MCDI_H
+
+/**
+ * enum efx_mcdi_state - MCDI request handling state
+ * @MCDI_STATE_QUIESCENT: No pending MCDI requests. If the caller holds the
+ * mcdi @iface_lock then they are able to move to %MCDI_STATE_RUNNING
+ * @MCDI_STATE_RUNNING_SYNC: There is a synchronous MCDI request pending.
+ * Only the thread that moved into this state is allowed to move out of it.
+ * @MCDI_STATE_RUNNING_ASYNC: There is an asynchronous MCDI request pending.
+ * @MCDI_STATE_PROXY_WAIT: An MCDI request has completed with a response that
+ * indicates we must wait for a proxy try again message.
+ * @MCDI_STATE_COMPLETED: An MCDI request has completed, but the owning thread
+ * has not yet consumed the result. For all other threads, equivalent to
+ * %MCDI_STATE_RUNNING.
+ */
+enum efx_mcdi_state {
+ MCDI_STATE_QUIESCENT,
+ MCDI_STATE_RUNNING_SYNC,
+ MCDI_STATE_RUNNING_ASYNC,
+ MCDI_STATE_PROXY_WAIT,
+ MCDI_STATE_COMPLETED,
+};
+
+/**
+ * enum efx_mcdi_mode - MCDI transaction mode
+ * @MCDI_MODE_POLL: poll for MCDI completion, until timeout
+ * @MCDI_MODE_EVENTS: wait for an mcdi_event. On timeout, poll once
+ * @MCDI_MODE_FAIL: we think MCDI is dead, so fail-fast all calls
+ */
+enum efx_mcdi_mode {
+ MCDI_MODE_POLL,
+ MCDI_MODE_EVENTS,
+ MCDI_MODE_FAIL,
+};
+
+/**
+ * struct efx_mcdi_iface - MCDI protocol context
+ * @efx: The associated NIC.
+ * @state: Request handling state. Waited for by @wq.
+ * @mode: Poll for mcdi completion, or wait for an mcdi_event.
+ * @wq: Wait queue for threads waiting for @state != %MCDI_STATE_RUNNING
+ * @new_epoch: Indicates start of day or start of MC reboot recovery
+ * @iface_lock: Serialises access to @seqno, @credits and response metadata
+ * @seqno: The next sequence number to use for mcdi requests.
+ * @credits: Number of spurious MCDI completion events allowed before we
+ * trigger a fatal error
+ * @resprc: Response error/success code (Linux numbering)
+ * @resp_hdr_len: Response header length
+ * @resp_data_len: Response data (SDU or error) length
+ * @async_lock: Serialises access to @async_list while event processing is
+ * enabled
+ * @async_list: Queue of asynchronous requests
+ * @async_timer: Timer for asynchronous request timeout
+ * @logging_buffer: buffer that may be used to build MCDI tracing messages
+ * @logging_enabled: whether to trace MCDI
+ * @proxy_rx_handle: Most recently received proxy authorisation handle
+ * @proxy_rx_status: Status of most recent proxy authorisation
+ * @proxy_rx_wq: Wait queue for updates to proxy_rx_handle
+ */
+struct efx_mcdi_iface {
+ struct efx_nic *efx;
+ enum efx_mcdi_state state;
+ enum efx_mcdi_mode mode;
+ wait_queue_head_t wq;
+ spinlock_t iface_lock;
+ bool new_epoch;
+ unsigned int credits;
+ unsigned int seqno;
+ int resprc;
+ int resprc_raw;
+ size_t resp_hdr_len;
+ size_t resp_data_len;
+ spinlock_t async_lock;
+ struct list_head async_list;
+ struct timer_list async_timer;
+#ifdef CONFIG_SFC_MCDI_LOGGING
+ char *logging_buffer;
+ bool logging_enabled;
+#endif
+ unsigned int proxy_rx_handle;
+ int proxy_rx_status;
+ wait_queue_head_t proxy_rx_wq;
+};
+
+struct efx_mcdi_mon {
+ struct efx_buffer dma_buf;
+ struct mutex update_lock;
+ unsigned long last_update;
+ struct device *device;
+ struct efx_mcdi_mon_attribute *attrs;
+ struct attribute_group group;
+ const struct attribute_group *groups[2];
+ unsigned int n_attrs;
+};
+
+struct efx_mcdi_mtd_partition {
+ struct efx_mtd_partition common;
+ bool updating;
+ u16 nvram_type;
+ u16 fw_subtype;
+};
+
+#define to_efx_mcdi_mtd_partition(mtd) \
+ container_of(mtd, struct efx_mcdi_mtd_partition, common.mtd)
+
+/**
+ * struct efx_mcdi_data - extra state for NICs that implement MCDI
+ * @iface: Interface/protocol state
+ * @hwmon: Hardware monitor state
+ * @fn_flags: Flags for this function, as returned by %MC_CMD_DRV_ATTACH.
+ */
+struct efx_mcdi_data {
+ struct efx_mcdi_iface iface;
+#ifdef CONFIG_SFC_MCDI_MON
+ struct efx_mcdi_mon hwmon;
+#endif
+ u32 fn_flags;
+};
+
+static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx)
+{
+ EFX_WARN_ON_PARANOID(!efx->mcdi);
+ return &efx->mcdi->iface;
+}
+
+#ifdef CONFIG_SFC_MCDI_MON
+static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx)
+{
+ EFX_WARN_ON_PARANOID(!efx->mcdi);
+ return &efx->mcdi->hwmon;
+}
+#endif
+
+int efx_mcdi_init(struct efx_nic *efx);
+void efx_mcdi_detach(struct efx_nic *efx);
+void efx_mcdi_fini(struct efx_nic *efx);
+
+int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, const efx_dword_t *inbuf,
+ size_t inlen, efx_dword_t *outbuf, size_t outlen,
+ size_t *outlen_actual);
+int efx_mcdi_rpc_quiet(struct efx_nic *efx, unsigned cmd,
+ const efx_dword_t *inbuf, size_t inlen,
+ efx_dword_t *outbuf, size_t outlen,
+ size_t *outlen_actual);
+
+int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd,
+ const efx_dword_t *inbuf, size_t inlen);
+int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen,
+ efx_dword_t *outbuf, size_t outlen,
+ size_t *outlen_actual);
+int efx_mcdi_rpc_finish_quiet(struct efx_nic *efx, unsigned cmd,
+ size_t inlen, efx_dword_t *outbuf,
+ size_t outlen, size_t *outlen_actual);
+
+typedef void efx_mcdi_async_completer(struct efx_nic *efx,
+ unsigned long cookie, int rc,
+ efx_dword_t *outbuf,
+ size_t outlen_actual);
+int efx_mcdi_rpc_async(struct efx_nic *efx, unsigned int cmd,
+ const efx_dword_t *inbuf, size_t inlen, size_t outlen,
+ efx_mcdi_async_completer *complete,
+ unsigned long cookie);
+int efx_mcdi_rpc_async_quiet(struct efx_nic *efx, unsigned int cmd,
+ const efx_dword_t *inbuf, size_t inlen,
+ size_t outlen,
+ efx_mcdi_async_completer *complete,
+ unsigned long cookie);
+
+void efx_mcdi_display_error(struct efx_nic *efx, unsigned cmd,
+ size_t inlen, efx_dword_t *outbuf,
+ size_t outlen, int rc);
+
+int efx_mcdi_poll_reboot(struct efx_nic *efx);
+void efx_mcdi_mode_poll(struct efx_nic *efx);
+void efx_mcdi_mode_event(struct efx_nic *efx);
+void efx_mcdi_flush_async(struct efx_nic *efx);
+
+void efx_mcdi_process_event(struct efx_channel *channel, efx_qword_t *event);
+void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev);
+
+/* We expect that 16- and 32-bit fields in MCDI requests and responses
+ * are appropriately aligned, but 64-bit fields are only
+ * 32-bit-aligned. Also, on Siena we must copy to the MC shared
+ * memory strictly 32 bits at a time, so add any necessary padding.
+ */
+#define MCDI_TX_BUF_LEN(_len) DIV_ROUND_UP((_len), 4)
+#define _MCDI_DECLARE_BUF(_name, _len) \
+ efx_dword_t _name[DIV_ROUND_UP(_len, 4)]
+#define MCDI_DECLARE_BUF(_name, _len) \
+ _MCDI_DECLARE_BUF(_name, _len) = {{{0}}}
+#define MCDI_DECLARE_BUF_ERR(_name) \
+ MCDI_DECLARE_BUF(_name, 8)
+#define _MCDI_PTR(_buf, _offset) \
+ ((u8 *)(_buf) + (_offset))
+#define MCDI_PTR(_buf, _field) \
+ _MCDI_PTR(_buf, MC_CMD_ ## _field ## _OFST)
+#define _MCDI_CHECK_ALIGN(_ofst, _align) \
+ ((_ofst) + BUILD_BUG_ON_ZERO((_ofst) & (_align - 1)))
+#define _MCDI_DWORD(_buf, _field) \
+ ((_buf) + (_MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _OFST, 4) >> 2))
+
+#define MCDI_BYTE(_buf, _field) \
+ ((void)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 1), \
+ *MCDI_PTR(_buf, _field))
+#define MCDI_WORD(_buf, _field) \
+ ((u16)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 2) + \
+ le16_to_cpu(*(__force const __le16 *)MCDI_PTR(_buf, _field)))
+#define MCDI_SET_DWORD(_buf, _field, _value) \
+ EFX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), EFX_DWORD_0, _value)
+#define MCDI_DWORD(_buf, _field) \
+ EFX_DWORD_FIELD(*_MCDI_DWORD(_buf, _field), EFX_DWORD_0)
+#define MCDI_POPULATE_DWORD_1(_buf, _field, _name1, _value1) \
+ EFX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), \
+ MC_CMD_ ## _name1, _value1)
+#define MCDI_POPULATE_DWORD_2(_buf, _field, _name1, _value1, \
+ _name2, _value2) \
+ EFX_POPULATE_DWORD_2(*_MCDI_DWORD(_buf, _field), \
+ MC_CMD_ ## _name1, _value1, \
+ MC_CMD_ ## _name2, _value2)
+#define MCDI_POPULATE_DWORD_3(_buf, _field, _name1, _value1, \
+ _name2, _value2, _name3, _value3) \
+ EFX_POPULATE_DWORD_3(*_MCDI_DWORD(_buf, _field), \
+ MC_CMD_ ## _name1, _value1, \
+ MC_CMD_ ## _name2, _value2, \
+ MC_CMD_ ## _name3, _value3)
+#define MCDI_POPULATE_DWORD_4(_buf, _field, _name1, _value1, \
+ _name2, _value2, _name3, _value3, \
+ _name4, _value4) \
+ EFX_POPULATE_DWORD_4(*_MCDI_DWORD(_buf, _field), \
+ MC_CMD_ ## _name1, _value1, \
+ MC_CMD_ ## _name2, _value2, \
+ MC_CMD_ ## _name3, _value3, \
+ MC_CMD_ ## _name4, _value4)
+#define MCDI_POPULATE_DWORD_5(_buf, _field, _name1, _value1, \
+ _name2, _value2, _name3, _value3, \
+ _name4, _value4, _name5, _value5) \
+ EFX_POPULATE_DWORD_5(*_MCDI_DWORD(_buf, _field), \
+ MC_CMD_ ## _name1, _value1, \
+ MC_CMD_ ## _name2, _value2, \
+ MC_CMD_ ## _name3, _value3, \
+ MC_CMD_ ## _name4, _value4, \
+ MC_CMD_ ## _name5, _value5)
+#define MCDI_POPULATE_DWORD_6(_buf, _field, _name1, _value1, \
+ _name2, _value2, _name3, _value3, \
+ _name4, _value4, _name5, _value5, \
+ _name6, _value6) \
+ EFX_POPULATE_DWORD_6(*_MCDI_DWORD(_buf, _field), \
+ MC_CMD_ ## _name1, _value1, \
+ MC_CMD_ ## _name2, _value2, \
+ MC_CMD_ ## _name3, _value3, \
+ MC_CMD_ ## _name4, _value4, \
+ MC_CMD_ ## _name5, _value5, \
+ MC_CMD_ ## _name6, _value6)
+#define MCDI_POPULATE_DWORD_7(_buf, _field, _name1, _value1, \
+ _name2, _value2, _name3, _value3, \
+ _name4, _value4, _name5, _value5, \
+ _name6, _value6, _name7, _value7) \
+ EFX_POPULATE_DWORD_7(*_MCDI_DWORD(_buf, _field), \
+ MC_CMD_ ## _name1, _value1, \
+ MC_CMD_ ## _name2, _value2, \
+ MC_CMD_ ## _name3, _value3, \
+ MC_CMD_ ## _name4, _value4, \
+ MC_CMD_ ## _name5, _value5, \
+ MC_CMD_ ## _name6, _value6, \
+ MC_CMD_ ## _name7, _value7)
+#define MCDI_SET_QWORD(_buf, _field, _value) \
+ do { \
+ EFX_POPULATE_DWORD_1(_MCDI_DWORD(_buf, _field)[0], \
+ EFX_DWORD_0, (u32)(_value)); \
+ EFX_POPULATE_DWORD_1(_MCDI_DWORD(_buf, _field)[1], \
+ EFX_DWORD_0, (u64)(_value) >> 32); \
+ } while (0)
+#define MCDI_QWORD(_buf, _field) \
+ (EFX_DWORD_FIELD(_MCDI_DWORD(_buf, _field)[0], EFX_DWORD_0) | \
+ (u64)EFX_DWORD_FIELD(_MCDI_DWORD(_buf, _field)[1], EFX_DWORD_0) << 32)
+#define MCDI_FIELD(_ptr, _type, _field) \
+ EFX_EXTRACT_DWORD( \
+ *(efx_dword_t *) \
+ _MCDI_PTR(_ptr, MC_CMD_ ## _type ## _ ## _field ## _OFST & ~3),\
+ MC_CMD_ ## _type ## _ ## _field ## _LBN & 0x1f, \
+ (MC_CMD_ ## _type ## _ ## _field ## _LBN & 0x1f) + \
+ MC_CMD_ ## _type ## _ ## _field ## _WIDTH - 1)
+
+#define _MCDI_ARRAY_PTR(_buf, _field, _index, _align) \
+ (_MCDI_PTR(_buf, _MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _OFST, _align))\
+ + (_index) * _MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _LEN, _align))
+#define MCDI_DECLARE_STRUCT_PTR(_name) \
+ efx_dword_t *_name
+#define MCDI_ARRAY_STRUCT_PTR(_buf, _field, _index) \
+ ((efx_dword_t *)_MCDI_ARRAY_PTR(_buf, _field, _index, 4))
+#define MCDI_VAR_ARRAY_LEN(_len, _field) \
+ min_t(size_t, MC_CMD_ ## _field ## _MAXNUM, \
+ ((_len) - MC_CMD_ ## _field ## _OFST) / MC_CMD_ ## _field ## _LEN)
+#define MCDI_ARRAY_WORD(_buf, _field, _index) \
+ (BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 2) + \
+ le16_to_cpu(*(__force const __le16 *) \
+ _MCDI_ARRAY_PTR(_buf, _field, _index, 2)))
+#define _MCDI_ARRAY_DWORD(_buf, _field, _index) \
+ (BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 4) + \
+ (efx_dword_t *)_MCDI_ARRAY_PTR(_buf, _field, _index, 4))
+#define MCDI_SET_ARRAY_DWORD(_buf, _field, _index, _value) \
+ EFX_SET_DWORD_FIELD(*_MCDI_ARRAY_DWORD(_buf, _field, _index), \
+ EFX_DWORD_0, _value)
+#define MCDI_ARRAY_DWORD(_buf, _field, _index) \
+ EFX_DWORD_FIELD(*_MCDI_ARRAY_DWORD(_buf, _field, _index), EFX_DWORD_0)
+#define _MCDI_ARRAY_QWORD(_buf, _field, _index) \
+ (BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 8) + \
+ (efx_dword_t *)_MCDI_ARRAY_PTR(_buf, _field, _index, 4))
+#define MCDI_SET_ARRAY_QWORD(_buf, _field, _index, _value) \
+ do { \
+ EFX_SET_DWORD_FIELD(_MCDI_ARRAY_QWORD(_buf, _field, _index)[0],\
+ EFX_DWORD_0, (u32)(_value)); \
+ EFX_SET_DWORD_FIELD(_MCDI_ARRAY_QWORD(_buf, _field, _index)[1],\
+ EFX_DWORD_0, (u64)(_value) >> 32); \
+ } while (0)
+#define MCDI_ARRAY_FIELD(_buf, _field1, _type, _index, _field2) \
+ MCDI_FIELD(MCDI_ARRAY_STRUCT_PTR(_buf, _field1, _index), \
+ _type ## _TYPEDEF, _field2)
+
+#define MCDI_EVENT_FIELD(_ev, _field) \
+ EFX_QWORD_FIELD(_ev, MCDI_EVENT_ ## _field)
+
+#define MCDI_CAPABILITY(field) \
+ MC_CMD_GET_CAPABILITIES_V8_OUT_ ## field ## _LBN
+
+#define MCDI_CAPABILITY_OFST(field) \
+ MC_CMD_GET_CAPABILITIES_V8_OUT_ ## field ## _OFST
+
+#define efx_has_cap(efx, field) \
+ efx->type->check_caps(efx, \
+ MCDI_CAPABILITY(field), \
+ MCDI_CAPABILITY_OFST(field))
+
+void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len);
+int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address,
+ u16 *fw_subtype_list, u32 *capabilities);
+int efx_mcdi_log_ctrl(struct efx_nic *efx, bool evq, bool uart, u32 dest_evq);
+int efx_mcdi_nvram_types(struct efx_nic *efx, u32 *nvram_types_out);
+int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type,
+ size_t *size_out, size_t *erase_size_out,
+ bool *protected_out);
+int efx_new_mcdi_nvram_test_all(struct efx_nic *efx);
+int efx_mcdi_nvram_test_all(struct efx_nic *efx);
+int efx_mcdi_handle_assertion(struct efx_nic *efx);
+int efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode);
+int efx_mcdi_wol_filter_set_magic(struct efx_nic *efx, const u8 *mac,
+ int *id_out);
+int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out);
+int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id);
+int efx_mcdi_wol_filter_reset(struct efx_nic *efx);
+int efx_mcdi_flush_rxqs(struct efx_nic *efx);
+void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev);
+void efx_mcdi_mac_start_stats(struct efx_nic *efx);
+void efx_mcdi_mac_stop_stats(struct efx_nic *efx);
+void efx_mcdi_mac_pull_stats(struct efx_nic *efx);
+enum reset_type efx_mcdi_map_reset_reason(enum reset_type reason);
+int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method);
+int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled,
+ unsigned int *flags);
+int efx_mcdi_get_workarounds(struct efx_nic *efx, unsigned int *impl_out,
+ unsigned int *enabled_out);
+
+#ifdef CONFIG_SFC_MCDI_MON
+int efx_mcdi_mon_probe(struct efx_nic *efx);
+void efx_mcdi_mon_remove(struct efx_nic *efx);
+#else
+static inline int efx_mcdi_mon_probe(struct efx_nic *efx) { return 0; }
+static inline void efx_mcdi_mon_remove(struct efx_nic *efx) {}
+#endif
+
+#ifdef CONFIG_SFC_MTD
+int efx_mcdi_mtd_read(struct mtd_info *mtd, loff_t start, size_t len,
+ size_t *retlen, u8 *buffer);
+int efx_mcdi_mtd_erase(struct mtd_info *mtd, loff_t start, size_t len);
+int efx_mcdi_mtd_write(struct mtd_info *mtd, loff_t start, size_t len,
+ size_t *retlen, const u8 *buffer);
+int efx_mcdi_mtd_sync(struct mtd_info *mtd);
+void efx_mcdi_mtd_rename(struct efx_mtd_partition *part);
+#endif
+
+#endif /* EFX_MCDI_H */
diff --git a/drivers/net/ethernet/sfc/siena/mcdi_mon.c b/drivers/net/ethernet/sfc/siena/mcdi_mon.c
new file mode 100644
index 000000000000..5954fcfee2b1
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/mcdi_mon.c
@@ -0,0 +1,531 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2011-2013 Solarflare Communications Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/hwmon.h>
+#include <linux/stat.h>
+
+#include "net_driver.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+#include "nic.h"
+
+enum efx_hwmon_type {
+ EFX_HWMON_UNKNOWN,
+ EFX_HWMON_TEMP, /* temperature */
+ EFX_HWMON_COOL, /* cooling device, probably a heatsink */
+ EFX_HWMON_IN, /* voltage */
+ EFX_HWMON_CURR, /* current */
+ EFX_HWMON_POWER, /* power */
+ EFX_HWMON_TYPES_COUNT
+};
+
+static const char *const efx_hwmon_unit[EFX_HWMON_TYPES_COUNT] = {
+ [EFX_HWMON_TEMP] = " degC",
+ [EFX_HWMON_COOL] = " rpm", /* though nonsense for a heatsink */
+ [EFX_HWMON_IN] = " mV",
+ [EFX_HWMON_CURR] = " mA",
+ [EFX_HWMON_POWER] = " W",
+};
+
+static const struct {
+ const char *label;
+ enum efx_hwmon_type hwmon_type;
+ int port;
+} efx_mcdi_sensor_type[] = {
+#define SENSOR(name, label, hwmon_type, port) \
+ [MC_CMD_SENSOR_##name] = { label, EFX_HWMON_ ## hwmon_type, port }
+ SENSOR(CONTROLLER_TEMP, "Controller board temp.", TEMP, -1),
+ SENSOR(PHY_COMMON_TEMP, "PHY temp.", TEMP, -1),
+ SENSOR(CONTROLLER_COOLING, "Controller heat sink", COOL, -1),
+ SENSOR(PHY0_TEMP, "PHY temp.", TEMP, 0),
+ SENSOR(PHY0_COOLING, "PHY heat sink", COOL, 0),
+ SENSOR(PHY1_TEMP, "PHY temp.", TEMP, 1),
+ SENSOR(PHY1_COOLING, "PHY heat sink", COOL, 1),
+ SENSOR(IN_1V0, "1.0V supply", IN, -1),
+ SENSOR(IN_1V2, "1.2V supply", IN, -1),
+ SENSOR(IN_1V8, "1.8V supply", IN, -1),
+ SENSOR(IN_2V5, "2.5V supply", IN, -1),
+ SENSOR(IN_3V3, "3.3V supply", IN, -1),
+ SENSOR(IN_12V0, "12.0V supply", IN, -1),
+ SENSOR(IN_1V2A, "1.2V analogue supply", IN, -1),
+ SENSOR(IN_VREF, "Ref. voltage", IN, -1),
+ SENSOR(OUT_VAOE, "AOE FPGA supply", IN, -1),
+ SENSOR(AOE_TEMP, "AOE FPGA temp.", TEMP, -1),
+ SENSOR(PSU_AOE_TEMP, "AOE regulator temp.", TEMP, -1),
+ SENSOR(PSU_TEMP, "Controller regulator temp.",
+ TEMP, -1),
+ SENSOR(FAN_0, "Fan 0", COOL, -1),
+ SENSOR(FAN_1, "Fan 1", COOL, -1),
+ SENSOR(FAN_2, "Fan 2", COOL, -1),
+ SENSOR(FAN_3, "Fan 3", COOL, -1),
+ SENSOR(FAN_4, "Fan 4", COOL, -1),
+ SENSOR(IN_VAOE, "AOE input supply", IN, -1),
+ SENSOR(OUT_IAOE, "AOE output current", CURR, -1),
+ SENSOR(IN_IAOE, "AOE input current", CURR, -1),
+ SENSOR(NIC_POWER, "Board power use", POWER, -1),
+ SENSOR(IN_0V9, "0.9V supply", IN, -1),
+ SENSOR(IN_I0V9, "0.9V supply current", CURR, -1),
+ SENSOR(IN_I1V2, "1.2V supply current", CURR, -1),
+ SENSOR(IN_0V9_ADC, "0.9V supply (ext. ADC)", IN, -1),
+ SENSOR(CONTROLLER_2_TEMP, "Controller board temp. 2", TEMP, -1),
+ SENSOR(VREG_INTERNAL_TEMP, "Regulator die temp.", TEMP, -1),
+ SENSOR(VREG_0V9_TEMP, "0.9V regulator temp.", TEMP, -1),
+ SENSOR(VREG_1V2_TEMP, "1.2V regulator temp.", TEMP, -1),
+ SENSOR(CONTROLLER_VPTAT,
+ "Controller PTAT voltage (int. ADC)", IN, -1),
+ SENSOR(CONTROLLER_INTERNAL_TEMP,
+ "Controller die temp. (int. ADC)", TEMP, -1),
+ SENSOR(CONTROLLER_VPTAT_EXTADC,
+ "Controller PTAT voltage (ext. ADC)", IN, -1),
+ SENSOR(CONTROLLER_INTERNAL_TEMP_EXTADC,
+ "Controller die temp. (ext. ADC)", TEMP, -1),
+ SENSOR(AMBIENT_TEMP, "Ambient temp.", TEMP, -1),
+ SENSOR(AIRFLOW, "Air flow raw", IN, -1),
+ SENSOR(VDD08D_VSS08D_CSR, "0.9V die (int. ADC)", IN, -1),
+ SENSOR(VDD08D_VSS08D_CSR_EXTADC, "0.9V die (ext. ADC)", IN, -1),
+ SENSOR(HOTPOINT_TEMP, "Controller board temp. (hotpoint)", TEMP, -1),
+#undef SENSOR
+};
+
+static const char *const sensor_status_names[] = {
+ [MC_CMD_SENSOR_STATE_OK] = "OK",
+ [MC_CMD_SENSOR_STATE_WARNING] = "Warning",
+ [MC_CMD_SENSOR_STATE_FATAL] = "Fatal",
+ [MC_CMD_SENSOR_STATE_BROKEN] = "Device failure",
+ [MC_CMD_SENSOR_STATE_NO_READING] = "No reading",
+};
+
+void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev)
+{
+ unsigned int type, state, value;
+ enum efx_hwmon_type hwmon_type = EFX_HWMON_UNKNOWN;
+ const char *name = NULL, *state_txt, *unit;
+
+ type = EFX_QWORD_FIELD(*ev, MCDI_EVENT_SENSOREVT_MONITOR);
+ state = EFX_QWORD_FIELD(*ev, MCDI_EVENT_SENSOREVT_STATE);
+ value = EFX_QWORD_FIELD(*ev, MCDI_EVENT_SENSOREVT_VALUE);
+
+ /* Deal gracefully with the board having more drivers than we
+ * know about, but do not expect new sensor states. */
+ if (type < ARRAY_SIZE(efx_mcdi_sensor_type)) {
+ name = efx_mcdi_sensor_type[type].label;
+ hwmon_type = efx_mcdi_sensor_type[type].hwmon_type;
+ }
+ if (!name)
+ name = "No sensor name available";
+ EFX_WARN_ON_PARANOID(state >= ARRAY_SIZE(sensor_status_names));
+ state_txt = sensor_status_names[state];
+ EFX_WARN_ON_PARANOID(hwmon_type >= EFX_HWMON_TYPES_COUNT);
+ unit = efx_hwmon_unit[hwmon_type];
+ if (!unit)
+ unit = "";
+
+ netif_err(efx, hw, efx->net_dev,
+ "Sensor %d (%s) reports condition '%s' for value %d%s\n",
+ type, name, state_txt, value, unit);
+}
+
+#ifdef CONFIG_SFC_MCDI_MON
+
+struct efx_mcdi_mon_attribute {
+ struct device_attribute dev_attr;
+ unsigned int index;
+ unsigned int type;
+ enum efx_hwmon_type hwmon_type;
+ unsigned int limit_value;
+ char name[12];
+};
+
+static int efx_mcdi_mon_update(struct efx_nic *efx)
+{
+ struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_READ_SENSORS_EXT_IN_LEN);
+ int rc;
+
+ MCDI_SET_QWORD(inbuf, READ_SENSORS_EXT_IN_DMA_ADDR,
+ hwmon->dma_buf.dma_addr);
+ MCDI_SET_DWORD(inbuf, READ_SENSORS_EXT_IN_LENGTH, hwmon->dma_buf.len);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_READ_SENSORS,
+ inbuf, sizeof(inbuf), NULL, 0, NULL);
+ if (rc == 0)
+ hwmon->last_update = jiffies;
+ return rc;
+}
+
+static int efx_mcdi_mon_get_entry(struct device *dev, unsigned int index,
+ efx_dword_t *entry)
+{
+ struct efx_nic *efx = dev_get_drvdata(dev->parent);
+ struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx);
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_READ_SENSORS_OUT_LEN != 0);
+
+ mutex_lock(&hwmon->update_lock);
+
+ /* Use cached value if last update was < 1 s ago */
+ if (time_before(jiffies, hwmon->last_update + HZ))
+ rc = 0;
+ else
+ rc = efx_mcdi_mon_update(efx);
+
+ /* Copy out the requested entry */
+ *entry = ((efx_dword_t *)hwmon->dma_buf.addr)[index];
+
+ mutex_unlock(&hwmon->update_lock);
+
+ return rc;
+}
+
+static ssize_t efx_mcdi_mon_show_value(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct efx_mcdi_mon_attribute *mon_attr =
+ container_of(attr, struct efx_mcdi_mon_attribute, dev_attr);
+ efx_dword_t entry;
+ unsigned int value, state;
+ int rc;
+
+ rc = efx_mcdi_mon_get_entry(dev, mon_attr->index, &entry);
+ if (rc)
+ return rc;
+
+ state = EFX_DWORD_FIELD(entry, MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE);
+ if (state == MC_CMD_SENSOR_STATE_NO_READING)
+ return -EBUSY;
+
+ value = EFX_DWORD_FIELD(entry, MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_VALUE);
+
+ switch (mon_attr->hwmon_type) {
+ case EFX_HWMON_TEMP:
+ /* Convert temperature from degrees to milli-degrees Celsius */
+ value *= 1000;
+ break;
+ case EFX_HWMON_POWER:
+ /* Convert power from watts to microwatts */
+ value *= 1000000;
+ break;
+ default:
+ /* No conversion needed */
+ break;
+ }
+
+ return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t efx_mcdi_mon_show_limit(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct efx_mcdi_mon_attribute *mon_attr =
+ container_of(attr, struct efx_mcdi_mon_attribute, dev_attr);
+ unsigned int value;
+
+ value = mon_attr->limit_value;
+
+ switch (mon_attr->hwmon_type) {
+ case EFX_HWMON_TEMP:
+ /* Convert temperature from degrees to milli-degrees Celsius */
+ value *= 1000;
+ break;
+ case EFX_HWMON_POWER:
+ /* Convert power from watts to microwatts */
+ value *= 1000000;
+ break;
+ default:
+ /* No conversion needed */
+ break;
+ }
+
+ return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t efx_mcdi_mon_show_alarm(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct efx_mcdi_mon_attribute *mon_attr =
+ container_of(attr, struct efx_mcdi_mon_attribute, dev_attr);
+ efx_dword_t entry;
+ int state;
+ int rc;
+
+ rc = efx_mcdi_mon_get_entry(dev, mon_attr->index, &entry);
+ if (rc)
+ return rc;
+
+ state = EFX_DWORD_FIELD(entry, MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE);
+ return sprintf(buf, "%d\n", state != MC_CMD_SENSOR_STATE_OK);
+}
+
+static ssize_t efx_mcdi_mon_show_label(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct efx_mcdi_mon_attribute *mon_attr =
+ container_of(attr, struct efx_mcdi_mon_attribute, dev_attr);
+ return sprintf(buf, "%s\n",
+ efx_mcdi_sensor_type[mon_attr->type].label);
+}
+
+static void
+efx_mcdi_mon_add_attr(struct efx_nic *efx, const char *name,
+ ssize_t (*reader)(struct device *,
+ struct device_attribute *, char *),
+ unsigned int index, unsigned int type,
+ unsigned int limit_value)
+{
+ struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx);
+ struct efx_mcdi_mon_attribute *attr = &hwmon->attrs[hwmon->n_attrs];
+
+ strlcpy(attr->name, name, sizeof(attr->name));
+ attr->index = index;
+ attr->type = type;
+ if (type < ARRAY_SIZE(efx_mcdi_sensor_type))
+ attr->hwmon_type = efx_mcdi_sensor_type[type].hwmon_type;
+ else
+ attr->hwmon_type = EFX_HWMON_UNKNOWN;
+ attr->limit_value = limit_value;
+ sysfs_attr_init(&attr->dev_attr.attr);
+ attr->dev_attr.attr.name = attr->name;
+ attr->dev_attr.attr.mode = 0444;
+ attr->dev_attr.show = reader;
+ hwmon->group.attrs[hwmon->n_attrs++] = &attr->dev_attr.attr;
+}
+
+int efx_mcdi_mon_probe(struct efx_nic *efx)
+{
+ unsigned int n_temp = 0, n_cool = 0, n_in = 0, n_curr = 0, n_power = 0;
+ struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_SENSOR_INFO_EXT_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_SENSOR_INFO_OUT_LENMAX);
+ unsigned int n_pages, n_sensors, n_attrs, page;
+ size_t outlen;
+ char name[12];
+ u32 mask;
+ int rc, i, j, type;
+
+ /* Find out how many sensors are present */
+ n_sensors = 0;
+ page = 0;
+ do {
+ MCDI_SET_DWORD(inbuf, SENSOR_INFO_EXT_IN_PAGE, page);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_SENSOR_INFO, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < MC_CMD_SENSOR_INFO_OUT_LENMIN)
+ return -EIO;
+
+ mask = MCDI_DWORD(outbuf, SENSOR_INFO_OUT_MASK);
+ n_sensors += hweight32(mask & ~(1 << MC_CMD_SENSOR_PAGE0_NEXT));
+ ++page;
+ } while (mask & (1 << MC_CMD_SENSOR_PAGE0_NEXT));
+ n_pages = page;
+
+ /* Don't create a device if there are none */
+ if (n_sensors == 0)
+ return 0;
+
+ rc = efx_nic_alloc_buffer(
+ efx, &hwmon->dma_buf,
+ n_sensors * MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_LEN,
+ GFP_KERNEL);
+ if (rc)
+ return rc;
+
+ mutex_init(&hwmon->update_lock);
+ efx_mcdi_mon_update(efx);
+
+ /* Allocate space for the maximum possible number of
+ * attributes for this set of sensors:
+ * value, min, max, crit, alarm and label for each sensor.
+ */
+ n_attrs = 6 * n_sensors;
+ hwmon->attrs = kcalloc(n_attrs, sizeof(*hwmon->attrs), GFP_KERNEL);
+ if (!hwmon->attrs) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ hwmon->group.attrs = kcalloc(n_attrs + 1, sizeof(struct attribute *),
+ GFP_KERNEL);
+ if (!hwmon->group.attrs) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ for (i = 0, j = -1, type = -1; ; i++) {
+ enum efx_hwmon_type hwmon_type;
+ const char *hwmon_prefix;
+ unsigned hwmon_index;
+ u16 min1, max1, min2, max2;
+
+ /* Find next sensor type or exit if there is none */
+ do {
+ type++;
+
+ if ((type % 32) == 0) {
+ page = type / 32;
+ j = -1;
+ if (page == n_pages)
+ goto hwmon_register;
+
+ MCDI_SET_DWORD(inbuf, SENSOR_INFO_EXT_IN_PAGE,
+ page);
+ rc = efx_mcdi_rpc(efx, MC_CMD_SENSOR_INFO,
+ inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf),
+ &outlen);
+ if (rc)
+ goto fail;
+ if (outlen < MC_CMD_SENSOR_INFO_OUT_LENMIN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ mask = (MCDI_DWORD(outbuf,
+ SENSOR_INFO_OUT_MASK) &
+ ~(1 << MC_CMD_SENSOR_PAGE0_NEXT));
+
+ /* Check again for short response */
+ if (outlen <
+ MC_CMD_SENSOR_INFO_OUT_LEN(hweight32(mask))) {
+ rc = -EIO;
+ goto fail;
+ }
+ }
+ } while (!(mask & (1 << type % 32)));
+ j++;
+
+ if (type < ARRAY_SIZE(efx_mcdi_sensor_type)) {
+ hwmon_type = efx_mcdi_sensor_type[type].hwmon_type;
+
+ /* Skip sensors specific to a different port */
+ if (hwmon_type != EFX_HWMON_UNKNOWN &&
+ efx_mcdi_sensor_type[type].port >= 0 &&
+ efx_mcdi_sensor_type[type].port !=
+ efx_port_num(efx))
+ continue;
+ } else {
+ hwmon_type = EFX_HWMON_UNKNOWN;
+ }
+
+ switch (hwmon_type) {
+ case EFX_HWMON_TEMP:
+ hwmon_prefix = "temp";
+ hwmon_index = ++n_temp; /* 1-based */
+ break;
+ case EFX_HWMON_COOL:
+ /* This is likely to be a heatsink, but there
+ * is no convention for representing cooling
+ * devices other than fans.
+ */
+ hwmon_prefix = "fan";
+ hwmon_index = ++n_cool; /* 1-based */
+ break;
+ default:
+ hwmon_prefix = "in";
+ hwmon_index = n_in++; /* 0-based */
+ break;
+ case EFX_HWMON_CURR:
+ hwmon_prefix = "curr";
+ hwmon_index = ++n_curr; /* 1-based */
+ break;
+ case EFX_HWMON_POWER:
+ hwmon_prefix = "power";
+ hwmon_index = ++n_power; /* 1-based */
+ break;
+ }
+
+ min1 = MCDI_ARRAY_FIELD(outbuf, SENSOR_ENTRY,
+ SENSOR_INFO_ENTRY, j, MIN1);
+ max1 = MCDI_ARRAY_FIELD(outbuf, SENSOR_ENTRY,
+ SENSOR_INFO_ENTRY, j, MAX1);
+ min2 = MCDI_ARRAY_FIELD(outbuf, SENSOR_ENTRY,
+ SENSOR_INFO_ENTRY, j, MIN2);
+ max2 = MCDI_ARRAY_FIELD(outbuf, SENSOR_ENTRY,
+ SENSOR_INFO_ENTRY, j, MAX2);
+
+ if (min1 != max1) {
+ snprintf(name, sizeof(name), "%s%u_input",
+ hwmon_prefix, hwmon_index);
+ efx_mcdi_mon_add_attr(
+ efx, name, efx_mcdi_mon_show_value, i, type, 0);
+
+ if (hwmon_type != EFX_HWMON_POWER) {
+ snprintf(name, sizeof(name), "%s%u_min",
+ hwmon_prefix, hwmon_index);
+ efx_mcdi_mon_add_attr(
+ efx, name, efx_mcdi_mon_show_limit,
+ i, type, min1);
+ }
+
+ snprintf(name, sizeof(name), "%s%u_max",
+ hwmon_prefix, hwmon_index);
+ efx_mcdi_mon_add_attr(
+ efx, name, efx_mcdi_mon_show_limit,
+ i, type, max1);
+
+ if (min2 != max2) {
+ /* Assume max2 is critical value.
+ * But we have no good way to expose min2.
+ */
+ snprintf(name, sizeof(name), "%s%u_crit",
+ hwmon_prefix, hwmon_index);
+ efx_mcdi_mon_add_attr(
+ efx, name, efx_mcdi_mon_show_limit,
+ i, type, max2);
+ }
+ }
+
+ snprintf(name, sizeof(name), "%s%u_alarm",
+ hwmon_prefix, hwmon_index);
+ efx_mcdi_mon_add_attr(
+ efx, name, efx_mcdi_mon_show_alarm, i, type, 0);
+
+ if (type < ARRAY_SIZE(efx_mcdi_sensor_type) &&
+ efx_mcdi_sensor_type[type].label) {
+ snprintf(name, sizeof(name), "%s%u_label",
+ hwmon_prefix, hwmon_index);
+ efx_mcdi_mon_add_attr(
+ efx, name, efx_mcdi_mon_show_label, i, type, 0);
+ }
+ }
+
+hwmon_register:
+ hwmon->groups[0] = &hwmon->group;
+ hwmon->device = hwmon_device_register_with_groups(&efx->pci_dev->dev,
+ KBUILD_MODNAME, NULL,
+ hwmon->groups);
+ if (IS_ERR(hwmon->device)) {
+ rc = PTR_ERR(hwmon->device);
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ efx_mcdi_mon_remove(efx);
+ return rc;
+}
+
+void efx_mcdi_mon_remove(struct efx_nic *efx)
+{
+ struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx);
+
+ if (hwmon->device)
+ hwmon_device_unregister(hwmon->device);
+ kfree(hwmon->attrs);
+ kfree(hwmon->group.attrs);
+ efx_nic_free_buffer(efx, &hwmon->dma_buf);
+}
+
+#endif /* CONFIG_SFC_MCDI_MON */
diff --git a/drivers/net/ethernet/sfc/siena/mcdi_port.c b/drivers/net/ethernet/sfc/siena/mcdi_port.c
new file mode 100644
index 000000000000..94c6a345c0b1
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/mcdi_port.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2009-2013 Solarflare Communications Inc.
+ */
+
+/*
+ * Driver for PHY related operations via MCDI.
+ */
+
+#include <linux/slab.h>
+#include "efx.h"
+#include "mcdi_port.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+#include "nic.h"
+#include "selftest.h"
+#include "mcdi_port_common.h"
+
+static int efx_mcdi_mdio_read(struct net_device *net_dev,
+ int prtad, int devad, u16 addr)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_MDIO_READ_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_MDIO_READ_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, MDIO_READ_IN_BUS, efx->mdio_bus);
+ MCDI_SET_DWORD(inbuf, MDIO_READ_IN_PRTAD, prtad);
+ MCDI_SET_DWORD(inbuf, MDIO_READ_IN_DEVAD, devad);
+ MCDI_SET_DWORD(inbuf, MDIO_READ_IN_ADDR, addr);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_MDIO_READ, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+
+ if (MCDI_DWORD(outbuf, MDIO_READ_OUT_STATUS) !=
+ MC_CMD_MDIO_STATUS_GOOD)
+ return -EIO;
+
+ return (u16)MCDI_DWORD(outbuf, MDIO_READ_OUT_VALUE);
+}
+
+static int efx_mcdi_mdio_write(struct net_device *net_dev,
+ int prtad, int devad, u16 addr, u16 value)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_MDIO_WRITE_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_MDIO_WRITE_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, MDIO_WRITE_IN_BUS, efx->mdio_bus);
+ MCDI_SET_DWORD(inbuf, MDIO_WRITE_IN_PRTAD, prtad);
+ MCDI_SET_DWORD(inbuf, MDIO_WRITE_IN_DEVAD, devad);
+ MCDI_SET_DWORD(inbuf, MDIO_WRITE_IN_ADDR, addr);
+ MCDI_SET_DWORD(inbuf, MDIO_WRITE_IN_VALUE, value);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_MDIO_WRITE, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+
+ if (MCDI_DWORD(outbuf, MDIO_WRITE_OUT_STATUS) !=
+ MC_CMD_MDIO_STATUS_GOOD)
+ return -EIO;
+
+ return 0;
+}
+
+u32 efx_mcdi_phy_get_caps(struct efx_nic *efx)
+{
+ struct efx_mcdi_phy_data *phy_data = efx->phy_data;
+
+ return phy_data->supported_cap;
+}
+
+bool efx_mcdi_mac_check_fault(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN);
+ size_t outlength;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), &outlength);
+ if (rc)
+ return true;
+
+ return MCDI_DWORD(outbuf, GET_LINK_OUT_MAC_FAULT) != 0;
+}
+
+int efx_mcdi_port_probe(struct efx_nic *efx)
+{
+ int rc;
+
+ /* Set up MDIO structure for PHY */
+ efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
+ efx->mdio.mdio_read = efx_mcdi_mdio_read;
+ efx->mdio.mdio_write = efx_mcdi_mdio_write;
+
+ /* Fill out MDIO structure, loopback modes, and initial link state */
+ rc = efx_mcdi_phy_probe(efx);
+ if (rc != 0)
+ return rc;
+
+ return efx_mcdi_mac_init_stats(efx);
+}
+
+void efx_mcdi_port_remove(struct efx_nic *efx)
+{
+ efx_mcdi_phy_remove(efx);
+ efx_mcdi_mac_fini_stats(efx);
+}
diff --git a/drivers/net/ethernet/sfc/siena/mcdi_port.h b/drivers/net/ethernet/sfc/siena/mcdi_port.h
new file mode 100644
index 000000000000..07863ddbe740
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/mcdi_port.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2008-2013 Solarflare Communications Inc.
+ * Copyright 2019-2020 Xilinx Inc.
+ */
+
+#ifndef EFX_MCDI_PORT_H
+#define EFX_MCDI_PORT_H
+
+#include "net_driver.h"
+
+u32 efx_mcdi_phy_get_caps(struct efx_nic *efx);
+bool efx_mcdi_mac_check_fault(struct efx_nic *efx);
+int efx_mcdi_port_probe(struct efx_nic *efx);
+void efx_mcdi_port_remove(struct efx_nic *efx);
+
+#endif /* EFX_MCDI_PORT_H */
diff --git a/drivers/net/ethernet/sfc/siena/mcdi_port_common.c b/drivers/net/ethernet/sfc/siena/mcdi_port_common.c
new file mode 100644
index 000000000000..899cc1671004
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/mcdi_port_common.c
@@ -0,0 +1,1301 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "mcdi_port_common.h"
+#include "efx_common.h"
+#include "nic.h"
+
+int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0);
+ BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name));
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS);
+ cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE);
+ cfg->supported_cap =
+ MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP);
+ cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL);
+ cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT);
+ cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK);
+ memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME),
+ sizeof(cfg->name));
+ cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE);
+ cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK);
+ memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION),
+ sizeof(cfg->revision));
+
+ return 0;
+
+fail:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+void efx_link_set_advertising(struct efx_nic *efx,
+ const unsigned long *advertising)
+{
+ memcpy(efx->link_advertising, advertising,
+ sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK()));
+
+ efx->link_advertising[0] |= ADVERTISED_Autoneg;
+ if (advertising[0] & ADVERTISED_Pause)
+ efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
+ else
+ efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
+ if (advertising[0] & ADVERTISED_Asym_Pause)
+ efx->wanted_fc ^= EFX_FC_TX;
+}
+
+int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities,
+ u32 flags, u32 loopback_mode, u32 loopback_speed)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN);
+
+ BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0);
+
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities);
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags);
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode);
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed);
+
+ return efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+}
+
+int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LOOPBACK_MODES_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST +
+ MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED);
+
+ return 0;
+
+fail:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset)
+{
+ #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \
+ linkset)
+
+ bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ switch (media) {
+ case MC_CMD_MEDIA_KX4:
+ SET_BIT(Backplane);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
+ SET_BIT(1000baseKX_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
+ SET_BIT(10000baseKX4_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
+ SET_BIT(40000baseKR4_Full);
+ break;
+
+ case MC_CMD_MEDIA_XFP:
+ case MC_CMD_MEDIA_SFP_PLUS:
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ SET_BIT(FIBRE);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) {
+ SET_BIT(1000baseT_Full);
+ SET_BIT(1000baseX_Full);
+ }
+ if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) {
+ SET_BIT(10000baseCR_Full);
+ SET_BIT(10000baseLR_Full);
+ SET_BIT(10000baseSR_Full);
+ }
+ if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) {
+ SET_BIT(40000baseCR4_Full);
+ SET_BIT(40000baseSR4_Full);
+ }
+ if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) {
+ SET_BIT(100000baseCR4_Full);
+ SET_BIT(100000baseSR4_Full);
+ }
+ if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) {
+ SET_BIT(25000baseCR_Full);
+ SET_BIT(25000baseSR_Full);
+ }
+ if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN))
+ SET_BIT(50000baseCR2_Full);
+ break;
+
+ case MC_CMD_MEDIA_BASE_T:
+ SET_BIT(TP);
+ if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN))
+ SET_BIT(10baseT_Half);
+ if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN))
+ SET_BIT(10baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN))
+ SET_BIT(100baseT_Half);
+ if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN))
+ SET_BIT(100baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN))
+ SET_BIT(1000baseT_Half);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
+ SET_BIT(1000baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
+ SET_BIT(10000baseT_Full);
+ break;
+ }
+
+ if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
+ SET_BIT(Pause);
+ if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
+ SET_BIT(Asym_Pause);
+ if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
+ SET_BIT(Autoneg);
+
+ #undef SET_BIT
+}
+
+u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset)
+{
+ u32 result = 0;
+
+ #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \
+ linkset)
+
+ if (TEST_BIT(10baseT_Half))
+ result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN);
+ if (TEST_BIT(10baseT_Full))
+ result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN);
+ if (TEST_BIT(100baseT_Half))
+ result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN);
+ if (TEST_BIT(100baseT_Full))
+ result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN);
+ if (TEST_BIT(1000baseT_Half))
+ result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN);
+ if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full) ||
+ TEST_BIT(1000baseX_Full))
+ result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN);
+ if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full) ||
+ TEST_BIT(10000baseCR_Full) || TEST_BIT(10000baseLR_Full) ||
+ TEST_BIT(10000baseSR_Full))
+ result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN);
+ if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full) ||
+ TEST_BIT(40000baseSR4_Full))
+ result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN);
+ if (TEST_BIT(100000baseCR4_Full) || TEST_BIT(100000baseSR4_Full))
+ result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN);
+ if (TEST_BIT(25000baseCR_Full) || TEST_BIT(25000baseSR_Full))
+ result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN);
+ if (TEST_BIT(50000baseCR2_Full))
+ result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN);
+ if (TEST_BIT(Pause))
+ result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN);
+ if (TEST_BIT(Asym_Pause))
+ result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN);
+ if (TEST_BIT(Autoneg))
+ result |= (1 << MC_CMD_PHY_CAP_AN_LBN);
+
+ #undef TEST_BIT
+
+ return result;
+}
+
+u32 efx_get_mcdi_phy_flags(struct efx_nic *efx)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ enum efx_phy_mode mode, supported;
+ u32 flags;
+
+ /* TODO: Advertise the capabilities supported by this PHY */
+ supported = 0;
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN))
+ supported |= PHY_MODE_TX_DISABLED;
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN))
+ supported |= PHY_MODE_LOW_POWER;
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN))
+ supported |= PHY_MODE_OFF;
+
+ mode = efx->phy_mode & supported;
+
+ flags = 0;
+ if (mode & PHY_MODE_TX_DISABLED)
+ flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN);
+ if (mode & PHY_MODE_LOW_POWER)
+ flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN);
+ if (mode & PHY_MODE_OFF)
+ flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN);
+
+ return flags;
+}
+
+u8 mcdi_to_ethtool_media(u32 media)
+{
+ switch (media) {
+ case MC_CMD_MEDIA_XAUI:
+ case MC_CMD_MEDIA_CX4:
+ case MC_CMD_MEDIA_KX4:
+ return PORT_OTHER;
+
+ case MC_CMD_MEDIA_XFP:
+ case MC_CMD_MEDIA_SFP_PLUS:
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ return PORT_FIBRE;
+
+ case MC_CMD_MEDIA_BASE_T:
+ return PORT_TP;
+
+ default:
+ return PORT_OTHER;
+ }
+}
+
+void efx_mcdi_phy_decode_link(struct efx_nic *efx,
+ struct efx_link_state *link_state,
+ u32 speed, u32 flags, u32 fcntl)
+{
+ switch (fcntl) {
+ case MC_CMD_FCNTL_AUTO:
+ WARN_ON(1); /* This is not a link mode */
+ link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX;
+ break;
+ case MC_CMD_FCNTL_BIDIR:
+ link_state->fc = EFX_FC_TX | EFX_FC_RX;
+ break;
+ case MC_CMD_FCNTL_RESPOND:
+ link_state->fc = EFX_FC_RX;
+ break;
+ default:
+ WARN_ON(1);
+ fallthrough;
+ case MC_CMD_FCNTL_OFF:
+ link_state->fc = 0;
+ break;
+ }
+
+ link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN));
+ link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN));
+ link_state->speed = speed;
+}
+
+/* The semantics of the ethtool FEC mode bitmask are not well defined,
+ * particularly the meaning of combinations of bits. Which means we get to
+ * define our own semantics, as follows:
+ * OFF overrides any other bits, and means "disable all FEC" (with the
+ * exception of 25G KR4/CR4, where it is not possible to reject it if AN
+ * partner requests it).
+ * AUTO on its own means use cable requirements and link partner autoneg with
+ * fw-default preferences for the cable type.
+ * AUTO and either RS or BASER means use the specified FEC type if cable and
+ * link partner support it, otherwise autoneg/fw-default.
+ * RS or BASER alone means use the specified FEC type if cable and link partner
+ * support it and either requests it, otherwise no FEC.
+ * Both RS and BASER (whether AUTO or not) means use FEC if cable and link
+ * partner support it, preferring RS to BASER.
+ */
+u32 ethtool_fec_caps_to_mcdi(u32 supported_cap, u32 ethtool_cap)
+{
+ u32 ret = 0;
+
+ if (ethtool_cap & ETHTOOL_FEC_OFF)
+ return 0;
+
+ if (ethtool_cap & ETHTOOL_FEC_AUTO)
+ ret |= ((1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_RS_FEC_LBN)) & supported_cap;
+ if (ethtool_cap & ETHTOOL_FEC_RS &&
+ supported_cap & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN))
+ ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN);
+ if (ethtool_cap & ETHTOOL_FEC_BASER) {
+ if (supported_cap & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN))
+ ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN);
+ if (supported_cap & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN))
+ ret |= (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN);
+ }
+ return ret;
+}
+
+/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function
+ * can never produce, (baser xor rs) and neither req; the implementation below
+ * maps both of those to AUTO. This should never matter, and it's not clear
+ * what a better mapping would be anyway.
+ */
+u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g)
+{
+ bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN),
+ rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN),
+ baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)
+ : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN),
+ baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN)
+ : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN);
+
+ if (!baser && !rs)
+ return ETHTOOL_FEC_OFF;
+ return (rs_req ? ETHTOOL_FEC_RS : 0) |
+ (baser_req ? ETHTOOL_FEC_BASER : 0) |
+ (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO);
+}
+
+/* Verify that the forced flow control settings (!EFX_FC_AUTO) are
+ * supported by the link partner. Warn the user if this isn't the case
+ */
+void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ u32 rmtadv;
+
+ /* The link partner capabilities are only relevant if the
+ * link supports flow control autonegotiation
+ */
+ if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
+ return;
+
+ /* If flow control autoneg is supported and enabled, then fine */
+ if (efx->wanted_fc & EFX_FC_AUTO)
+ return;
+
+ rmtadv = 0;
+ if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
+ rmtadv |= ADVERTISED_Pause;
+ if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
+ rmtadv |= ADVERTISED_Asym_Pause;
+
+ if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause)
+ netif_err(efx, link, efx->net_dev,
+ "warning: link partner doesn't support pause frames");
+}
+
+bool efx_mcdi_phy_poll(struct efx_nic *efx)
+{
+ struct efx_link_state old_state = efx->link_state;
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN);
+ int rc;
+
+ WARN_ON(!mutex_is_locked(&efx->mac_lock));
+
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ efx->link_state.up = false;
+ else
+ efx_mcdi_phy_decode_link(
+ efx, &efx->link_state,
+ MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED),
+ MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS),
+ MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL));
+
+ return !efx_link_state_equal(&efx->link_state, &old_state);
+}
+
+int efx_mcdi_phy_probe(struct efx_nic *efx)
+{
+ struct efx_mcdi_phy_data *phy_data;
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN);
+ u32 caps;
+ int rc;
+
+ /* Initialise and populate phy_data */
+ phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL);
+ if (phy_data == NULL)
+ return -ENOMEM;
+
+ rc = efx_mcdi_get_phy_cfg(efx, phy_data);
+ if (rc != 0)
+ goto fail;
+
+ /* Read initial link advertisement */
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ goto fail;
+
+ /* Fill out nic state */
+ efx->phy_data = phy_data;
+ efx->phy_type = phy_data->type;
+
+ efx->mdio_bus = phy_data->channel;
+ efx->mdio.prtad = phy_data->port;
+ efx->mdio.mmds = phy_data->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22);
+ efx->mdio.mode_support = 0;
+ if (phy_data->mmd_mask & (1 << MC_CMD_MMD_CLAUSE22))
+ efx->mdio.mode_support |= MDIO_SUPPORTS_C22;
+ if (phy_data->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22))
+ efx->mdio.mode_support |= MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
+
+ caps = MCDI_DWORD(outbuf, GET_LINK_OUT_CAP);
+ if (caps & (1 << MC_CMD_PHY_CAP_AN_LBN))
+ mcdi_to_ethtool_linkset(phy_data->media, caps,
+ efx->link_advertising);
+ else
+ phy_data->forced_cap = caps;
+
+ /* Assert that we can map efx -> mcdi loopback modes */
+ BUILD_BUG_ON(LOOPBACK_NONE != MC_CMD_LOOPBACK_NONE);
+ BUILD_BUG_ON(LOOPBACK_DATA != MC_CMD_LOOPBACK_DATA);
+ BUILD_BUG_ON(LOOPBACK_GMAC != MC_CMD_LOOPBACK_GMAC);
+ BUILD_BUG_ON(LOOPBACK_XGMII != MC_CMD_LOOPBACK_XGMII);
+ BUILD_BUG_ON(LOOPBACK_XGXS != MC_CMD_LOOPBACK_XGXS);
+ BUILD_BUG_ON(LOOPBACK_XAUI != MC_CMD_LOOPBACK_XAUI);
+ BUILD_BUG_ON(LOOPBACK_GMII != MC_CMD_LOOPBACK_GMII);
+ BUILD_BUG_ON(LOOPBACK_SGMII != MC_CMD_LOOPBACK_SGMII);
+ BUILD_BUG_ON(LOOPBACK_XGBR != MC_CMD_LOOPBACK_XGBR);
+ BUILD_BUG_ON(LOOPBACK_XFI != MC_CMD_LOOPBACK_XFI);
+ BUILD_BUG_ON(LOOPBACK_XAUI_FAR != MC_CMD_LOOPBACK_XAUI_FAR);
+ BUILD_BUG_ON(LOOPBACK_GMII_FAR != MC_CMD_LOOPBACK_GMII_FAR);
+ BUILD_BUG_ON(LOOPBACK_SGMII_FAR != MC_CMD_LOOPBACK_SGMII_FAR);
+ BUILD_BUG_ON(LOOPBACK_XFI_FAR != MC_CMD_LOOPBACK_XFI_FAR);
+ BUILD_BUG_ON(LOOPBACK_GPHY != MC_CMD_LOOPBACK_GPHY);
+ BUILD_BUG_ON(LOOPBACK_PHYXS != MC_CMD_LOOPBACK_PHYXS);
+ BUILD_BUG_ON(LOOPBACK_PCS != MC_CMD_LOOPBACK_PCS);
+ BUILD_BUG_ON(LOOPBACK_PMAPMD != MC_CMD_LOOPBACK_PMAPMD);
+ BUILD_BUG_ON(LOOPBACK_XPORT != MC_CMD_LOOPBACK_XPORT);
+ BUILD_BUG_ON(LOOPBACK_XGMII_WS != MC_CMD_LOOPBACK_XGMII_WS);
+ BUILD_BUG_ON(LOOPBACK_XAUI_WS != MC_CMD_LOOPBACK_XAUI_WS);
+ BUILD_BUG_ON(LOOPBACK_XAUI_WS_FAR != MC_CMD_LOOPBACK_XAUI_WS_FAR);
+ BUILD_BUG_ON(LOOPBACK_XAUI_WS_NEAR != MC_CMD_LOOPBACK_XAUI_WS_NEAR);
+ BUILD_BUG_ON(LOOPBACK_GMII_WS != MC_CMD_LOOPBACK_GMII_WS);
+ BUILD_BUG_ON(LOOPBACK_XFI_WS != MC_CMD_LOOPBACK_XFI_WS);
+ BUILD_BUG_ON(LOOPBACK_XFI_WS_FAR != MC_CMD_LOOPBACK_XFI_WS_FAR);
+ BUILD_BUG_ON(LOOPBACK_PHYXS_WS != MC_CMD_LOOPBACK_PHYXS_WS);
+
+ rc = efx_mcdi_loopback_modes(efx, &efx->loopback_modes);
+ if (rc != 0)
+ goto fail;
+ /* The MC indicates that LOOPBACK_NONE is a valid loopback mode,
+ * but by convention we don't
+ */
+ efx->loopback_modes &= ~(1 << LOOPBACK_NONE);
+
+ /* Set the initial link mode */
+ efx_mcdi_phy_decode_link(efx, &efx->link_state,
+ MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED),
+ MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS),
+ MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL));
+
+ /* Record the initial FEC configuration (or nearest approximation
+ * representable in the ethtool configuration space)
+ */
+ efx->fec_config = mcdi_fec_caps_to_ethtool(caps,
+ efx->link_state.speed == 25000 ||
+ efx->link_state.speed == 50000);
+
+ /* Default to Autonegotiated flow control if the PHY supports it */
+ efx->wanted_fc = EFX_FC_RX | EFX_FC_TX;
+ if (phy_data->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
+ efx->wanted_fc |= EFX_FC_AUTO;
+ efx_link_set_wanted_fc(efx, efx->wanted_fc);
+
+ return 0;
+
+fail:
+ kfree(phy_data);
+ return rc;
+}
+
+void efx_mcdi_phy_remove(struct efx_nic *efx)
+{
+ struct efx_mcdi_phy_data *phy_data = efx->phy_data;
+
+ efx->phy_data = NULL;
+ kfree(phy_data);
+}
+
+void efx_mcdi_phy_get_link_ksettings(struct efx_nic *efx, struct ethtool_link_ksettings *cmd)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN);
+ int rc;
+
+ cmd->base.speed = efx->link_state.speed;
+ cmd->base.duplex = efx->link_state.fd;
+ cmd->base.port = mcdi_to_ethtool_media(phy_cfg->media);
+ cmd->base.phy_address = phy_cfg->port;
+ cmd->base.autoneg = !!(efx->link_advertising[0] & ADVERTISED_Autoneg);
+ cmd->base.mdio_support = (efx->mdio.mode_support &
+ (MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22));
+
+ mcdi_to_ethtool_linkset(phy_cfg->media, phy_cfg->supported_cap,
+ cmd->link_modes.supported);
+ memcpy(cmd->link_modes.advertising, efx->link_advertising,
+ sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK()));
+
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ return;
+ mcdi_to_ethtool_linkset(phy_cfg->media,
+ MCDI_DWORD(outbuf, GET_LINK_OUT_LP_CAP),
+ cmd->link_modes.lp_advertising);
+}
+
+int efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx, const struct ethtool_link_ksettings *cmd)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ u32 caps;
+ int rc;
+
+ if (cmd->base.autoneg) {
+ caps = (ethtool_linkset_to_mcdi_cap(cmd->link_modes.advertising) |
+ 1 << MC_CMD_PHY_CAP_AN_LBN);
+ } else if (cmd->base.duplex) {
+ switch (cmd->base.speed) {
+ case 10: caps = 1 << MC_CMD_PHY_CAP_10FDX_LBN; break;
+ case 100: caps = 1 << MC_CMD_PHY_CAP_100FDX_LBN; break;
+ case 1000: caps = 1 << MC_CMD_PHY_CAP_1000FDX_LBN; break;
+ case 10000: caps = 1 << MC_CMD_PHY_CAP_10000FDX_LBN; break;
+ case 40000: caps = 1 << MC_CMD_PHY_CAP_40000FDX_LBN; break;
+ case 100000: caps = 1 << MC_CMD_PHY_CAP_100000FDX_LBN; break;
+ case 25000: caps = 1 << MC_CMD_PHY_CAP_25000FDX_LBN; break;
+ case 50000: caps = 1 << MC_CMD_PHY_CAP_50000FDX_LBN; break;
+ default: return -EINVAL;
+ }
+ } else {
+ switch (cmd->base.speed) {
+ case 10: caps = 1 << MC_CMD_PHY_CAP_10HDX_LBN; break;
+ case 100: caps = 1 << MC_CMD_PHY_CAP_100HDX_LBN; break;
+ case 1000: caps = 1 << MC_CMD_PHY_CAP_1000HDX_LBN; break;
+ default: return -EINVAL;
+ }
+ }
+
+ caps |= ethtool_fec_caps_to_mcdi(phy_cfg->supported_cap, efx->fec_config);
+
+ rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
+ efx->loopback_mode, 0);
+ if (rc)
+ return rc;
+
+ if (cmd->base.autoneg) {
+ efx_link_set_advertising(efx, cmd->link_modes.advertising);
+ phy_cfg->forced_cap = 0;
+ } else {
+ efx_link_clear_advertising(efx);
+ phy_cfg->forced_cap = caps;
+ }
+ return 0;
+}
+
+int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN);
+ u32 caps, active, speed; /* MCDI format */
+ bool is_25g = false;
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN)
+ return -EOPNOTSUPP;
+
+ /* behaviour for 25G/50G links depends on 25G BASER bit */
+ speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED);
+ is_25g = speed == 25000 || speed == 50000;
+
+ caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP);
+ fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g);
+ /* BASER is never supported on 100G */
+ if (speed == 100000)
+ fec->fec &= ~ETHTOOL_FEC_BASER;
+
+ active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE);
+ switch (active) {
+ case MC_CMD_FEC_NONE:
+ fec->active_fec = ETHTOOL_FEC_OFF;
+ break;
+ case MC_CMD_FEC_BASER:
+ fec->active_fec = ETHTOOL_FEC_BASER;
+ break;
+ case MC_CMD_FEC_RS:
+ fec->active_fec = ETHTOOL_FEC_RS;
+ break;
+ default:
+ netif_warn(efx, hw, efx->net_dev,
+ "Firmware reports unrecognised FEC_TYPE %u\n",
+ active);
+ /* We don't know what firmware has picked. AUTO is as good a
+ * "can't happen" value as any other.
+ */
+ fec->active_fec = ETHTOOL_FEC_AUTO;
+ break;
+ }
+
+ return 0;
+}
+
+/* Basic validation to ensure that the caps we are going to attempt to set are
+ * in fact supported by the adapter. Note that 'no FEC' is always supported.
+ */
+static int ethtool_fec_supported(u32 supported_cap, u32 ethtool_cap)
+{
+ if (ethtool_cap & ETHTOOL_FEC_OFF)
+ return 0;
+
+ if (ethtool_cap &&
+ !ethtool_fec_caps_to_mcdi(supported_cap, ethtool_cap))
+ return -EINVAL;
+ return 0;
+}
+
+int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, const struct ethtool_fecparam *fec)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ u32 caps;
+ int rc;
+
+ rc = ethtool_fec_supported(phy_cfg->supported_cap, fec->fec);
+ if (rc)
+ return rc;
+
+ /* Work out what efx_mcdi_phy_set_link_ksettings() would produce from
+ * saved advertising bits
+ */
+ if (test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, efx->link_advertising))
+ caps = (ethtool_linkset_to_mcdi_cap(efx->link_advertising) |
+ 1 << MC_CMD_PHY_CAP_AN_LBN);
+ else
+ caps = phy_cfg->forced_cap;
+
+ caps |= ethtool_fec_caps_to_mcdi(phy_cfg->supported_cap, fec->fec);
+ rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
+ efx->loopback_mode, 0);
+ if (rc)
+ return rc;
+
+ /* Record the new FEC setting for subsequent set_link calls */
+ efx->fec_config = fec->fec;
+ return 0;
+}
+
+int efx_mcdi_phy_test_alive(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+
+ if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN)
+ return -EIO;
+ if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK)
+ return -EINVAL;
+
+ return 0;
+}
+
+int efx_mcdi_port_reconfigure(struct efx_nic *efx)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ u32 caps = (efx->link_advertising[0] ?
+ ethtool_linkset_to_mcdi_cap(efx->link_advertising) :
+ phy_cfg->forced_cap);
+
+ caps |= ethtool_fec_caps_to_mcdi(phy_cfg->supported_cap, efx->fec_config);
+
+ return efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
+ efx->loopback_mode, 0);
+}
+
+static const char *const mcdi_sft9001_cable_diag_names[] = {
+ "cable.pairA.length",
+ "cable.pairB.length",
+ "cable.pairC.length",
+ "cable.pairD.length",
+ "cable.pairA.status",
+ "cable.pairB.status",
+ "cable.pairC.status",
+ "cable.pairD.status",
+};
+
+static int efx_mcdi_bist(struct efx_nic *efx, unsigned int bist_mode,
+ int *results)
+{
+ unsigned int retry, i, count = 0;
+ size_t outlen;
+ u32 status;
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_START_BIST_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_POLL_BIST_OUT_SFT9001_LEN);
+ u8 *ptr;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_START_BIST_OUT_LEN != 0);
+ MCDI_SET_DWORD(inbuf, START_BIST_IN_TYPE, bist_mode);
+ rc = efx_mcdi_rpc(efx, MC_CMD_START_BIST,
+ inbuf, MC_CMD_START_BIST_IN_LEN, NULL, 0, NULL);
+ if (rc)
+ goto out;
+
+ /* Wait up to 10s for BIST to finish */
+ for (retry = 0; retry < 100; ++retry) {
+ BUILD_BUG_ON(MC_CMD_POLL_BIST_IN_LEN != 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_POLL_BIST, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto out;
+
+ status = MCDI_DWORD(outbuf, POLL_BIST_OUT_RESULT);
+ if (status != MC_CMD_POLL_BIST_RUNNING)
+ goto finished;
+
+ msleep(100);
+ }
+
+ rc = -ETIMEDOUT;
+ goto out;
+
+finished:
+ results[count++] = (status == MC_CMD_POLL_BIST_PASSED) ? 1 : -1;
+
+ /* SFT9001 specific cable diagnostics output */
+ if (efx->phy_type == PHY_TYPE_SFT9001B &&
+ (bist_mode == MC_CMD_PHY_BIST_CABLE_SHORT ||
+ bist_mode == MC_CMD_PHY_BIST_CABLE_LONG)) {
+ ptr = MCDI_PTR(outbuf, POLL_BIST_OUT_SFT9001_CABLE_LENGTH_A);
+ if (status == MC_CMD_POLL_BIST_PASSED &&
+ outlen >= MC_CMD_POLL_BIST_OUT_SFT9001_LEN) {
+ for (i = 0; i < 8; i++) {
+ results[count + i] =
+ EFX_DWORD_FIELD(((efx_dword_t *)ptr)[i],
+ EFX_DWORD_0);
+ }
+ }
+ count += 8;
+ }
+ rc = count;
+
+out:
+ return rc;
+}
+
+int efx_mcdi_phy_run_tests(struct efx_nic *efx, int *results, unsigned int flags)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ u32 mode;
+ int rc;
+
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_LBN)) {
+ rc = efx_mcdi_bist(efx, MC_CMD_PHY_BIST, results);
+ if (rc < 0)
+ return rc;
+
+ results += rc;
+ }
+
+ /* If we support both LONG and SHORT, then run each in response to
+ * break or not. Otherwise, run the one we support
+ */
+ mode = 0;
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_SHORT_LBN)) {
+ if ((flags & ETH_TEST_FL_OFFLINE) &&
+ (phy_cfg->flags &
+ (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN)))
+ mode = MC_CMD_PHY_BIST_CABLE_LONG;
+ else
+ mode = MC_CMD_PHY_BIST_CABLE_SHORT;
+ } else if (phy_cfg->flags &
+ (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN))
+ mode = MC_CMD_PHY_BIST_CABLE_LONG;
+
+ if (mode != 0) {
+ rc = efx_mcdi_bist(efx, mode, results);
+ if (rc < 0)
+ return rc;
+ results += rc;
+ }
+
+ return 0;
+}
+
+const char *efx_mcdi_phy_test_name(struct efx_nic *efx, unsigned int index)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_LBN)) {
+ if (index == 0)
+ return "bist";
+ --index;
+ }
+
+ if (phy_cfg->flags & ((1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_SHORT_LBN) |
+ (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN))) {
+ if (index == 0)
+ return "cable";
+ --index;
+
+ if (efx->phy_type == PHY_TYPE_SFT9001B) {
+ if (index < ARRAY_SIZE(mcdi_sft9001_cable_diag_names))
+ return mcdi_sft9001_cable_diag_names[index];
+ index -= ARRAY_SIZE(mcdi_sft9001_cable_diag_names);
+ }
+ }
+
+ return NULL;
+}
+
+#define SFP_PAGE_SIZE 128
+#define SFF_DIAG_TYPE_OFFSET 92
+#define SFF_DIAG_ADDR_CHANGE BIT(2)
+#define SFF_8079_NUM_PAGES 2
+#define SFF_8472_NUM_PAGES 4
+#define SFF_8436_NUM_PAGES 5
+#define SFF_DMT_LEVEL_OFFSET 94
+
+/** efx_mcdi_phy_get_module_eeprom_page() - Get a single page of module eeprom
+ * @efx: NIC context
+ * @page: EEPROM page number
+ * @data: Destination data pointer
+ * @offset: Offset in page to copy from in to data
+ * @space: Space available in data
+ *
+ * Return:
+ * >=0 - amount of data copied
+ * <0 - error
+ */
+static int efx_mcdi_phy_get_module_eeprom_page(struct efx_nic *efx,
+ unsigned int page,
+ u8 *data, ssize_t offset,
+ ssize_t space)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN);
+ unsigned int payload_len;
+ unsigned int to_copy;
+ size_t outlen;
+ int rc;
+
+ if (offset > SFP_PAGE_SIZE)
+ return -EINVAL;
+
+ to_copy = min(space, SFP_PAGE_SIZE - offset);
+
+ MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_PHY_MEDIA_INFO,
+ inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf),
+ &outlen);
+
+ if (rc)
+ return rc;
+
+ if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST +
+ SFP_PAGE_SIZE))
+ return -EIO;
+
+ payload_len = MCDI_DWORD(outbuf, GET_PHY_MEDIA_INFO_OUT_DATALEN);
+ if (payload_len != SFP_PAGE_SIZE)
+ return -EIO;
+
+ memcpy(data, MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + offset,
+ to_copy);
+
+ return to_copy;
+}
+
+static int efx_mcdi_phy_get_module_eeprom_byte(struct efx_nic *efx,
+ unsigned int page,
+ u8 byte)
+{
+ u8 data;
+ int rc;
+
+ rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, &data, byte, 1);
+ if (rc == 1)
+ return data;
+
+ return rc;
+}
+
+static int efx_mcdi_phy_diag_type(struct efx_nic *efx)
+{
+ /* Page zero of the EEPROM includes the diagnostic type at byte 92. */
+ return efx_mcdi_phy_get_module_eeprom_byte(efx, 0,
+ SFF_DIAG_TYPE_OFFSET);
+}
+
+static int efx_mcdi_phy_sff_8472_level(struct efx_nic *efx)
+{
+ /* Page zero of the EEPROM includes the DMT level at byte 94. */
+ return efx_mcdi_phy_get_module_eeprom_byte(efx, 0,
+ SFF_DMT_LEVEL_OFFSET);
+}
+
+static u32 efx_mcdi_phy_module_type(struct efx_nic *efx)
+{
+ struct efx_mcdi_phy_data *phy_data = efx->phy_data;
+
+ if (phy_data->media != MC_CMD_MEDIA_QSFP_PLUS)
+ return phy_data->media;
+
+ /* A QSFP+ NIC may actually have an SFP+ module attached.
+ * The ID is page 0, byte 0.
+ */
+ switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) {
+ case 0x3:
+ return MC_CMD_MEDIA_SFP_PLUS;
+ case 0xc:
+ case 0xd:
+ return MC_CMD_MEDIA_QSFP_PLUS;
+ default:
+ return 0;
+ }
+}
+
+int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, struct ethtool_eeprom *ee, u8 *data)
+{
+ int rc;
+ ssize_t space_remaining = ee->len;
+ unsigned int page_off;
+ bool ignore_missing;
+ int num_pages;
+ int page;
+
+ switch (efx_mcdi_phy_module_type(efx)) {
+ case MC_CMD_MEDIA_SFP_PLUS:
+ num_pages = efx_mcdi_phy_sff_8472_level(efx) > 0 ?
+ SFF_8472_NUM_PAGES : SFF_8079_NUM_PAGES;
+ page = 0;
+ ignore_missing = false;
+ break;
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ num_pages = SFF_8436_NUM_PAGES;
+ page = -1; /* We obtain the lower page by asking for -1. */
+ ignore_missing = true; /* Ignore missing pages after page 0. */
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ page_off = ee->offset % SFP_PAGE_SIZE;
+ page += ee->offset / SFP_PAGE_SIZE;
+
+ while (space_remaining && (page < num_pages)) {
+ rc = efx_mcdi_phy_get_module_eeprom_page(efx, page,
+ data, page_off,
+ space_remaining);
+
+ if (rc > 0) {
+ space_remaining -= rc;
+ data += rc;
+ page_off = 0;
+ page++;
+ } else if (rc == 0) {
+ space_remaining = 0;
+ } else if (ignore_missing && (page > 0)) {
+ int intended_size = SFP_PAGE_SIZE - page_off;
+
+ space_remaining -= intended_size;
+ if (space_remaining < 0) {
+ space_remaining = 0;
+ } else {
+ memset(data, 0, intended_size);
+ data += intended_size;
+ page_off = 0;
+ page++;
+ rc = 0;
+ }
+ } else {
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *modinfo)
+{
+ int sff_8472_level;
+ int diag_type;
+
+ switch (efx_mcdi_phy_module_type(efx)) {
+ case MC_CMD_MEDIA_SFP_PLUS:
+ sff_8472_level = efx_mcdi_phy_sff_8472_level(efx);
+
+ /* If we can't read the diagnostics level we have none. */
+ if (sff_8472_level < 0)
+ return -EOPNOTSUPP;
+
+ /* Check if this module requires the (unsupported) address
+ * change operation.
+ */
+ diag_type = efx_mcdi_phy_diag_type(efx);
+
+ if (sff_8472_level == 0 ||
+ (diag_type & SFF_DIAG_ADDR_CHANGE)) {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ }
+ break;
+
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static unsigned int efx_calc_mac_mtu(struct efx_nic *efx)
+{
+ return EFX_MAX_FRAME_LEN(efx->net_dev->mtu);
+}
+
+int efx_mcdi_set_mac(struct efx_nic *efx)
+{
+ u32 fcntl;
+ MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN);
+
+ BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0);
+
+ /* This has no effect on EF10 */
+ ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR),
+ efx->net_dev->dev_addr);
+
+ MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU, efx_calc_mac_mtu(efx));
+ MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0);
+
+ /* Set simple MAC filter for Siena */
+ MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT,
+ SET_MAC_IN_REJECT_UNCST, efx->unicast_filter);
+
+ MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS,
+ SET_MAC_IN_FLAG_INCLUDE_FCS,
+ !!(efx->net_dev->features & NETIF_F_RXFCS));
+
+ switch (efx->wanted_fc) {
+ case EFX_FC_RX | EFX_FC_TX:
+ fcntl = MC_CMD_FCNTL_BIDIR;
+ break;
+ case EFX_FC_RX:
+ fcntl = MC_CMD_FCNTL_RESPOND;
+ break;
+ default:
+ fcntl = MC_CMD_FCNTL_OFF;
+ break;
+ }
+ if (efx->wanted_fc & EFX_FC_AUTO)
+ fcntl = MC_CMD_FCNTL_AUTO;
+ if (efx->fc_disable)
+ fcntl = MC_CMD_FCNTL_OFF;
+
+ MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl);
+
+ return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes),
+ NULL, 0, NULL);
+}
+
+int efx_mcdi_set_mtu(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_MAC_EXT_IN_LEN);
+
+ BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0);
+
+ MCDI_SET_DWORD(inbuf, SET_MAC_EXT_IN_MTU, efx_calc_mac_mtu(efx));
+
+ MCDI_POPULATE_DWORD_1(inbuf, SET_MAC_EXT_IN_CONTROL,
+ SET_MAC_EXT_IN_CFG_MTU, 1);
+
+ return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+}
+
+enum efx_stats_action {
+ EFX_STATS_ENABLE,
+ EFX_STATS_DISABLE,
+ EFX_STATS_PULL,
+};
+
+static int efx_mcdi_mac_stats(struct efx_nic *efx,
+ enum efx_stats_action action, int clear)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_MAC_STATS_IN_LEN);
+ int rc;
+ int change = action == EFX_STATS_PULL ? 0 : 1;
+ int enable = action == EFX_STATS_ENABLE ? 1 : 0;
+ int period = action == EFX_STATS_ENABLE ? 1000 : 0;
+ dma_addr_t dma_addr = efx->stats_buffer.dma_addr;
+ u32 dma_len = action != EFX_STATS_DISABLE ?
+ efx->num_mac_stats * sizeof(u64) : 0;
+
+ BUILD_BUG_ON(MC_CMD_MAC_STATS_OUT_DMA_LEN != 0);
+
+ MCDI_SET_QWORD(inbuf, MAC_STATS_IN_DMA_ADDR, dma_addr);
+ MCDI_POPULATE_DWORD_7(inbuf, MAC_STATS_IN_CMD,
+ MAC_STATS_IN_DMA, !!enable,
+ MAC_STATS_IN_CLEAR, clear,
+ MAC_STATS_IN_PERIODIC_CHANGE, change,
+ MAC_STATS_IN_PERIODIC_ENABLE, enable,
+ MAC_STATS_IN_PERIODIC_CLEAR, 0,
+ MAC_STATS_IN_PERIODIC_NOEVENT, 1,
+ MAC_STATS_IN_PERIOD_MS, period);
+ MCDI_SET_DWORD(inbuf, MAC_STATS_IN_DMA_LEN, dma_len);
+
+ if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
+ MCDI_SET_DWORD(inbuf, MAC_STATS_IN_PORT_ID, efx->vport_id);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_MAC_STATS, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+ /* Expect ENOENT if DMA queues have not been set up */
+ if (rc && (rc != -ENOENT || atomic_read(&efx->active_queues)))
+ efx_mcdi_display_error(efx, MC_CMD_MAC_STATS, sizeof(inbuf),
+ NULL, 0, rc);
+ return rc;
+}
+
+void efx_mcdi_mac_start_stats(struct efx_nic *efx)
+{
+ __le64 *dma_stats = efx->stats_buffer.addr;
+
+ dma_stats[efx->num_mac_stats - 1] = EFX_MC_STATS_GENERATION_INVALID;
+
+ efx_mcdi_mac_stats(efx, EFX_STATS_ENABLE, 0);
+}
+
+void efx_mcdi_mac_stop_stats(struct efx_nic *efx)
+{
+ efx_mcdi_mac_stats(efx, EFX_STATS_DISABLE, 0);
+}
+
+#define EFX_MAC_STATS_WAIT_US 100
+#define EFX_MAC_STATS_WAIT_ATTEMPTS 10
+
+void efx_mcdi_mac_pull_stats(struct efx_nic *efx)
+{
+ __le64 *dma_stats = efx->stats_buffer.addr;
+ int attempts = EFX_MAC_STATS_WAIT_ATTEMPTS;
+
+ dma_stats[efx->num_mac_stats - 1] = EFX_MC_STATS_GENERATION_INVALID;
+ efx_mcdi_mac_stats(efx, EFX_STATS_PULL, 0);
+
+ while (dma_stats[efx->num_mac_stats - 1] ==
+ EFX_MC_STATS_GENERATION_INVALID &&
+ attempts-- != 0)
+ udelay(EFX_MAC_STATS_WAIT_US);
+}
+
+int efx_mcdi_mac_init_stats(struct efx_nic *efx)
+{
+ int rc;
+
+ if (!efx->num_mac_stats)
+ return 0;
+
+ /* Allocate buffer for stats */
+ rc = efx_nic_alloc_buffer(efx, &efx->stats_buffer,
+ efx->num_mac_stats * sizeof(u64), GFP_KERNEL);
+ if (rc) {
+ netif_warn(efx, probe, efx->net_dev,
+ "failed to allocate DMA buffer: %d\n", rc);
+ return rc;
+ }
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "stats buffer at %llx (virt %p phys %llx)\n",
+ (u64) efx->stats_buffer.dma_addr,
+ efx->stats_buffer.addr,
+ (u64) virt_to_phys(efx->stats_buffer.addr));
+
+ return 0;
+}
+
+void efx_mcdi_mac_fini_stats(struct efx_nic *efx)
+{
+ efx_nic_free_buffer(efx, &efx->stats_buffer);
+}
+
+/* Get physical port number (EF10 only; on Siena it is same as PF number) */
+int efx_mcdi_port_get_number(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN);
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0,
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ return rc;
+
+ return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT);
+}
+
+static unsigned int efx_mcdi_event_link_speed[] = {
+ [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100,
+ [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000,
+};
+
+void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev)
+{
+ u32 flags, fcntl, speed, lpa;
+
+ speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED);
+ EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed));
+ speed = efx_mcdi_event_link_speed[speed];
+
+ flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS);
+ fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL);
+ lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP);
+
+ /* efx->link_state is only modified by efx_mcdi_phy_get_link(),
+ * which is only run after flushing the event queues. Therefore, it
+ * is safe to modify the link state outside of the mac_lock here.
+ */
+ efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl);
+
+ efx_mcdi_phy_check_fcntl(efx, lpa);
+
+ efx_link_status_changed(efx);
+}
diff --git a/drivers/net/ethernet/sfc/siena/mcdi_port_common.h b/drivers/net/ethernet/sfc/siena/mcdi_port_common.h
new file mode 100644
index 000000000000..ed31690e591c
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/mcdi_port_common.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+#ifndef EFX_MCDI_PORT_COMMON_H
+#define EFX_MCDI_PORT_COMMON_H
+
+#include "net_driver.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+
+struct efx_mcdi_phy_data {
+ u32 flags;
+ u32 type;
+ u32 supported_cap;
+ u32 channel;
+ u32 port;
+ u32 stats_mask;
+ u8 name[20];
+ u32 media;
+ u32 mmd_mask;
+ u8 revision[20];
+ u32 forced_cap;
+};
+
+int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg);
+void efx_link_set_advertising(struct efx_nic *efx,
+ const unsigned long *advertising);
+int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities,
+ u32 flags, u32 loopback_mode, u32 loopback_speed);
+int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes);
+void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset);
+u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset);
+u32 efx_get_mcdi_phy_flags(struct efx_nic *efx);
+u8 mcdi_to_ethtool_media(u32 media);
+void efx_mcdi_phy_decode_link(struct efx_nic *efx,
+ struct efx_link_state *link_state,
+ u32 speed, u32 flags, u32 fcntl);
+u32 ethtool_fec_caps_to_mcdi(u32 supported_cap, u32 ethtool_cap);
+u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g);
+void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa);
+bool efx_mcdi_phy_poll(struct efx_nic *efx);
+int efx_mcdi_phy_probe(struct efx_nic *efx);
+void efx_mcdi_phy_remove(struct efx_nic *efx);
+void efx_mcdi_phy_get_link_ksettings(struct efx_nic *efx, struct ethtool_link_ksettings *cmd);
+int efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx, const struct ethtool_link_ksettings *cmd);
+int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec);
+int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, const struct ethtool_fecparam *fec);
+int efx_mcdi_phy_test_alive(struct efx_nic *efx);
+int efx_mcdi_port_reconfigure(struct efx_nic *efx);
+int efx_mcdi_phy_run_tests(struct efx_nic *efx, int *results, unsigned int flags);
+const char *efx_mcdi_phy_test_name(struct efx_nic *efx, unsigned int index);
+int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, struct ethtool_eeprom *ee, u8 *data);
+int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *modinfo);
+int efx_mcdi_set_mac(struct efx_nic *efx);
+int efx_mcdi_set_mtu(struct efx_nic *efx);
+int efx_mcdi_mac_init_stats(struct efx_nic *efx);
+void efx_mcdi_mac_fini_stats(struct efx_nic *efx);
+int efx_mcdi_port_get_number(struct efx_nic *efx);
+void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/siena/mtd.c b/drivers/net/ethernet/sfc/siena/mtd.c
new file mode 100644
index 000000000000..273c08e5455f
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/mtd.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+
+#include "net_driver.h"
+#include "efx.h"
+
+#define to_efx_mtd_partition(mtd) \
+ container_of(mtd, struct efx_mtd_partition, mtd)
+
+/* MTD interface */
+
+static int efx_mtd_erase(struct mtd_info *mtd, struct erase_info *erase)
+{
+ struct efx_nic *efx = mtd->priv;
+
+ return efx->type->mtd_erase(mtd, erase->addr, erase->len);
+}
+
+static void efx_mtd_sync(struct mtd_info *mtd)
+{
+ struct efx_mtd_partition *part = to_efx_mtd_partition(mtd);
+ struct efx_nic *efx = mtd->priv;
+ int rc;
+
+ rc = efx->type->mtd_sync(mtd);
+ if (rc)
+ pr_err("%s: %s sync failed (%d)\n",
+ part->name, part->dev_type_name, rc);
+}
+
+static void efx_mtd_remove_partition(struct efx_mtd_partition *part)
+{
+ int rc;
+
+ for (;;) {
+ rc = mtd_device_unregister(&part->mtd);
+ if (rc != -EBUSY)
+ break;
+ ssleep(1);
+ }
+ WARN_ON(rc);
+ list_del(&part->node);
+}
+
+int efx_mtd_add(struct efx_nic *efx, struct efx_mtd_partition *parts,
+ size_t n_parts, size_t sizeof_part)
+{
+ struct efx_mtd_partition *part;
+ size_t i;
+
+ for (i = 0; i < n_parts; i++) {
+ part = (struct efx_mtd_partition *)((char *)parts +
+ i * sizeof_part);
+
+ part->mtd.writesize = 1;
+
+ if (!(part->mtd.flags & MTD_NO_ERASE))
+ part->mtd.flags |= MTD_WRITEABLE;
+
+ part->mtd.owner = THIS_MODULE;
+ part->mtd.priv = efx;
+ part->mtd.name = part->name;
+ part->mtd._erase = efx_mtd_erase;
+ part->mtd._read = efx->type->mtd_read;
+ part->mtd._write = efx->type->mtd_write;
+ part->mtd._sync = efx_mtd_sync;
+
+ efx->type->mtd_rename(part);
+
+ if (mtd_device_register(&part->mtd, NULL, 0))
+ goto fail;
+
+ /* Add to list in order - efx_mtd_remove() depends on this */
+ list_add_tail(&part->node, &efx->mtd_list);
+ }
+
+ return 0;
+
+fail:
+ while (i--) {
+ part = (struct efx_mtd_partition *)((char *)parts +
+ i * sizeof_part);
+ efx_mtd_remove_partition(part);
+ }
+ /* Failure is unlikely here, but probably means we're out of memory */
+ return -ENOMEM;
+}
+
+void efx_mtd_remove(struct efx_nic *efx)
+{
+ struct efx_mtd_partition *parts, *part, *next;
+
+ WARN_ON(efx_dev_registered(efx));
+
+ if (list_empty(&efx->mtd_list))
+ return;
+
+ parts = list_first_entry(&efx->mtd_list, struct efx_mtd_partition,
+ node);
+
+ list_for_each_entry_safe(part, next, &efx->mtd_list, node)
+ efx_mtd_remove_partition(part);
+
+ kfree(parts);
+}
+
+void efx_mtd_rename(struct efx_nic *efx)
+{
+ struct efx_mtd_partition *part;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry(part, &efx->mtd_list, node)
+ efx->type->mtd_rename(part);
+}
diff --git a/drivers/net/ethernet/sfc/siena/net_driver.h b/drivers/net/ethernet/sfc/siena/net_driver.h
new file mode 100644
index 000000000000..318db906a154
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/net_driver.h
@@ -0,0 +1,1716 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ */
+
+/* Common definitions for all Efx net driver code */
+
+#ifndef EFX_NET_DRIVER_H
+#define EFX_NET_DRIVER_H
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/timer.h>
+#include <linux/mdio.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/highmem.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
+#include <linux/mtd/mtd.h>
+#include <net/busy_poll.h>
+#include <net/xdp.h>
+
+#include "enum.h"
+#include "bitfield.h"
+#include "filter.h"
+
+/**************************************************************************
+ *
+ * Build definitions
+ *
+ **************************************************************************/
+
+#ifdef DEBUG
+#define EFX_WARN_ON_ONCE_PARANOID(x) WARN_ON_ONCE(x)
+#define EFX_WARN_ON_PARANOID(x) WARN_ON(x)
+#else
+#define EFX_WARN_ON_ONCE_PARANOID(x) do {} while (0)
+#define EFX_WARN_ON_PARANOID(x) do {} while (0)
+#endif
+
+/**************************************************************************
+ *
+ * Efx data structures
+ *
+ **************************************************************************/
+
+#define EFX_MAX_CHANNELS 32U
+#define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS
+#define EFX_EXTRA_CHANNEL_IOV 0
+#define EFX_EXTRA_CHANNEL_PTP 1
+#define EFX_MAX_EXTRA_CHANNELS 2U
+
+/* Checksum generation is a per-queue option in hardware, so each
+ * queue visible to the networking core is backed by two hardware TX
+ * queues. */
+#define EFX_MAX_TX_TC 2
+#define EFX_MAX_CORE_TX_QUEUES (EFX_MAX_TX_TC * EFX_MAX_CHANNELS)
+#define EFX_TXQ_TYPE_OUTER_CSUM 1 /* Outer checksum offload */
+#define EFX_TXQ_TYPE_INNER_CSUM 2 /* Inner checksum offload */
+#define EFX_TXQ_TYPE_HIGHPRI 4 /* High-priority (for TC) */
+#define EFX_TXQ_TYPES 8
+/* HIGHPRI is Siena-only, and INNER_CSUM is EF10, so no need for both */
+#define EFX_MAX_TXQ_PER_CHANNEL 4
+#define EFX_MAX_TX_QUEUES (EFX_MAX_TXQ_PER_CHANNEL * EFX_MAX_CHANNELS)
+
+/* Maximum possible MTU the driver supports */
+#define EFX_MAX_MTU (9 * 1024)
+
+/* Minimum MTU, from RFC791 (IP) */
+#define EFX_MIN_MTU 68
+
+/* Maximum total header length for TSOv2 */
+#define EFX_TSO2_MAX_HDRLEN 208
+
+/* Size of an RX scatter buffer. Small enough to pack 2 into a 4K page,
+ * and should be a multiple of the cache line size.
+ */
+#define EFX_RX_USR_BUF_SIZE (2048 - 256)
+
+/* If possible, we should ensure cache line alignment at start and end
+ * of every buffer. Otherwise, we just need to ensure 4-byte
+ * alignment of the network header.
+ */
+#if NET_IP_ALIGN == 0
+#define EFX_RX_BUF_ALIGNMENT L1_CACHE_BYTES
+#else
+#define EFX_RX_BUF_ALIGNMENT 4
+#endif
+
+/* Non-standard XDP_PACKET_HEADROOM and tailroom to satisfy XDP_REDIRECT and
+ * still fit two standard MTU size packets into a single 4K page.
+ */
+#define EFX_XDP_HEADROOM 128
+#define EFX_XDP_TAILROOM SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
+
+/* Forward declare Precision Time Protocol (PTP) support structure. */
+struct efx_ptp_data;
+struct hwtstamp_config;
+
+struct efx_self_tests;
+
+/**
+ * struct efx_buffer - A general-purpose DMA buffer
+ * @addr: host base address of the buffer
+ * @dma_addr: DMA base address of the buffer
+ * @len: Buffer length, in bytes
+ *
+ * The NIC uses these buffers for its interrupt status registers and
+ * MAC stats dumps.
+ */
+struct efx_buffer {
+ void *addr;
+ dma_addr_t dma_addr;
+ unsigned int len;
+};
+
+/**
+ * struct efx_special_buffer - DMA buffer entered into buffer table
+ * @buf: Standard &struct efx_buffer
+ * @index: Buffer index within controller;s buffer table
+ * @entries: Number of buffer table entries
+ *
+ * The NIC has a buffer table that maps buffers of size %EFX_BUF_SIZE.
+ * Event and descriptor rings are addressed via one or more buffer
+ * table entries (and so can be physically non-contiguous, although we
+ * currently do not take advantage of that). On Falcon and Siena we
+ * have to take care of allocating and initialising the entries
+ * ourselves. On later hardware this is managed by the firmware and
+ * @index and @entries are left as 0.
+ */
+struct efx_special_buffer {
+ struct efx_buffer buf;
+ unsigned int index;
+ unsigned int entries;
+};
+
+/**
+ * struct efx_tx_buffer - buffer state for a TX descriptor
+ * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be
+ * freed when descriptor completes
+ * @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data
+ * member is the associated buffer to drop a page reference on.
+ * @option: When @flags & %EFX_TX_BUF_OPTION, an EF10-specific option
+ * descriptor.
+ * @dma_addr: DMA address of the fragment.
+ * @flags: Flags for allocation and DMA mapping type
+ * @len: Length of this fragment.
+ * This field is zero when the queue slot is empty.
+ * @unmap_len: Length of this fragment to unmap
+ * @dma_offset: Offset of @dma_addr from the address of the backing DMA mapping.
+ * Only valid if @unmap_len != 0.
+ */
+struct efx_tx_buffer {
+ union {
+ const struct sk_buff *skb;
+ struct xdp_frame *xdpf;
+ };
+ union {
+ efx_qword_t option; /* EF10 */
+ dma_addr_t dma_addr;
+ };
+ unsigned short flags;
+ unsigned short len;
+ unsigned short unmap_len;
+ unsigned short dma_offset;
+};
+#define EFX_TX_BUF_CONT 1 /* not last descriptor of packet */
+#define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */
+#define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */
+#define EFX_TX_BUF_OPTION 0x10 /* empty buffer for option descriptor */
+#define EFX_TX_BUF_XDP 0x20 /* buffer was sent with XDP */
+#define EFX_TX_BUF_TSO_V3 0x40 /* empty buffer for a TSO_V3 descriptor */
+
+/**
+ * struct efx_tx_queue - An Efx TX queue
+ *
+ * This is a ring buffer of TX fragments.
+ * Since the TX completion path always executes on the same
+ * CPU and the xmit path can operate on different CPUs,
+ * performance is increased by ensuring that the completion
+ * path and the xmit path operate on different cache lines.
+ * This is particularly important if the xmit path is always
+ * executing on one CPU which is different from the completion
+ * path. There is also a cache line for members which are
+ * read but not written on the fast path.
+ *
+ * @efx: The associated Efx NIC
+ * @queue: DMA queue number
+ * @label: Label for TX completion events.
+ * Is our index within @channel->tx_queue array.
+ * @type: configuration type of this TX queue. A bitmask of %EFX_TXQ_TYPE_* flags.
+ * @tso_version: Version of TSO in use for this queue.
+ * @tso_encap: Is encapsulated TSO supported? Supported in TSOv2 on 8000 series.
+ * @channel: The associated channel
+ * @core_txq: The networking core TX queue structure
+ * @buffer: The software buffer ring
+ * @cb_page: Array of pages of copy buffers. Carved up according to
+ * %EFX_TX_CB_ORDER into %EFX_TX_CB_SIZE-sized chunks.
+ * @txd: The hardware descriptor ring
+ * @ptr_mask: The size of the ring minus 1.
+ * @piobuf: PIO buffer region for this TX queue (shared with its partner).
+ * Size of the region is efx_piobuf_size.
+ * @piobuf_offset: Buffer offset to be specified in PIO descriptors
+ * @initialised: Has hardware queue been initialised?
+ * @timestamping: Is timestamping enabled for this channel?
+ * @xdp_tx: Is this an XDP tx queue?
+ * @read_count: Current read pointer.
+ * This is the number of buffers that have been removed from both rings.
+ * @old_write_count: The value of @write_count when last checked.
+ * This is here for performance reasons. The xmit path will
+ * only get the up-to-date value of @write_count if this
+ * variable indicates that the queue is empty. This is to
+ * avoid cache-line ping-pong between the xmit path and the
+ * completion path.
+ * @merge_events: Number of TX merged completion events
+ * @completed_timestamp_major: Top part of the most recent tx timestamp.
+ * @completed_timestamp_minor: Low part of the most recent tx timestamp.
+ * @insert_count: Current insert pointer
+ * This is the number of buffers that have been added to the
+ * software ring.
+ * @write_count: Current write pointer
+ * This is the number of buffers that have been added to the
+ * hardware ring.
+ * @packet_write_count: Completable write pointer
+ * This is the write pointer of the last packet written.
+ * Normally this will equal @write_count, but as option descriptors
+ * don't produce completion events, they won't update this.
+ * Filled in iff @efx->type->option_descriptors; only used for PIO.
+ * Thus, this is written and used on EF10, and neither on farch.
+ * @old_read_count: The value of read_count when last checked.
+ * This is here for performance reasons. The xmit path will
+ * only get the up-to-date value of read_count if this
+ * variable indicates that the queue is full. This is to
+ * avoid cache-line ping-pong between the xmit path and the
+ * completion path.
+ * @tso_bursts: Number of times TSO xmit invoked by kernel
+ * @tso_long_headers: Number of packets with headers too long for standard
+ * blocks
+ * @tso_packets: Number of packets via the TSO xmit path
+ * @tso_fallbacks: Number of times TSO fallback used
+ * @pushes: Number of times the TX push feature has been used
+ * @pio_packets: Number of times the TX PIO feature has been used
+ * @xmit_pending: Are any packets waiting to be pushed to the NIC
+ * @cb_packets: Number of times the TX copybreak feature has been used
+ * @notify_count: Count of notified descriptors to the NIC
+ * @empty_read_count: If the completion path has seen the queue as empty
+ * and the transmission path has not yet checked this, the value of
+ * @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0.
+ */
+struct efx_tx_queue {
+ /* Members which don't change on the fast path */
+ struct efx_nic *efx ____cacheline_aligned_in_smp;
+ unsigned int queue;
+ unsigned int label;
+ unsigned int type;
+ unsigned int tso_version;
+ bool tso_encap;
+ struct efx_channel *channel;
+ struct netdev_queue *core_txq;
+ struct efx_tx_buffer *buffer;
+ struct efx_buffer *cb_page;
+ struct efx_special_buffer txd;
+ unsigned int ptr_mask;
+ void __iomem *piobuf;
+ unsigned int piobuf_offset;
+ bool initialised;
+ bool timestamping;
+ bool xdp_tx;
+
+ /* Members used mainly on the completion path */
+ unsigned int read_count ____cacheline_aligned_in_smp;
+ unsigned int old_write_count;
+ unsigned int merge_events;
+ unsigned int bytes_compl;
+ unsigned int pkts_compl;
+ u32 completed_timestamp_major;
+ u32 completed_timestamp_minor;
+
+ /* Members used only on the xmit path */
+ unsigned int insert_count ____cacheline_aligned_in_smp;
+ unsigned int write_count;
+ unsigned int packet_write_count;
+ unsigned int old_read_count;
+ unsigned int tso_bursts;
+ unsigned int tso_long_headers;
+ unsigned int tso_packets;
+ unsigned int tso_fallbacks;
+ unsigned int pushes;
+ unsigned int pio_packets;
+ bool xmit_pending;
+ unsigned int cb_packets;
+ unsigned int notify_count;
+ /* Statistics to supplement MAC stats */
+ unsigned long tx_packets;
+
+ /* Members shared between paths and sometimes updated */
+ unsigned int empty_read_count ____cacheline_aligned_in_smp;
+#define EFX_EMPTY_COUNT_VALID 0x80000000
+ atomic_t flush_outstanding;
+};
+
+#define EFX_TX_CB_ORDER 7
+#define EFX_TX_CB_SIZE (1 << EFX_TX_CB_ORDER) - NET_IP_ALIGN
+
+/**
+ * struct efx_rx_buffer - An Efx RX data buffer
+ * @dma_addr: DMA base address of the buffer
+ * @page: The associated page buffer.
+ * Will be %NULL if the buffer slot is currently free.
+ * @page_offset: If pending: offset in @page of DMA base address.
+ * If completed: offset in @page of Ethernet header.
+ * @len: If pending: length for DMA descriptor.
+ * If completed: received length, excluding hash prefix.
+ * @flags: Flags for buffer and packet state. These are only set on the
+ * first buffer of a scattered packet.
+ */
+struct efx_rx_buffer {
+ dma_addr_t dma_addr;
+ struct page *page;
+ u16 page_offset;
+ u16 len;
+ u16 flags;
+};
+#define EFX_RX_BUF_LAST_IN_PAGE 0x0001
+#define EFX_RX_PKT_CSUMMED 0x0002
+#define EFX_RX_PKT_DISCARD 0x0004
+#define EFX_RX_PKT_TCP 0x0040
+#define EFX_RX_PKT_PREFIX_LEN 0x0080 /* length is in prefix only */
+#define EFX_RX_PKT_CSUM_LEVEL 0x0200
+
+/**
+ * struct efx_rx_page_state - Page-based rx buffer state
+ *
+ * Inserted at the start of every page allocated for receive buffers.
+ * Used to facilitate sharing dma mappings between recycled rx buffers
+ * and those passed up to the kernel.
+ *
+ * @dma_addr: The dma address of this page.
+ */
+struct efx_rx_page_state {
+ dma_addr_t dma_addr;
+
+ unsigned int __pad[] ____cacheline_aligned;
+};
+
+/**
+ * struct efx_rx_queue - An Efx RX queue
+ * @efx: The associated Efx NIC
+ * @core_index: Index of network core RX queue. Will be >= 0 iff this
+ * is associated with a real RX queue.
+ * @buffer: The software buffer ring
+ * @rxd: The hardware descriptor ring
+ * @ptr_mask: The size of the ring minus 1.
+ * @refill_enabled: Enable refill whenever fill level is low
+ * @flush_pending: Set when a RX flush is pending. Has the same lifetime as
+ * @rxq_flush_pending.
+ * @added_count: Number of buffers added to the receive queue.
+ * @notified_count: Number of buffers given to NIC (<= @added_count).
+ * @removed_count: Number of buffers removed from the receive queue.
+ * @scatter_n: Used by NIC specific receive code.
+ * @scatter_len: Used by NIC specific receive code.
+ * @page_ring: The ring to store DMA mapped pages for reuse.
+ * @page_add: Counter to calculate the write pointer for the recycle ring.
+ * @page_remove: Counter to calculate the read pointer for the recycle ring.
+ * @page_recycle_count: The number of pages that have been recycled.
+ * @page_recycle_failed: The number of pages that couldn't be recycled because
+ * the kernel still held a reference to them.
+ * @page_recycle_full: The number of pages that were released because the
+ * recycle ring was full.
+ * @page_ptr_mask: The number of pages in the RX recycle ring minus 1.
+ * @max_fill: RX descriptor maximum fill level (<= ring size)
+ * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill
+ * (<= @max_fill)
+ * @min_fill: RX descriptor minimum non-zero fill level.
+ * This records the minimum fill level observed when a ring
+ * refill was triggered.
+ * @recycle_count: RX buffer recycle counter.
+ * @slow_fill: Timer used to defer efx_nic_generate_fill_event().
+ * @xdp_rxq_info: XDP specific RX queue information.
+ * @xdp_rxq_info_valid: Is xdp_rxq_info valid data?.
+ */
+struct efx_rx_queue {
+ struct efx_nic *efx;
+ int core_index;
+ struct efx_rx_buffer *buffer;
+ struct efx_special_buffer rxd;
+ unsigned int ptr_mask;
+ bool refill_enabled;
+ bool flush_pending;
+
+ unsigned int added_count;
+ unsigned int notified_count;
+ unsigned int removed_count;
+ unsigned int scatter_n;
+ unsigned int scatter_len;
+ struct page **page_ring;
+ unsigned int page_add;
+ unsigned int page_remove;
+ unsigned int page_recycle_count;
+ unsigned int page_recycle_failed;
+ unsigned int page_recycle_full;
+ unsigned int page_ptr_mask;
+ unsigned int max_fill;
+ unsigned int fast_fill_trigger;
+ unsigned int min_fill;
+ unsigned int min_overfill;
+ unsigned int recycle_count;
+ struct timer_list slow_fill;
+ unsigned int slow_fill_count;
+ /* Statistics to supplement MAC stats */
+ unsigned long rx_packets;
+ struct xdp_rxq_info xdp_rxq_info;
+ bool xdp_rxq_info_valid;
+};
+
+enum efx_sync_events_state {
+ SYNC_EVENTS_DISABLED = 0,
+ SYNC_EVENTS_QUIESCENT,
+ SYNC_EVENTS_REQUESTED,
+ SYNC_EVENTS_VALID,
+};
+
+/**
+ * struct efx_channel - An Efx channel
+ *
+ * A channel comprises an event queue, at least one TX queue, at least
+ * one RX queue, and an associated tasklet for processing the event
+ * queue.
+ *
+ * @efx: Associated Efx NIC
+ * @channel: Channel instance number
+ * @type: Channel type definition
+ * @eventq_init: Event queue initialised flag
+ * @enabled: Channel enabled indicator
+ * @irq: IRQ number (MSI and MSI-X only)
+ * @irq_moderation_us: IRQ moderation value (in microseconds)
+ * @napi_dev: Net device used with NAPI
+ * @napi_str: NAPI control structure
+ * @state: state for NAPI vs busy polling
+ * @state_lock: lock protecting @state
+ * @eventq: Event queue buffer
+ * @eventq_mask: Event queue pointer mask
+ * @eventq_read_ptr: Event queue read pointer
+ * @event_test_cpu: Last CPU to handle interrupt or test event for this channel
+ * @irq_count: Number of IRQs since last adaptive moderation decision
+ * @irq_mod_score: IRQ moderation score
+ * @rfs_filter_count: number of accelerated RFS filters currently in place;
+ * equals the count of @rps_flow_id slots filled
+ * @rfs_last_expiry: value of jiffies last time some accelerated RFS filters
+ * were checked for expiry
+ * @rfs_expire_index: next accelerated RFS filter ID to check for expiry
+ * @n_rfs_succeeded: number of successful accelerated RFS filter insertions
+ * @n_rfs_failed: number of failed accelerated RFS filter insertions
+ * @filter_work: Work item for efx_filter_rfs_expire()
+ * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS,
+ * indexed by filter ID
+ * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors
+ * @n_rx_ip_hdr_chksum_err: Count of RX IP header checksum errors
+ * @n_rx_tcp_udp_chksum_err: Count of RX TCP and UDP checksum errors
+ * @n_rx_mcast_mismatch: Count of unmatched multicast frames
+ * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors
+ * @n_rx_overlength: Count of RX_OVERLENGTH errors
+ * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun
+ * @n_rx_nodesc_trunc: Number of RX packets truncated and then dropped due to
+ * lack of descriptors
+ * @n_rx_merge_events: Number of RX merged completion events
+ * @n_rx_merge_packets: Number of RX packets completed by merged events
+ * @n_rx_xdp_drops: Count of RX packets intentionally dropped due to XDP
+ * @n_rx_xdp_bad_drops: Count of RX packets dropped due to XDP errors
+ * @n_rx_xdp_tx: Count of RX packets retransmitted due to XDP
+ * @n_rx_xdp_redirect: Count of RX packets redirected to a different NIC by XDP
+ * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by
+ * __efx_rx_packet(), or zero if there is none
+ * @rx_pkt_index: Ring index of first buffer for next packet to be delivered
+ * by __efx_rx_packet(), if @rx_pkt_n_frags != 0
+ * @rx_list: list of SKBs from current RX, awaiting processing
+ * @rx_queue: RX queue for this channel
+ * @tx_queue: TX queues for this channel
+ * @tx_queue_by_type: pointers into @tx_queue, or %NULL, indexed by txq type
+ * @sync_events_state: Current state of sync events on this channel
+ * @sync_timestamp_major: Major part of the last ptp sync event
+ * @sync_timestamp_minor: Minor part of the last ptp sync event
+ */
+struct efx_channel {
+ struct efx_nic *efx;
+ int channel;
+ const struct efx_channel_type *type;
+ bool eventq_init;
+ bool enabled;
+ int irq;
+ unsigned int irq_moderation_us;
+ struct net_device *napi_dev;
+ struct napi_struct napi_str;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ unsigned long busy_poll_state;
+#endif
+ struct efx_special_buffer eventq;
+ unsigned int eventq_mask;
+ unsigned int eventq_read_ptr;
+ int event_test_cpu;
+
+ unsigned int irq_count;
+ unsigned int irq_mod_score;
+#ifdef CONFIG_RFS_ACCEL
+ unsigned int rfs_filter_count;
+ unsigned int rfs_last_expiry;
+ unsigned int rfs_expire_index;
+ unsigned int n_rfs_succeeded;
+ unsigned int n_rfs_failed;
+ struct delayed_work filter_work;
+#define RPS_FLOW_ID_INVALID 0xFFFFFFFF
+ u32 *rps_flow_id;
+#endif
+
+ unsigned int n_rx_tobe_disc;
+ unsigned int n_rx_ip_hdr_chksum_err;
+ unsigned int n_rx_tcp_udp_chksum_err;
+ unsigned int n_rx_outer_ip_hdr_chksum_err;
+ unsigned int n_rx_outer_tcp_udp_chksum_err;
+ unsigned int n_rx_inner_ip_hdr_chksum_err;
+ unsigned int n_rx_inner_tcp_udp_chksum_err;
+ unsigned int n_rx_eth_crc_err;
+ unsigned int n_rx_mcast_mismatch;
+ unsigned int n_rx_frm_trunc;
+ unsigned int n_rx_overlength;
+ unsigned int n_skbuff_leaks;
+ unsigned int n_rx_nodesc_trunc;
+ unsigned int n_rx_merge_events;
+ unsigned int n_rx_merge_packets;
+ unsigned int n_rx_xdp_drops;
+ unsigned int n_rx_xdp_bad_drops;
+ unsigned int n_rx_xdp_tx;
+ unsigned int n_rx_xdp_redirect;
+
+ unsigned int rx_pkt_n_frags;
+ unsigned int rx_pkt_index;
+
+ struct list_head *rx_list;
+
+ struct efx_rx_queue rx_queue;
+ struct efx_tx_queue tx_queue[EFX_MAX_TXQ_PER_CHANNEL];
+ struct efx_tx_queue *tx_queue_by_type[EFX_TXQ_TYPES];
+
+ enum efx_sync_events_state sync_events_state;
+ u32 sync_timestamp_major;
+ u32 sync_timestamp_minor;
+};
+
+/**
+ * struct efx_msi_context - Context for each MSI
+ * @efx: The associated NIC
+ * @index: Index of the channel/IRQ
+ * @name: Name of the channel/IRQ
+ *
+ * Unlike &struct efx_channel, this is never reallocated and is always
+ * safe for the IRQ handler to access.
+ */
+struct efx_msi_context {
+ struct efx_nic *efx;
+ unsigned int index;
+ char name[IFNAMSIZ + 6];
+};
+
+/**
+ * struct efx_channel_type - distinguishes traffic and extra channels
+ * @handle_no_channel: Handle failure to allocate an extra channel
+ * @pre_probe: Set up extra state prior to initialisation
+ * @post_remove: Tear down extra state after finalisation, if allocated.
+ * May be called on channels that have not been probed.
+ * @get_name: Generate the channel's name (used for its IRQ handler)
+ * @copy: Copy the channel state prior to reallocation. May be %NULL if
+ * reallocation is not supported.
+ * @receive_skb: Handle an skb ready to be passed to netif_receive_skb()
+ * @want_txqs: Determine whether this channel should have TX queues
+ * created. If %NULL, TX queues are not created.
+ * @keep_eventq: Flag for whether event queue should be kept initialised
+ * while the device is stopped
+ * @want_pio: Flag for whether PIO buffers should be linked to this
+ * channel's TX queues.
+ */
+struct efx_channel_type {
+ void (*handle_no_channel)(struct efx_nic *);
+ int (*pre_probe)(struct efx_channel *);
+ void (*post_remove)(struct efx_channel *);
+ void (*get_name)(struct efx_channel *, char *buf, size_t len);
+ struct efx_channel *(*copy)(const struct efx_channel *);
+ bool (*receive_skb)(struct efx_channel *, struct sk_buff *);
+ bool (*want_txqs)(struct efx_channel *);
+ bool keep_eventq;
+ bool want_pio;
+};
+
+enum efx_led_mode {
+ EFX_LED_OFF = 0,
+ EFX_LED_ON = 1,
+ EFX_LED_DEFAULT = 2
+};
+
+#define STRING_TABLE_LOOKUP(val, member) \
+ ((val) < member ## _max) ? member ## _names[val] : "(invalid)"
+
+extern const char *const efx_loopback_mode_names[];
+extern const unsigned int efx_loopback_mode_max;
+#define LOOPBACK_MODE(efx) \
+ STRING_TABLE_LOOKUP((efx)->loopback_mode, efx_loopback_mode)
+
+enum efx_int_mode {
+ /* Be careful if altering to correct macro below */
+ EFX_INT_MODE_MSIX = 0,
+ EFX_INT_MODE_MSI = 1,
+ EFX_INT_MODE_LEGACY = 2,
+ EFX_INT_MODE_MAX /* Insert any new items before this */
+};
+#define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI)
+
+enum nic_state {
+ STATE_UNINIT = 0, /* device being probed/removed or is frozen */
+ STATE_READY = 1, /* hardware ready and netdev registered */
+ STATE_DISABLED = 2, /* device disabled due to hardware errors */
+ STATE_RECOVERY = 3, /* device recovering from PCI error */
+};
+
+/* Forward declaration */
+struct efx_nic;
+
+/* Pseudo bit-mask flow control field */
+#define EFX_FC_RX FLOW_CTRL_RX
+#define EFX_FC_TX FLOW_CTRL_TX
+#define EFX_FC_AUTO 4
+
+/**
+ * struct efx_link_state - Current state of the link
+ * @up: Link is up
+ * @fd: Link is full-duplex
+ * @fc: Actual flow control flags
+ * @speed: Link speed (Mbps)
+ */
+struct efx_link_state {
+ bool up;
+ bool fd;
+ u8 fc;
+ unsigned int speed;
+};
+
+static inline bool efx_link_state_equal(const struct efx_link_state *left,
+ const struct efx_link_state *right)
+{
+ return left->up == right->up && left->fd == right->fd &&
+ left->fc == right->fc && left->speed == right->speed;
+}
+
+/**
+ * enum efx_phy_mode - PHY operating mode flags
+ * @PHY_MODE_NORMAL: on and should pass traffic
+ * @PHY_MODE_TX_DISABLED: on with TX disabled
+ * @PHY_MODE_LOW_POWER: set to low power through MDIO
+ * @PHY_MODE_OFF: switched off through external control
+ * @PHY_MODE_SPECIAL: on but will not pass traffic
+ */
+enum efx_phy_mode {
+ PHY_MODE_NORMAL = 0,
+ PHY_MODE_TX_DISABLED = 1,
+ PHY_MODE_LOW_POWER = 2,
+ PHY_MODE_OFF = 4,
+ PHY_MODE_SPECIAL = 8,
+};
+
+static inline bool efx_phy_mode_disabled(enum efx_phy_mode mode)
+{
+ return !!(mode & ~PHY_MODE_TX_DISABLED);
+}
+
+/**
+ * struct efx_hw_stat_desc - Description of a hardware statistic
+ * @name: Name of the statistic as visible through ethtool, or %NULL if
+ * it should not be exposed
+ * @dma_width: Width in bits (0 for non-DMA statistics)
+ * @offset: Offset within stats (ignored for non-DMA statistics)
+ */
+struct efx_hw_stat_desc {
+ const char *name;
+ u16 dma_width;
+ u16 offset;
+};
+
+/* Number of bits used in a multicast filter hash address */
+#define EFX_MCAST_HASH_BITS 8
+
+/* Number of (single-bit) entries in a multicast filter hash */
+#define EFX_MCAST_HASH_ENTRIES (1 << EFX_MCAST_HASH_BITS)
+
+/* An Efx multicast filter hash */
+union efx_multicast_hash {
+ u8 byte[EFX_MCAST_HASH_ENTRIES / 8];
+ efx_oword_t oword[EFX_MCAST_HASH_ENTRIES / sizeof(efx_oword_t) / 8];
+};
+
+struct vfdi_status;
+
+/* The reserved RSS context value */
+#define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff
+/**
+ * struct efx_rss_context - A user-defined RSS context for filtering
+ * @list: node of linked list on which this struct is stored
+ * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or
+ * %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC.
+ * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID.
+ * @user_id: the rss_context ID exposed to userspace over ethtool.
+ * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
+ * @rx_hash_key: Toeplitz hash key for this RSS context
+ * @indir_table: Indirection table for this RSS context
+ */
+struct efx_rss_context {
+ struct list_head list;
+ u32 context_id;
+ u32 user_id;
+ bool rx_hash_udp_4tuple;
+ u8 rx_hash_key[40];
+ u32 rx_indir_table[128];
+};
+
+#ifdef CONFIG_RFS_ACCEL
+/* Order of these is important, since filter_id >= %EFX_ARFS_FILTER_ID_PENDING
+ * is used to test if filter does or will exist.
+ */
+#define EFX_ARFS_FILTER_ID_PENDING -1
+#define EFX_ARFS_FILTER_ID_ERROR -2
+#define EFX_ARFS_FILTER_ID_REMOVING -3
+/**
+ * struct efx_arfs_rule - record of an ARFS filter and its IDs
+ * @node: linkage into hash table
+ * @spec: details of the filter (used as key for hash table). Use efx->type to
+ * determine which member to use.
+ * @rxq_index: channel to which the filter will steer traffic.
+ * @arfs_id: filter ID which was returned to ARFS
+ * @filter_id: index in software filter table. May be
+ * %EFX_ARFS_FILTER_ID_PENDING if filter was not inserted yet,
+ * %EFX_ARFS_FILTER_ID_ERROR if filter insertion failed, or
+ * %EFX_ARFS_FILTER_ID_REMOVING if expiry is currently removing the filter.
+ */
+struct efx_arfs_rule {
+ struct hlist_node node;
+ struct efx_filter_spec spec;
+ u16 rxq_index;
+ u16 arfs_id;
+ s32 filter_id;
+};
+
+/* Size chosen so that the table is one page (4kB) */
+#define EFX_ARFS_HASH_TABLE_SIZE 512
+
+/**
+ * struct efx_async_filter_insertion - Request to asynchronously insert a filter
+ * @net_dev: Reference to the netdevice
+ * @spec: The filter to insert
+ * @work: Workitem for this request
+ * @rxq_index: Identifies the channel for which this request was made
+ * @flow_id: Identifies the kernel-side flow for which this request was made
+ */
+struct efx_async_filter_insertion {
+ struct net_device *net_dev;
+ struct efx_filter_spec spec;
+ struct work_struct work;
+ u16 rxq_index;
+ u32 flow_id;
+};
+
+/* Maximum number of ARFS workitems that may be in flight on an efx_nic */
+#define EFX_RPS_MAX_IN_FLIGHT 8
+#endif /* CONFIG_RFS_ACCEL */
+
+enum efx_xdp_tx_queues_mode {
+ EFX_XDP_TX_QUEUES_DEDICATED, /* one queue per core, locking not needed */
+ EFX_XDP_TX_QUEUES_SHARED, /* each queue used by more than 1 core */
+ EFX_XDP_TX_QUEUES_BORROWED /* queues borrowed from net stack */
+};
+
+/**
+ * struct efx_nic - an Efx NIC
+ * @name: Device name (net device name or bus id before net device registered)
+ * @pci_dev: The PCI device
+ * @node: List node for maintaning primary/secondary function lists
+ * @primary: &struct efx_nic instance for the primary function of this
+ * controller. May be the same structure, and may be %NULL if no
+ * primary function is bound. Serialised by rtnl_lock.
+ * @secondary_list: List of &struct efx_nic instances for the secondary PCI
+ * functions of the controller, if this is for the primary function.
+ * Serialised by rtnl_lock.
+ * @type: Controller type attributes
+ * @legacy_irq: IRQ number
+ * @workqueue: Workqueue for port reconfigures and the HW monitor.
+ * Work items do not hold and must not acquire RTNL.
+ * @workqueue_name: Name of workqueue
+ * @reset_work: Scheduled reset workitem
+ * @membase_phys: Memory BAR value as physical address
+ * @membase: Memory BAR value
+ * @vi_stride: step between per-VI registers / memory regions
+ * @interrupt_mode: Interrupt mode
+ * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds
+ * @timer_max_ns: Interrupt timer maximum value, in nanoseconds
+ * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues
+ * @irqs_hooked: Channel interrupts are hooked
+ * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues
+ * @irq_rx_moderation_us: IRQ moderation time for RX event queues
+ * @msg_enable: Log message enable flags
+ * @state: Device state number (%STATE_*). Serialised by the rtnl_lock.
+ * @reset_pending: Bitmask for pending resets
+ * @tx_queue: TX DMA queues
+ * @rx_queue: RX DMA queues
+ * @channel: Channels
+ * @msi_context: Context for each MSI
+ * @extra_channel_types: Types of extra (non-traffic) channels that
+ * should be allocated for this NIC
+ * @xdp_tx_queue_count: Number of entries in %xdp_tx_queues.
+ * @xdp_tx_queues: Array of pointers to tx queues used for XDP transmit.
+ * @xdp_txq_queues_mode: XDP TX queues sharing strategy.
+ * @rxq_entries: Size of receive queues requested by user.
+ * @txq_entries: Size of transmit queues requested by user.
+ * @txq_stop_thresh: TX queue fill level at or above which we stop it.
+ * @txq_wake_thresh: TX queue fill level at or below which we wake it.
+ * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches
+ * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches
+ * @sram_lim_qw: Qword address limit of SRAM
+ * @next_buffer_table: First available buffer table id
+ * @n_channels: Number of channels in use
+ * @n_rx_channels: Number of channels used for RX (= number of RX queues)
+ * @n_tx_channels: Number of channels used for TX
+ * @n_extra_tx_channels: Number of extra channels with TX queues
+ * @tx_queues_per_channel: number of TX queues probed on each channel
+ * @n_xdp_channels: Number of channels used for XDP TX
+ * @xdp_channel_offset: Offset of zeroth channel used for XPD TX.
+ * @xdp_tx_per_channel: Max number of TX queues on an XDP TX channel.
+ * @rx_ip_align: RX DMA address offset to have IP header aligned in
+ * in accordance with NET_IP_ALIGN
+ * @rx_dma_len: Current maximum RX DMA length
+ * @rx_buffer_order: Order (log2) of number of pages for each RX buffer
+ * @rx_buffer_truesize: Amortised allocation size of an RX buffer,
+ * for use in sk_buff::truesize
+ * @rx_prefix_size: Size of RX prefix before packet data
+ * @rx_packet_hash_offset: Offset of RX flow hash from start of packet data
+ * (valid only if @rx_prefix_size != 0; always negative)
+ * @rx_packet_len_offset: Offset of RX packet length from start of packet data
+ * (valid only for NICs that set %EFX_RX_PKT_PREFIX_LEN; always negative)
+ * @rx_packet_ts_offset: Offset of timestamp from start of packet data
+ * (valid only if channel->sync_timestamps_enabled; always negative)
+ * @rx_scatter: Scatter mode enabled for receives
+ * @rss_context: Main RSS context. Its @list member is the head of the list of
+ * RSS contexts created by user requests
+ * @rss_lock: Protects custom RSS context software state in @rss_context.list
+ * @vport_id: The function's vport ID, only relevant for PFs
+ * @int_error_count: Number of internal errors seen recently
+ * @int_error_expire: Time at which error count will be expired
+ * @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot
+ * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
+ * acknowledge but do nothing else.
+ * @irq_status: Interrupt status buffer
+ * @irq_zero_count: Number of legacy IRQs seen with queue flags == 0
+ * @irq_level: IRQ level/index for IRQs not triggered by an event queue
+ * @selftest_work: Work item for asynchronous self-test
+ * @mtd_list: List of MTDs attached to the NIC
+ * @nic_data: Hardware dependent state
+ * @mcdi: Management-Controller-to-Driver Interface state
+ * @mac_lock: MAC access lock. Protects @port_enabled, @phy_mode,
+ * efx_monitor() and efx_reconfigure_port()
+ * @port_enabled: Port enabled indicator.
+ * Serialises efx_stop_all(), efx_start_all(), efx_monitor() and
+ * efx_mac_work() with kernel interfaces. Safe to read under any
+ * one of the rtnl_lock, mac_lock, or netif_tx_lock, but all three must
+ * be held to modify it.
+ * @port_initialized: Port initialized?
+ * @net_dev: Operating system network device. Consider holding the rtnl lock
+ * @fixed_features: Features which cannot be turned off
+ * @num_mac_stats: Number of MAC stats reported by firmware (MAC_STATS_NUM_STATS
+ * field of %MC_CMD_GET_CAPABILITIES_V4 response, or %MC_CMD_MAC_NSTATS)
+ * @stats_buffer: DMA buffer for statistics
+ * @phy_type: PHY type
+ * @phy_data: PHY private data (including PHY-specific stats)
+ * @mdio: PHY MDIO interface
+ * @mdio_bus: PHY MDIO bus ID (only used by Siena)
+ * @phy_mode: PHY operating mode. Serialised by @mac_lock.
+ * @link_advertising: Autonegotiation advertising flags
+ * @fec_config: Forward Error Correction configuration flags. For bit positions
+ * see &enum ethtool_fec_config_bits.
+ * @link_state: Current state of the link
+ * @n_link_state_changes: Number of times the link has changed state
+ * @unicast_filter: Flag for Falcon-arch simple unicast filter.
+ * Protected by @mac_lock.
+ * @multicast_hash: Multicast hash table for Falcon-arch.
+ * Protected by @mac_lock.
+ * @wanted_fc: Wanted flow control flags
+ * @fc_disable: When non-zero flow control is disabled. Typically used to
+ * ensure that network back pressure doesn't delay dma queue flushes.
+ * Serialised by the rtnl lock.
+ * @mac_work: Work item for changing MAC promiscuity and multicast hash
+ * @loopback_mode: Loopback status
+ * @loopback_modes: Supported loopback mode bitmask
+ * @loopback_selftest: Offline self-test private state
+ * @xdp_prog: Current XDP programme for this interface
+ * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state
+ * @filter_state: Architecture-dependent filter table state
+ * @rps_mutex: Protects RPS state of all channels
+ * @rps_slot_map: bitmap of in-flight entries in @rps_slot
+ * @rps_slot: array of ARFS insertion requests for efx_filter_rfs_work()
+ * @rps_hash_lock: Protects ARFS filter mapping state (@rps_hash_table and
+ * @rps_next_id).
+ * @rps_hash_table: Mapping between ARFS filters and their various IDs
+ * @rps_next_id: next arfs_id for an ARFS filter
+ * @active_queues: Count of RX and TX queues that haven't been flushed and drained.
+ * @rxq_flush_pending: Count of number of receive queues that need to be flushed.
+ * Decremented when the efx_flush_rx_queue() is called.
+ * @rxq_flush_outstanding: Count of number of RX flushes started but not yet
+ * completed (either success or failure). Not used when MCDI is used to
+ * flush receive queues.
+ * @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions.
+ * @vf_count: Number of VFs intended to be enabled.
+ * @vf_init_count: Number of VFs that have been fully initialised.
+ * @vi_scale: log2 number of vnics per VF.
+ * @ptp_data: PTP state data
+ * @ptp_warned: has this NIC seen and warned about unexpected PTP events?
+ * @vpd_sn: Serial number read from VPD
+ * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their
+ * xdp_rxq_info structures?
+ * @netdev_notifier: Netdevice notifier.
+ * @mem_bar: The BAR that is mapped into membase.
+ * @reg_base: Offset from the start of the bar to the function control window.
+ * @monitor_work: Hardware monitor workitem
+ * @biu_lock: BIU (bus interface unit) lock
+ * @last_irq_cpu: Last CPU to handle a possible test interrupt. This
+ * field is used by efx_test_interrupts() to verify that an
+ * interrupt has occurred.
+ * @stats_lock: Statistics update lock. Must be held when calling
+ * efx_nic_type::{update,start,stop}_stats.
+ * @n_rx_noskb_drops: Count of RX packets dropped due to failure to allocate an skb
+ *
+ * This is stored in the private area of the &struct net_device.
+ */
+struct efx_nic {
+ /* The following fields should be written very rarely */
+
+ char name[IFNAMSIZ];
+ struct list_head node;
+ struct efx_nic *primary;
+ struct list_head secondary_list;
+ struct pci_dev *pci_dev;
+ unsigned int port_num;
+ const struct efx_nic_type *type;
+ int legacy_irq;
+ bool eeh_disabled_legacy_irq;
+ struct workqueue_struct *workqueue;
+ char workqueue_name[16];
+ struct work_struct reset_work;
+ resource_size_t membase_phys;
+ void __iomem *membase;
+
+ unsigned int vi_stride;
+
+ enum efx_int_mode interrupt_mode;
+ unsigned int timer_quantum_ns;
+ unsigned int timer_max_ns;
+ bool irq_rx_adaptive;
+ bool irqs_hooked;
+ unsigned int irq_mod_step_us;
+ unsigned int irq_rx_moderation_us;
+ u32 msg_enable;
+
+ enum nic_state state;
+ unsigned long reset_pending;
+
+ struct efx_channel *channel[EFX_MAX_CHANNELS];
+ struct efx_msi_context msi_context[EFX_MAX_CHANNELS];
+ const struct efx_channel_type *
+ extra_channel_type[EFX_MAX_EXTRA_CHANNELS];
+
+ unsigned int xdp_tx_queue_count;
+ struct efx_tx_queue **xdp_tx_queues;
+ enum efx_xdp_tx_queues_mode xdp_txq_queues_mode;
+
+ unsigned rxq_entries;
+ unsigned txq_entries;
+ unsigned int txq_stop_thresh;
+ unsigned int txq_wake_thresh;
+
+ unsigned tx_dc_base;
+ unsigned rx_dc_base;
+ unsigned sram_lim_qw;
+ unsigned next_buffer_table;
+
+ unsigned int max_channels;
+ unsigned int max_vis;
+ unsigned int max_tx_channels;
+ unsigned n_channels;
+ unsigned n_rx_channels;
+ unsigned rss_spread;
+ unsigned tx_channel_offset;
+ unsigned n_tx_channels;
+ unsigned n_extra_tx_channels;
+ unsigned int tx_queues_per_channel;
+ unsigned int n_xdp_channels;
+ unsigned int xdp_channel_offset;
+ unsigned int xdp_tx_per_channel;
+ unsigned int rx_ip_align;
+ unsigned int rx_dma_len;
+ unsigned int rx_buffer_order;
+ unsigned int rx_buffer_truesize;
+ unsigned int rx_page_buf_step;
+ unsigned int rx_bufs_per_page;
+ unsigned int rx_pages_per_batch;
+ unsigned int rx_prefix_size;
+ int rx_packet_hash_offset;
+ int rx_packet_len_offset;
+ int rx_packet_ts_offset;
+ bool rx_scatter;
+ struct efx_rss_context rss_context;
+ struct mutex rss_lock;
+ u32 vport_id;
+
+ unsigned int_error_count;
+ unsigned long int_error_expire;
+
+ bool must_realloc_vis;
+ bool irq_soft_enabled;
+ struct efx_buffer irq_status;
+ unsigned irq_zero_count;
+ unsigned irq_level;
+ struct delayed_work selftest_work;
+
+#ifdef CONFIG_SFC_MTD
+ struct list_head mtd_list;
+#endif
+
+ void *nic_data;
+ struct efx_mcdi_data *mcdi;
+
+ struct mutex mac_lock;
+ struct work_struct mac_work;
+ bool port_enabled;
+
+ bool mc_bist_for_other_fn;
+ bool port_initialized;
+ struct net_device *net_dev;
+
+ netdev_features_t fixed_features;
+
+ u16 num_mac_stats;
+ struct efx_buffer stats_buffer;
+ u64 rx_nodesc_drops_total;
+ u64 rx_nodesc_drops_while_down;
+ bool rx_nodesc_drops_prev_state;
+
+ unsigned int phy_type;
+ void *phy_data;
+ struct mdio_if_info mdio;
+ unsigned int mdio_bus;
+ enum efx_phy_mode phy_mode;
+
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(link_advertising);
+ u32 fec_config;
+ struct efx_link_state link_state;
+ unsigned int n_link_state_changes;
+
+ bool unicast_filter;
+ union efx_multicast_hash multicast_hash;
+ u8 wanted_fc;
+ unsigned fc_disable;
+
+ atomic_t rx_reset;
+ enum efx_loopback_mode loopback_mode;
+ u64 loopback_modes;
+
+ void *loopback_selftest;
+ /* We access loopback_selftest immediately before running XDP,
+ * so we want them next to each other.
+ */
+ struct bpf_prog __rcu *xdp_prog;
+
+ struct rw_semaphore filter_sem;
+ void *filter_state;
+#ifdef CONFIG_RFS_ACCEL
+ struct mutex rps_mutex;
+ unsigned long rps_slot_map;
+ struct efx_async_filter_insertion rps_slot[EFX_RPS_MAX_IN_FLIGHT];
+ spinlock_t rps_hash_lock;
+ struct hlist_head *rps_hash_table;
+ u32 rps_next_id;
+#endif
+
+ atomic_t active_queues;
+ atomic_t rxq_flush_pending;
+ atomic_t rxq_flush_outstanding;
+ wait_queue_head_t flush_wq;
+
+#ifdef CONFIG_SFC_SRIOV
+ unsigned vf_count;
+ unsigned vf_init_count;
+ unsigned vi_scale;
+#endif
+
+ struct efx_ptp_data *ptp_data;
+ bool ptp_warned;
+
+ char *vpd_sn;
+ bool xdp_rxq_info_failed;
+
+ struct notifier_block netdev_notifier;
+
+ unsigned int mem_bar;
+ u32 reg_base;
+
+ /* The following fields may be written more often */
+
+ struct delayed_work monitor_work ____cacheline_aligned_in_smp;
+ spinlock_t biu_lock;
+ int last_irq_cpu;
+ spinlock_t stats_lock;
+ atomic_t n_rx_noskb_drops;
+};
+
+static inline int efx_dev_registered(struct efx_nic *efx)
+{
+ return efx->net_dev->reg_state == NETREG_REGISTERED;
+}
+
+static inline unsigned int efx_port_num(struct efx_nic *efx)
+{
+ return efx->port_num;
+}
+
+struct efx_mtd_partition {
+ struct list_head node;
+ struct mtd_info mtd;
+ const char *dev_type_name;
+ const char *type_name;
+ char name[IFNAMSIZ + 20];
+};
+
+struct efx_udp_tunnel {
+#define TUNNEL_ENCAP_UDP_PORT_ENTRY_INVALID 0xffff
+ u16 type; /* TUNNEL_ENCAP_UDP_PORT_ENTRY_foo, see mcdi_pcol.h */
+ __be16 port;
+};
+
+/**
+ * struct efx_nic_type - Efx device type definition
+ * @mem_bar: Get the memory BAR
+ * @mem_map_size: Get memory BAR mapped size
+ * @probe: Probe the controller
+ * @remove: Free resources allocated by probe()
+ * @init: Initialise the controller
+ * @dimension_resources: Dimension controller resources (buffer table,
+ * and VIs once the available interrupt resources are clear)
+ * @fini: Shut down the controller
+ * @monitor: Periodic function for polling link state and hardware monitor
+ * @map_reset_reason: Map ethtool reset reason to a reset method
+ * @map_reset_flags: Map ethtool reset flags to a reset method, if possible
+ * @reset: Reset the controller hardware and possibly the PHY. This will
+ * be called while the controller is uninitialised.
+ * @probe_port: Probe the MAC and PHY
+ * @remove_port: Free resources allocated by probe_port()
+ * @handle_global_event: Handle a "global" event (may be %NULL)
+ * @fini_dmaq: Flush and finalise DMA queues (RX and TX queues)
+ * @prepare_flush: Prepare the hardware for flushing the DMA queues
+ * (for Falcon architecture)
+ * @finish_flush: Clean up after flushing the DMA queues (for Falcon
+ * architecture)
+ * @prepare_flr: Prepare for an FLR
+ * @finish_flr: Clean up after an FLR
+ * @describe_stats: Describe statistics for ethtool
+ * @update_stats: Update statistics not provided by event handling.
+ * Either argument may be %NULL.
+ * @update_stats_atomic: Update statistics while in atomic context, if that
+ * is more limiting than @update_stats. Otherwise, leave %NULL and
+ * driver core will call @update_stats.
+ * @start_stats: Start the regular fetching of statistics
+ * @pull_stats: Pull stats from the NIC and wait until they arrive.
+ * @stop_stats: Stop the regular fetching of statistics
+ * @push_irq_moderation: Apply interrupt moderation value
+ * @reconfigure_port: Push loopback/power/txdis changes to the MAC and PHY
+ * @prepare_enable_fc_tx: Prepare MAC to enable pause frame TX (may be %NULL)
+ * @reconfigure_mac: Push MAC address, MTU, flow control and filter settings
+ * to the hardware. Serialised by the mac_lock.
+ * @check_mac_fault: Check MAC fault state. True if fault present.
+ * @get_wol: Get WoL configuration from driver state
+ * @set_wol: Push WoL configuration to the NIC
+ * @resume_wol: Synchronise WoL state between driver and MC (e.g. after resume)
+ * @get_fec_stats: Get standard FEC statistics.
+ * @test_chip: Test registers. May use efx_farch_test_registers(), and is
+ * expected to reset the NIC.
+ * @test_nvram: Test validity of NVRAM contents
+ * @mcdi_request: Send an MCDI request with the given header and SDU.
+ * The SDU length may be any value from 0 up to the protocol-
+ * defined maximum, but its buffer will be padded to a multiple
+ * of 4 bytes.
+ * @mcdi_poll_response: Test whether an MCDI response is available.
+ * @mcdi_read_response: Read the MCDI response PDU. The offset will
+ * be a multiple of 4. The length may not be, but the buffer
+ * will be padded so it is safe to round up.
+ * @mcdi_poll_reboot: Test whether the MCDI has rebooted. If so,
+ * return an appropriate error code for aborting any current
+ * request; otherwise return 0.
+ * @irq_enable_master: Enable IRQs on the NIC. Each event queue must
+ * be separately enabled after this.
+ * @irq_test_generate: Generate a test IRQ
+ * @irq_disable_non_ev: Disable non-event IRQs on the NIC. Each event
+ * queue must be separately disabled before this.
+ * @irq_handle_msi: Handle MSI for a channel. The @dev_id argument is
+ * a pointer to the &struct efx_msi_context for the channel.
+ * @irq_handle_legacy: Handle legacy interrupt. The @dev_id argument
+ * is a pointer to the &struct efx_nic.
+ * @tx_probe: Allocate resources for TX queue (and select TXQ type)
+ * @tx_init: Initialise TX queue on the NIC
+ * @tx_remove: Free resources for TX queue
+ * @tx_write: Write TX descriptors and doorbell
+ * @tx_enqueue: Add an SKB to TX queue
+ * @rx_push_rss_config: Write RSS hash key and indirection table to the NIC
+ * @rx_pull_rss_config: Read RSS hash key and indirection table back from the NIC
+ * @rx_push_rss_context_config: Write RSS hash key and indirection table for
+ * user RSS context to the NIC
+ * @rx_pull_rss_context_config: Read RSS hash key and indirection table for user
+ * RSS context back from the NIC
+ * @rx_probe: Allocate resources for RX queue
+ * @rx_init: Initialise RX queue on the NIC
+ * @rx_remove: Free resources for RX queue
+ * @rx_write: Write RX descriptors and doorbell
+ * @rx_defer_refill: Generate a refill reminder event
+ * @rx_packet: Receive the queued RX buffer on a channel
+ * @rx_buf_hash_valid: Determine whether the RX prefix contains a valid hash
+ * @ev_probe: Allocate resources for event queue
+ * @ev_init: Initialise event queue on the NIC
+ * @ev_fini: Deinitialise event queue on the NIC
+ * @ev_remove: Free resources for event queue
+ * @ev_process: Process events for a queue, up to the given NAPI quota
+ * @ev_read_ack: Acknowledge read events on a queue, rearming its IRQ
+ * @ev_test_generate: Generate a test event
+ * @filter_table_probe: Probe filter capabilities and set up filter software state
+ * @filter_table_restore: Restore filters removed from hardware
+ * @filter_table_remove: Remove filters from hardware and tear down software state
+ * @filter_update_rx_scatter: Update filters after change to rx scatter setting
+ * @filter_insert: add or replace a filter
+ * @filter_remove_safe: remove a filter by ID, carefully
+ * @filter_get_safe: retrieve a filter by ID, carefully
+ * @filter_clear_rx: Remove all RX filters whose priority is less than or
+ * equal to the given priority and is not %EFX_FILTER_PRI_AUTO
+ * @filter_count_rx_used: Get the number of filters in use at a given priority
+ * @filter_get_rx_id_limit: Get maximum value of a filter id, plus 1
+ * @filter_get_rx_ids: Get list of RX filters at a given priority
+ * @filter_rfs_expire_one: Consider expiring a filter inserted for RFS.
+ * This must check whether the specified table entry is used by RFS
+ * and that rps_may_expire_flow() returns true for it.
+ * @mtd_probe: Probe and add MTD partitions associated with this net device,
+ * using efx_mtd_add()
+ * @mtd_rename: Set an MTD partition name using the net device name
+ * @mtd_read: Read from an MTD partition
+ * @mtd_erase: Erase part of an MTD partition
+ * @mtd_write: Write to an MTD partition
+ * @mtd_sync: Wait for write-back to complete on MTD partition. This
+ * also notifies the driver that a writer has finished using this
+ * partition.
+ * @ptp_write_host_time: Send host time to MC as part of sync protocol
+ * @ptp_set_ts_sync_events: Enable or disable sync events for inline RX
+ * timestamping, possibly only temporarily for the purposes of a reset.
+ * @ptp_set_ts_config: Set hardware timestamp configuration. The flags
+ * and tx_type will already have been validated but this operation
+ * must validate and update rx_filter.
+ * @get_phys_port_id: Get the underlying physical port id.
+ * @set_mac_address: Set the MAC address of the device
+ * @tso_versions: Returns mask of firmware-assisted TSO versions supported.
+ * If %NULL, then device does not support any TSO version.
+ * @udp_tnl_push_ports: Push the list of UDP tunnel ports to the NIC if required.
+ * @udp_tnl_has_port: Check if a port has been added as UDP tunnel
+ * @print_additional_fwver: Dump NIC-specific additional FW version info
+ * @sensor_event: Handle a sensor event from MCDI
+ * @rx_recycle_ring_size: Size of the RX recycle ring
+ * @revision: Hardware architecture revision
+ * @txd_ptr_tbl_base: TX descriptor ring base address
+ * @rxd_ptr_tbl_base: RX descriptor ring base address
+ * @buf_tbl_base: Buffer table base address
+ * @evq_ptr_tbl_base: Event queue pointer table base address
+ * @evq_rptr_tbl_base: Event queue read-pointer table base address
+ * @max_dma_mask: Maximum possible DMA mask
+ * @rx_prefix_size: Size of RX prefix before packet data
+ * @rx_hash_offset: Offset of RX flow hash within prefix
+ * @rx_ts_offset: Offset of timestamp within prefix
+ * @rx_buffer_padding: Size of padding at end of RX packet
+ * @can_rx_scatter: NIC is able to scatter packets to multiple buffers
+ * @always_rx_scatter: NIC will always scatter packets to multiple buffers
+ * @option_descriptors: NIC supports TX option descriptors
+ * @min_interrupt_mode: Lowest capability interrupt mode supported
+ * from &enum efx_int_mode.
+ * @timer_period_max: Maximum period of interrupt timer (in ticks)
+ * @offload_features: net_device feature flags for protocol offload
+ * features implemented in hardware
+ * @mcdi_max_ver: Maximum MCDI version supported
+ * @hwtstamp_filters: Mask of hardware timestamp filter types supported
+ */
+struct efx_nic_type {
+ bool is_vf;
+ unsigned int (*mem_bar)(struct efx_nic *efx);
+ unsigned int (*mem_map_size)(struct efx_nic *efx);
+ int (*probe)(struct efx_nic *efx);
+ void (*remove)(struct efx_nic *efx);
+ int (*init)(struct efx_nic *efx);
+ int (*dimension_resources)(struct efx_nic *efx);
+ void (*fini)(struct efx_nic *efx);
+ void (*monitor)(struct efx_nic *efx);
+ enum reset_type (*map_reset_reason)(enum reset_type reason);
+ int (*map_reset_flags)(u32 *flags);
+ int (*reset)(struct efx_nic *efx, enum reset_type method);
+ int (*probe_port)(struct efx_nic *efx);
+ void (*remove_port)(struct efx_nic *efx);
+ bool (*handle_global_event)(struct efx_channel *channel, efx_qword_t *);
+ int (*fini_dmaq)(struct efx_nic *efx);
+ void (*prepare_flush)(struct efx_nic *efx);
+ void (*finish_flush)(struct efx_nic *efx);
+ void (*prepare_flr)(struct efx_nic *efx);
+ void (*finish_flr)(struct efx_nic *efx);
+ size_t (*describe_stats)(struct efx_nic *efx, u8 *names);
+ size_t (*update_stats)(struct efx_nic *efx, u64 *full_stats,
+ struct rtnl_link_stats64 *core_stats);
+ size_t (*update_stats_atomic)(struct efx_nic *efx, u64 *full_stats,
+ struct rtnl_link_stats64 *core_stats);
+ void (*start_stats)(struct efx_nic *efx);
+ void (*pull_stats)(struct efx_nic *efx);
+ void (*stop_stats)(struct efx_nic *efx);
+ void (*push_irq_moderation)(struct efx_channel *channel);
+ int (*reconfigure_port)(struct efx_nic *efx);
+ void (*prepare_enable_fc_tx)(struct efx_nic *efx);
+ int (*reconfigure_mac)(struct efx_nic *efx, bool mtu_only);
+ bool (*check_mac_fault)(struct efx_nic *efx);
+ void (*get_wol)(struct efx_nic *efx, struct ethtool_wolinfo *wol);
+ int (*set_wol)(struct efx_nic *efx, u32 type);
+ void (*resume_wol)(struct efx_nic *efx);
+ void (*get_fec_stats)(struct efx_nic *efx,
+ struct ethtool_fec_stats *fec_stats);
+ unsigned int (*check_caps)(const struct efx_nic *efx,
+ u8 flag,
+ u32 offset);
+ int (*test_chip)(struct efx_nic *efx, struct efx_self_tests *tests);
+ int (*test_nvram)(struct efx_nic *efx);
+ void (*mcdi_request)(struct efx_nic *efx,
+ const efx_dword_t *hdr, size_t hdr_len,
+ const efx_dword_t *sdu, size_t sdu_len);
+ bool (*mcdi_poll_response)(struct efx_nic *efx);
+ void (*mcdi_read_response)(struct efx_nic *efx, efx_dword_t *pdu,
+ size_t pdu_offset, size_t pdu_len);
+ int (*mcdi_poll_reboot)(struct efx_nic *efx);
+ void (*mcdi_reboot_detected)(struct efx_nic *efx);
+ void (*irq_enable_master)(struct efx_nic *efx);
+ int (*irq_test_generate)(struct efx_nic *efx);
+ void (*irq_disable_non_ev)(struct efx_nic *efx);
+ irqreturn_t (*irq_handle_msi)(int irq, void *dev_id);
+ irqreturn_t (*irq_handle_legacy)(int irq, void *dev_id);
+ int (*tx_probe)(struct efx_tx_queue *tx_queue);
+ void (*tx_init)(struct efx_tx_queue *tx_queue);
+ void (*tx_remove)(struct efx_tx_queue *tx_queue);
+ void (*tx_write)(struct efx_tx_queue *tx_queue);
+ netdev_tx_t (*tx_enqueue)(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
+ unsigned int (*tx_limit_len)(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, unsigned int len);
+ int (*rx_push_rss_config)(struct efx_nic *efx, bool user,
+ const u32 *rx_indir_table, const u8 *key);
+ int (*rx_pull_rss_config)(struct efx_nic *efx);
+ int (*rx_push_rss_context_config)(struct efx_nic *efx,
+ struct efx_rss_context *ctx,
+ const u32 *rx_indir_table,
+ const u8 *key);
+ int (*rx_pull_rss_context_config)(struct efx_nic *efx,
+ struct efx_rss_context *ctx);
+ void (*rx_restore_rss_contexts)(struct efx_nic *efx);
+ int (*rx_probe)(struct efx_rx_queue *rx_queue);
+ void (*rx_init)(struct efx_rx_queue *rx_queue);
+ void (*rx_remove)(struct efx_rx_queue *rx_queue);
+ void (*rx_write)(struct efx_rx_queue *rx_queue);
+ void (*rx_defer_refill)(struct efx_rx_queue *rx_queue);
+ void (*rx_packet)(struct efx_channel *channel);
+ bool (*rx_buf_hash_valid)(const u8 *prefix);
+ int (*ev_probe)(struct efx_channel *channel);
+ int (*ev_init)(struct efx_channel *channel);
+ void (*ev_fini)(struct efx_channel *channel);
+ void (*ev_remove)(struct efx_channel *channel);
+ int (*ev_process)(struct efx_channel *channel, int quota);
+ void (*ev_read_ack)(struct efx_channel *channel);
+ void (*ev_test_generate)(struct efx_channel *channel);
+ int (*filter_table_probe)(struct efx_nic *efx);
+ void (*filter_table_restore)(struct efx_nic *efx);
+ void (*filter_table_remove)(struct efx_nic *efx);
+ void (*filter_update_rx_scatter)(struct efx_nic *efx);
+ s32 (*filter_insert)(struct efx_nic *efx,
+ struct efx_filter_spec *spec, bool replace);
+ int (*filter_remove_safe)(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 filter_id);
+ int (*filter_get_safe)(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 filter_id, struct efx_filter_spec *);
+ int (*filter_clear_rx)(struct efx_nic *efx,
+ enum efx_filter_priority priority);
+ u32 (*filter_count_rx_used)(struct efx_nic *efx,
+ enum efx_filter_priority priority);
+ u32 (*filter_get_rx_id_limit)(struct efx_nic *efx);
+ s32 (*filter_get_rx_ids)(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 *buf, u32 size);
+#ifdef CONFIG_RFS_ACCEL
+ bool (*filter_rfs_expire_one)(struct efx_nic *efx, u32 flow_id,
+ unsigned int index);
+#endif
+#ifdef CONFIG_SFC_MTD
+ int (*mtd_probe)(struct efx_nic *efx);
+ void (*mtd_rename)(struct efx_mtd_partition *part);
+ int (*mtd_read)(struct mtd_info *mtd, loff_t start, size_t len,
+ size_t *retlen, u8 *buffer);
+ int (*mtd_erase)(struct mtd_info *mtd, loff_t start, size_t len);
+ int (*mtd_write)(struct mtd_info *mtd, loff_t start, size_t len,
+ size_t *retlen, const u8 *buffer);
+ int (*mtd_sync)(struct mtd_info *mtd);
+#endif
+ void (*ptp_write_host_time)(struct efx_nic *efx, u32 host_time);
+ int (*ptp_set_ts_sync_events)(struct efx_nic *efx, bool en, bool temp);
+ int (*ptp_set_ts_config)(struct efx_nic *efx,
+ struct hwtstamp_config *init);
+ int (*sriov_configure)(struct efx_nic *efx, int num_vfs);
+ int (*vlan_rx_add_vid)(struct efx_nic *efx, __be16 proto, u16 vid);
+ int (*vlan_rx_kill_vid)(struct efx_nic *efx, __be16 proto, u16 vid);
+ int (*get_phys_port_id)(struct efx_nic *efx,
+ struct netdev_phys_item_id *ppid);
+ int (*sriov_init)(struct efx_nic *efx);
+ void (*sriov_fini)(struct efx_nic *efx);
+ bool (*sriov_wanted)(struct efx_nic *efx);
+ void (*sriov_reset)(struct efx_nic *efx);
+ void (*sriov_flr)(struct efx_nic *efx, unsigned vf_i);
+ int (*sriov_set_vf_mac)(struct efx_nic *efx, int vf_i, const u8 *mac);
+ int (*sriov_set_vf_vlan)(struct efx_nic *efx, int vf_i, u16 vlan,
+ u8 qos);
+ int (*sriov_set_vf_spoofchk)(struct efx_nic *efx, int vf_i,
+ bool spoofchk);
+ int (*sriov_get_vf_config)(struct efx_nic *efx, int vf_i,
+ struct ifla_vf_info *ivi);
+ int (*sriov_set_vf_link_state)(struct efx_nic *efx, int vf_i,
+ int link_state);
+ int (*vswitching_probe)(struct efx_nic *efx);
+ int (*vswitching_restore)(struct efx_nic *efx);
+ void (*vswitching_remove)(struct efx_nic *efx);
+ int (*get_mac_address)(struct efx_nic *efx, unsigned char *perm_addr);
+ int (*set_mac_address)(struct efx_nic *efx);
+ u32 (*tso_versions)(struct efx_nic *efx);
+ int (*udp_tnl_push_ports)(struct efx_nic *efx);
+ bool (*udp_tnl_has_port)(struct efx_nic *efx, __be16 port);
+ size_t (*print_additional_fwver)(struct efx_nic *efx, char *buf,
+ size_t len);
+ void (*sensor_event)(struct efx_nic *efx, efx_qword_t *ev);
+ unsigned int (*rx_recycle_ring_size)(const struct efx_nic *efx);
+
+ int revision;
+ unsigned int txd_ptr_tbl_base;
+ unsigned int rxd_ptr_tbl_base;
+ unsigned int buf_tbl_base;
+ unsigned int evq_ptr_tbl_base;
+ unsigned int evq_rptr_tbl_base;
+ u64 max_dma_mask;
+ unsigned int rx_prefix_size;
+ unsigned int rx_hash_offset;
+ unsigned int rx_ts_offset;
+ unsigned int rx_buffer_padding;
+ bool can_rx_scatter;
+ bool always_rx_scatter;
+ bool option_descriptors;
+ unsigned int min_interrupt_mode;
+ unsigned int timer_period_max;
+ netdev_features_t offload_features;
+ int mcdi_max_ver;
+ unsigned int max_rx_ip_filters;
+ u32 hwtstamp_filters;
+ unsigned int rx_hash_key_size;
+};
+
+/**************************************************************************
+ *
+ * Prototypes and inline functions
+ *
+ *************************************************************************/
+
+static inline struct efx_channel *
+efx_get_channel(struct efx_nic *efx, unsigned index)
+{
+ EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_channels);
+ return efx->channel[index];
+}
+
+/* Iterate over all used channels */
+#define efx_for_each_channel(_channel, _efx) \
+ for (_channel = (_efx)->channel[0]; \
+ _channel; \
+ _channel = (_channel->channel + 1 < (_efx)->n_channels) ? \
+ (_efx)->channel[_channel->channel + 1] : NULL)
+
+/* Iterate over all used channels in reverse */
+#define efx_for_each_channel_rev(_channel, _efx) \
+ for (_channel = (_efx)->channel[(_efx)->n_channels - 1]; \
+ _channel; \
+ _channel = _channel->channel ? \
+ (_efx)->channel[_channel->channel - 1] : NULL)
+
+static inline struct efx_channel *
+efx_get_tx_channel(struct efx_nic *efx, unsigned int index)
+{
+ EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_tx_channels);
+ return efx->channel[efx->tx_channel_offset + index];
+}
+
+static inline struct efx_channel *
+efx_get_xdp_channel(struct efx_nic *efx, unsigned int index)
+{
+ EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_xdp_channels);
+ return efx->channel[efx->xdp_channel_offset + index];
+}
+
+static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel)
+{
+ return channel->channel - channel->efx->xdp_channel_offset <
+ channel->efx->n_xdp_channels;
+}
+
+static inline bool efx_channel_has_tx_queues(struct efx_channel *channel)
+{
+ return true;
+}
+
+static inline unsigned int efx_channel_num_tx_queues(struct efx_channel *channel)
+{
+ if (efx_channel_is_xdp_tx(channel))
+ return channel->efx->xdp_tx_per_channel;
+ return channel->efx->tx_queues_per_channel;
+}
+
+static inline struct efx_tx_queue *
+efx_channel_get_tx_queue(struct efx_channel *channel, unsigned int type)
+{
+ EFX_WARN_ON_ONCE_PARANOID(type >= EFX_TXQ_TYPES);
+ return channel->tx_queue_by_type[type];
+}
+
+static inline struct efx_tx_queue *
+efx_get_tx_queue(struct efx_nic *efx, unsigned int index, unsigned int type)
+{
+ struct efx_channel *channel = efx_get_tx_channel(efx, index);
+
+ return efx_channel_get_tx_queue(channel, type);
+}
+
+/* Iterate over all TX queues belonging to a channel */
+#define efx_for_each_channel_tx_queue(_tx_queue, _channel) \
+ if (!efx_channel_has_tx_queues(_channel)) \
+ ; \
+ else \
+ for (_tx_queue = (_channel)->tx_queue; \
+ _tx_queue < (_channel)->tx_queue + \
+ efx_channel_num_tx_queues(_channel); \
+ _tx_queue++)
+
+static inline bool efx_channel_has_rx_queue(struct efx_channel *channel)
+{
+ return channel->rx_queue.core_index >= 0;
+}
+
+static inline struct efx_rx_queue *
+efx_channel_get_rx_queue(struct efx_channel *channel)
+{
+ EFX_WARN_ON_ONCE_PARANOID(!efx_channel_has_rx_queue(channel));
+ return &channel->rx_queue;
+}
+
+/* Iterate over all RX queues belonging to a channel */
+#define efx_for_each_channel_rx_queue(_rx_queue, _channel) \
+ if (!efx_channel_has_rx_queue(_channel)) \
+ ; \
+ else \
+ for (_rx_queue = &(_channel)->rx_queue; \
+ _rx_queue; \
+ _rx_queue = NULL)
+
+static inline struct efx_channel *
+efx_rx_queue_channel(struct efx_rx_queue *rx_queue)
+{
+ return container_of(rx_queue, struct efx_channel, rx_queue);
+}
+
+static inline int efx_rx_queue_index(struct efx_rx_queue *rx_queue)
+{
+ return efx_rx_queue_channel(rx_queue)->channel;
+}
+
+/* Returns a pointer to the specified receive buffer in the RX
+ * descriptor queue.
+ */
+static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue,
+ unsigned int index)
+{
+ return &rx_queue->buffer[index];
+}
+
+static inline struct efx_rx_buffer *
+efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
+{
+ if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
+ return efx_rx_buffer(rx_queue, 0);
+ else
+ return rx_buf + 1;
+}
+
+/**
+ * EFX_MAX_FRAME_LEN - calculate maximum frame length
+ *
+ * This calculates the maximum frame length that will be used for a
+ * given MTU. The frame length will be equal to the MTU plus a
+ * constant amount of header space and padding. This is the quantity
+ * that the net driver will program into the MAC as the maximum frame
+ * length.
+ *
+ * The 10G MAC requires 8-byte alignment on the frame
+ * length, so we round up to the nearest 8.
+ *
+ * Re-clocking by the XGXS on RX can reduce an IPG to 32 bits (half an
+ * XGMII cycle). If the frame length reaches the maximum value in the
+ * same cycle, the XMAC can miss the IPG altogether. We work around
+ * this by adding a further 16 bytes.
+ */
+#define EFX_FRAME_PAD 16
+#define EFX_MAX_FRAME_LEN(mtu) \
+ (ALIGN(((mtu) + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN + EFX_FRAME_PAD), 8))
+
+static inline bool efx_xmit_with_hwtstamp(struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP;
+}
+static inline void efx_xmit_hwtstamp_pending(struct sk_buff *skb)
+{
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+}
+
+/* Get the max fill level of the TX queues on this channel */
+static inline unsigned int
+efx_channel_tx_fill_level(struct efx_channel *channel)
+{
+ struct efx_tx_queue *tx_queue;
+ unsigned int fill_level = 0;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel)
+ fill_level = max(fill_level,
+ tx_queue->insert_count - tx_queue->read_count);
+
+ return fill_level;
+}
+
+/* Conservative approximation of efx_channel_tx_fill_level using cached value */
+static inline unsigned int
+efx_channel_tx_old_fill_level(struct efx_channel *channel)
+{
+ struct efx_tx_queue *tx_queue;
+ unsigned int fill_level = 0;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel)
+ fill_level = max(fill_level,
+ tx_queue->insert_count - tx_queue->old_read_count);
+
+ return fill_level;
+}
+
+/* Get all supported features.
+ * If a feature is not fixed, it is present in hw_features.
+ * If a feature is fixed, it does not present in hw_features, but
+ * always in features.
+ */
+static inline netdev_features_t efx_supported_features(const struct efx_nic *efx)
+{
+ const struct net_device *net_dev = efx->net_dev;
+
+ return net_dev->features | net_dev->hw_features;
+}
+
+/* Get the current TX queue insert index. */
+static inline unsigned int
+efx_tx_queue_get_insert_index(const struct efx_tx_queue *tx_queue)
+{
+ return tx_queue->insert_count & tx_queue->ptr_mask;
+}
+
+/* Get a TX buffer. */
+static inline struct efx_tx_buffer *
+__efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
+{
+ return &tx_queue->buffer[efx_tx_queue_get_insert_index(tx_queue)];
+}
+
+/* Get a TX buffer, checking it's not currently in use. */
+static inline struct efx_tx_buffer *
+efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
+{
+ struct efx_tx_buffer *buffer =
+ __efx_tx_queue_get_insert_buffer(tx_queue);
+
+ EFX_WARN_ON_ONCE_PARANOID(buffer->len);
+ EFX_WARN_ON_ONCE_PARANOID(buffer->flags);
+ EFX_WARN_ON_ONCE_PARANOID(buffer->unmap_len);
+
+ return buffer;
+}
+
+#endif /* EFX_NET_DRIVER_H */
diff --git a/drivers/net/ethernet/sfc/siena/nic.c b/drivers/net/ethernet/sfc/siena/nic.c
new file mode 100644
index 000000000000..22fbb0ae77fb
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/nic.c
@@ -0,0 +1,580 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/cpu_rmap.h>
+#include "net_driver.h"
+#include "bitfield.h"
+#include "efx.h"
+#include "nic.h"
+#include "ef10_regs.h"
+#include "farch_regs.h"
+#include "io.h"
+#include "workarounds.h"
+#include "mcdi_pcol.h"
+
+/**************************************************************************
+ *
+ * Generic buffer handling
+ * These buffers are used for interrupt status, MAC stats, etc.
+ *
+ **************************************************************************/
+
+int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer,
+ unsigned int len, gfp_t gfp_flags)
+{
+ buffer->addr = dma_alloc_coherent(&efx->pci_dev->dev, len,
+ &buffer->dma_addr, gfp_flags);
+ if (!buffer->addr)
+ return -ENOMEM;
+ buffer->len = len;
+ return 0;
+}
+
+void efx_nic_free_buffer(struct efx_nic *efx, struct efx_buffer *buffer)
+{
+ if (buffer->addr) {
+ dma_free_coherent(&efx->pci_dev->dev, buffer->len,
+ buffer->addr, buffer->dma_addr);
+ buffer->addr = NULL;
+ }
+}
+
+/* Check whether an event is present in the eventq at the current
+ * read pointer. Only useful for self-test.
+ */
+bool efx_nic_event_present(struct efx_channel *channel)
+{
+ return efx_event_present(efx_event(channel, channel->eventq_read_ptr));
+}
+
+void efx_nic_event_test_start(struct efx_channel *channel)
+{
+ channel->event_test_cpu = -1;
+ smp_wmb();
+ channel->efx->type->ev_test_generate(channel);
+}
+
+int efx_nic_irq_test_start(struct efx_nic *efx)
+{
+ efx->last_irq_cpu = -1;
+ smp_wmb();
+ return efx->type->irq_test_generate(efx);
+}
+
+/* Hook interrupt handler(s)
+ * Try MSI and then legacy interrupts.
+ */
+int efx_nic_init_interrupt(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ unsigned int n_irqs;
+ int rc;
+
+ if (!EFX_INT_MODE_USE_MSI(efx)) {
+ rc = request_irq(efx->legacy_irq,
+ efx->type->irq_handle_legacy, IRQF_SHARED,
+ efx->name, efx);
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev,
+ "failed to hook legacy IRQ %d\n",
+ efx->pci_dev->irq);
+ goto fail1;
+ }
+ efx->irqs_hooked = true;
+ return 0;
+ }
+
+#ifdef CONFIG_RFS_ACCEL
+ if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
+ efx->net_dev->rx_cpu_rmap =
+ alloc_irq_cpu_rmap(efx->n_rx_channels);
+ if (!efx->net_dev->rx_cpu_rmap) {
+ rc = -ENOMEM;
+ goto fail1;
+ }
+ }
+#endif
+
+ /* Hook MSI or MSI-X interrupt */
+ n_irqs = 0;
+ efx_for_each_channel(channel, efx) {
+ rc = request_irq(channel->irq, efx->type->irq_handle_msi,
+ IRQF_PROBE_SHARED, /* Not shared */
+ efx->msi_context[channel->channel].name,
+ &efx->msi_context[channel->channel]);
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev,
+ "failed to hook IRQ %d\n", channel->irq);
+ goto fail2;
+ }
+ ++n_irqs;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (efx->interrupt_mode == EFX_INT_MODE_MSIX &&
+ channel->channel < efx->n_rx_channels) {
+ rc = irq_cpu_rmap_add(efx->net_dev->rx_cpu_rmap,
+ channel->irq);
+ if (rc)
+ goto fail2;
+ }
+#endif
+ }
+
+ efx->irqs_hooked = true;
+ return 0;
+
+ fail2:
+#ifdef CONFIG_RFS_ACCEL
+ free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
+ efx->net_dev->rx_cpu_rmap = NULL;
+#endif
+ efx_for_each_channel(channel, efx) {
+ if (n_irqs-- == 0)
+ break;
+ free_irq(channel->irq, &efx->msi_context[channel->channel]);
+ }
+ fail1:
+ return rc;
+}
+
+void efx_nic_fini_interrupt(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+#ifdef CONFIG_RFS_ACCEL
+ free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
+ efx->net_dev->rx_cpu_rmap = NULL;
+#endif
+
+ if (!efx->irqs_hooked)
+ return;
+ if (EFX_INT_MODE_USE_MSI(efx)) {
+ /* Disable MSI/MSI-X interrupts */
+ efx_for_each_channel(channel, efx)
+ free_irq(channel->irq,
+ &efx->msi_context[channel->channel]);
+ } else {
+ /* Disable legacy interrupt */
+ free_irq(efx->legacy_irq, efx);
+ }
+ efx->irqs_hooked = false;
+}
+
+/* Register dump */
+
+#define REGISTER_REVISION_FA 1
+#define REGISTER_REVISION_FB 2
+#define REGISTER_REVISION_FC 3
+#define REGISTER_REVISION_FZ 3 /* last Falcon arch revision */
+#define REGISTER_REVISION_ED 4
+#define REGISTER_REVISION_EZ 4 /* latest EF10 revision */
+
+struct efx_nic_reg {
+ u32 offset:24;
+ u32 min_revision:3, max_revision:3;
+};
+
+#define REGISTER(name, arch, min_rev, max_rev) { \
+ arch ## R_ ## min_rev ## max_rev ## _ ## name, \
+ REGISTER_REVISION_ ## arch ## min_rev, \
+ REGISTER_REVISION_ ## arch ## max_rev \
+}
+#define REGISTER_AA(name) REGISTER(name, F, A, A)
+#define REGISTER_AB(name) REGISTER(name, F, A, B)
+#define REGISTER_AZ(name) REGISTER(name, F, A, Z)
+#define REGISTER_BB(name) REGISTER(name, F, B, B)
+#define REGISTER_BZ(name) REGISTER(name, F, B, Z)
+#define REGISTER_CZ(name) REGISTER(name, F, C, Z)
+#define REGISTER_DZ(name) REGISTER(name, E, D, Z)
+
+static const struct efx_nic_reg efx_nic_regs[] = {
+ REGISTER_AZ(ADR_REGION),
+ REGISTER_AZ(INT_EN_KER),
+ REGISTER_BZ(INT_EN_CHAR),
+ REGISTER_AZ(INT_ADR_KER),
+ REGISTER_BZ(INT_ADR_CHAR),
+ /* INT_ACK_KER is WO */
+ /* INT_ISR0 is RC */
+ REGISTER_AZ(HW_INIT),
+ REGISTER_CZ(USR_EV_CFG),
+ REGISTER_AB(EE_SPI_HCMD),
+ REGISTER_AB(EE_SPI_HADR),
+ REGISTER_AB(EE_SPI_HDATA),
+ REGISTER_AB(EE_BASE_PAGE),
+ REGISTER_AB(EE_VPD_CFG0),
+ /* EE_VPD_SW_CNTL and EE_VPD_SW_DATA are not used */
+ /* PMBX_DBG_IADDR and PBMX_DBG_IDATA are indirect */
+ /* PCIE_CORE_INDIRECT is indirect */
+ REGISTER_AB(NIC_STAT),
+ REGISTER_AB(GPIO_CTL),
+ REGISTER_AB(GLB_CTL),
+ /* FATAL_INTR_KER and FATAL_INTR_CHAR are partly RC */
+ REGISTER_BZ(DP_CTRL),
+ REGISTER_AZ(MEM_STAT),
+ REGISTER_AZ(CS_DEBUG),
+ REGISTER_AZ(ALTERA_BUILD),
+ REGISTER_AZ(CSR_SPARE),
+ REGISTER_AB(PCIE_SD_CTL0123),
+ REGISTER_AB(PCIE_SD_CTL45),
+ REGISTER_AB(PCIE_PCS_CTL_STAT),
+ /* DEBUG_DATA_OUT is not used */
+ /* DRV_EV is WO */
+ REGISTER_AZ(EVQ_CTL),
+ REGISTER_AZ(EVQ_CNT1),
+ REGISTER_AZ(EVQ_CNT2),
+ REGISTER_AZ(BUF_TBL_CFG),
+ REGISTER_AZ(SRM_RX_DC_CFG),
+ REGISTER_AZ(SRM_TX_DC_CFG),
+ REGISTER_AZ(SRM_CFG),
+ /* BUF_TBL_UPD is WO */
+ REGISTER_AZ(SRM_UPD_EVQ),
+ REGISTER_AZ(SRAM_PARITY),
+ REGISTER_AZ(RX_CFG),
+ REGISTER_BZ(RX_FILTER_CTL),
+ /* RX_FLUSH_DESCQ is WO */
+ REGISTER_AZ(RX_DC_CFG),
+ REGISTER_AZ(RX_DC_PF_WM),
+ REGISTER_BZ(RX_RSS_TKEY),
+ /* RX_NODESC_DROP is RC */
+ REGISTER_AA(RX_SELF_RST),
+ /* RX_DEBUG, RX_PUSH_DROP are not used */
+ REGISTER_CZ(RX_RSS_IPV6_REG1),
+ REGISTER_CZ(RX_RSS_IPV6_REG2),
+ REGISTER_CZ(RX_RSS_IPV6_REG3),
+ /* TX_FLUSH_DESCQ is WO */
+ REGISTER_AZ(TX_DC_CFG),
+ REGISTER_AA(TX_CHKSM_CFG),
+ REGISTER_AZ(TX_CFG),
+ /* TX_PUSH_DROP is not used */
+ REGISTER_AZ(TX_RESERVED),
+ REGISTER_BZ(TX_PACE),
+ /* TX_PACE_DROP_QID is RC */
+ REGISTER_BB(TX_VLAN),
+ REGISTER_BZ(TX_IPFIL_PORTEN),
+ REGISTER_AB(MD_TXD),
+ REGISTER_AB(MD_RXD),
+ REGISTER_AB(MD_CS),
+ REGISTER_AB(MD_PHY_ADR),
+ REGISTER_AB(MD_ID),
+ /* MD_STAT is RC */
+ REGISTER_AB(MAC_STAT_DMA),
+ REGISTER_AB(MAC_CTRL),
+ REGISTER_BB(GEN_MODE),
+ REGISTER_AB(MAC_MC_HASH_REG0),
+ REGISTER_AB(MAC_MC_HASH_REG1),
+ REGISTER_AB(GM_CFG1),
+ REGISTER_AB(GM_CFG2),
+ /* GM_IPG and GM_HD are not used */
+ REGISTER_AB(GM_MAX_FLEN),
+ /* GM_TEST is not used */
+ REGISTER_AB(GM_ADR1),
+ REGISTER_AB(GM_ADR2),
+ REGISTER_AB(GMF_CFG0),
+ REGISTER_AB(GMF_CFG1),
+ REGISTER_AB(GMF_CFG2),
+ REGISTER_AB(GMF_CFG3),
+ REGISTER_AB(GMF_CFG4),
+ REGISTER_AB(GMF_CFG5),
+ REGISTER_BB(TX_SRC_MAC_CTL),
+ REGISTER_AB(XM_ADR_LO),
+ REGISTER_AB(XM_ADR_HI),
+ REGISTER_AB(XM_GLB_CFG),
+ REGISTER_AB(XM_TX_CFG),
+ REGISTER_AB(XM_RX_CFG),
+ REGISTER_AB(XM_MGT_INT_MASK),
+ REGISTER_AB(XM_FC),
+ REGISTER_AB(XM_PAUSE_TIME),
+ REGISTER_AB(XM_TX_PARAM),
+ REGISTER_AB(XM_RX_PARAM),
+ /* XM_MGT_INT_MSK (note no 'A') is RC */
+ REGISTER_AB(XX_PWR_RST),
+ REGISTER_AB(XX_SD_CTL),
+ REGISTER_AB(XX_TXDRV_CTL),
+ /* XX_PRBS_CTL, XX_PRBS_CHK and XX_PRBS_ERR are not used */
+ /* XX_CORE_STAT is partly RC */
+ REGISTER_DZ(BIU_HW_REV_ID),
+ REGISTER_DZ(MC_DB_LWRD),
+ REGISTER_DZ(MC_DB_HWRD),
+};
+
+struct efx_nic_reg_table {
+ u32 offset:24;
+ u32 min_revision:3, max_revision:3;
+ u32 step:6, rows:21;
+};
+
+#define REGISTER_TABLE_DIMENSIONS(_, offset, arch, min_rev, max_rev, step, rows) { \
+ offset, \
+ REGISTER_REVISION_ ## arch ## min_rev, \
+ REGISTER_REVISION_ ## arch ## max_rev, \
+ step, rows \
+}
+#define REGISTER_TABLE(name, arch, min_rev, max_rev) \
+ REGISTER_TABLE_DIMENSIONS( \
+ name, arch ## R_ ## min_rev ## max_rev ## _ ## name, \
+ arch, min_rev, max_rev, \
+ arch ## R_ ## min_rev ## max_rev ## _ ## name ## _STEP, \
+ arch ## R_ ## min_rev ## max_rev ## _ ## name ## _ROWS)
+#define REGISTER_TABLE_AA(name) REGISTER_TABLE(name, F, A, A)
+#define REGISTER_TABLE_AZ(name) REGISTER_TABLE(name, F, A, Z)
+#define REGISTER_TABLE_BB(name) REGISTER_TABLE(name, F, B, B)
+#define REGISTER_TABLE_BZ(name) REGISTER_TABLE(name, F, B, Z)
+#define REGISTER_TABLE_BB_CZ(name) \
+ REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, B, B, \
+ FR_BZ_ ## name ## _STEP, \
+ FR_BB_ ## name ## _ROWS), \
+ REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, C, Z, \
+ FR_BZ_ ## name ## _STEP, \
+ FR_CZ_ ## name ## _ROWS)
+#define REGISTER_TABLE_CZ(name) REGISTER_TABLE(name, F, C, Z)
+#define REGISTER_TABLE_DZ(name) REGISTER_TABLE(name, E, D, Z)
+
+static const struct efx_nic_reg_table efx_nic_reg_tables[] = {
+ /* DRIVER is not used */
+ /* EVQ_RPTR, TIMER_COMMAND, USR_EV and {RX,TX}_DESC_UPD are WO */
+ REGISTER_TABLE_BB(TX_IPFIL_TBL),
+ REGISTER_TABLE_BB(TX_SRC_MAC_TBL),
+ REGISTER_TABLE_AA(RX_DESC_PTR_TBL_KER),
+ REGISTER_TABLE_BB_CZ(RX_DESC_PTR_TBL),
+ REGISTER_TABLE_AA(TX_DESC_PTR_TBL_KER),
+ REGISTER_TABLE_BB_CZ(TX_DESC_PTR_TBL),
+ REGISTER_TABLE_AA(EVQ_PTR_TBL_KER),
+ REGISTER_TABLE_BB_CZ(EVQ_PTR_TBL),
+ /* We can't reasonably read all of the buffer table (up to 8MB!).
+ * However this driver will only use a few entries. Reading
+ * 1K entries allows for some expansion of queue count and
+ * size before we need to change the version. */
+ REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL_KER, FR_AA_BUF_FULL_TBL_KER,
+ F, A, A, 8, 1024),
+ REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL, FR_BZ_BUF_FULL_TBL,
+ F, B, Z, 8, 1024),
+ REGISTER_TABLE_CZ(RX_MAC_FILTER_TBL0),
+ REGISTER_TABLE_BB_CZ(TIMER_TBL),
+ REGISTER_TABLE_BB_CZ(TX_PACE_TBL),
+ REGISTER_TABLE_BZ(RX_INDIRECTION_TBL),
+ /* TX_FILTER_TBL0 is huge and not used by this driver */
+ REGISTER_TABLE_CZ(TX_MAC_FILTER_TBL0),
+ REGISTER_TABLE_CZ(MC_TREG_SMEM),
+ /* MSIX_PBA_TABLE is not mapped */
+ /* SRM_DBG is not mapped (and is redundant with BUF_FLL_TBL) */
+ REGISTER_TABLE_BZ(RX_FILTER_TBL0),
+ REGISTER_TABLE_DZ(BIU_MC_SFT_STATUS),
+};
+
+size_t efx_nic_get_regs_len(struct efx_nic *efx)
+{
+ const struct efx_nic_reg *reg;
+ const struct efx_nic_reg_table *table;
+ size_t len = 0;
+
+ for (reg = efx_nic_regs;
+ reg < efx_nic_regs + ARRAY_SIZE(efx_nic_regs);
+ reg++)
+ if (efx->type->revision >= reg->min_revision &&
+ efx->type->revision <= reg->max_revision)
+ len += sizeof(efx_oword_t);
+
+ for (table = efx_nic_reg_tables;
+ table < efx_nic_reg_tables + ARRAY_SIZE(efx_nic_reg_tables);
+ table++)
+ if (efx->type->revision >= table->min_revision &&
+ efx->type->revision <= table->max_revision)
+ len += table->rows * min_t(size_t, table->step, 16);
+
+ return len;
+}
+
+void efx_nic_get_regs(struct efx_nic *efx, void *buf)
+{
+ const struct efx_nic_reg *reg;
+ const struct efx_nic_reg_table *table;
+
+ for (reg = efx_nic_regs;
+ reg < efx_nic_regs + ARRAY_SIZE(efx_nic_regs);
+ reg++) {
+ if (efx->type->revision >= reg->min_revision &&
+ efx->type->revision <= reg->max_revision) {
+ efx_reado(efx, (efx_oword_t *)buf, reg->offset);
+ buf += sizeof(efx_oword_t);
+ }
+ }
+
+ for (table = efx_nic_reg_tables;
+ table < efx_nic_reg_tables + ARRAY_SIZE(efx_nic_reg_tables);
+ table++) {
+ size_t size, i;
+
+ if (!(efx->type->revision >= table->min_revision &&
+ efx->type->revision <= table->max_revision))
+ continue;
+
+ size = min_t(size_t, table->step, 16);
+
+ for (i = 0; i < table->rows; i++) {
+ switch (table->step) {
+ case 4: /* 32-bit SRAM */
+ efx_readd(efx, buf, table->offset + 4 * i);
+ break;
+ case 8: /* 64-bit SRAM */
+ efx_sram_readq(efx,
+ efx->membase + table->offset,
+ buf, i);
+ break;
+ case 16: /* 128-bit-readable register */
+ efx_reado_table(efx, buf, table->offset, i);
+ break;
+ case 32: /* 128-bit register, interleaved */
+ efx_reado_table(efx, buf, table->offset, 2 * i);
+ break;
+ default:
+ WARN_ON(1);
+ return;
+ }
+ buf += size;
+ }
+ }
+}
+
+/**
+ * efx_nic_describe_stats - Describe supported statistics for ethtool
+ * @desc: Array of &struct efx_hw_stat_desc describing the statistics
+ * @count: Length of the @desc array
+ * @mask: Bitmask of which elements of @desc are enabled
+ * @names: Buffer to copy names to, or %NULL. The names are copied
+ * starting at intervals of %ETH_GSTRING_LEN bytes.
+ *
+ * Returns the number of visible statistics, i.e. the number of set
+ * bits in the first @count bits of @mask for which a name is defined.
+ */
+size_t efx_nic_describe_stats(const struct efx_hw_stat_desc *desc, size_t count,
+ const unsigned long *mask, u8 *names)
+{
+ size_t visible = 0;
+ size_t index;
+
+ for_each_set_bit(index, mask, count) {
+ if (desc[index].name) {
+ if (names) {
+ strlcpy(names, desc[index].name,
+ ETH_GSTRING_LEN);
+ names += ETH_GSTRING_LEN;
+ }
+ ++visible;
+ }
+ }
+
+ return visible;
+}
+
+/**
+ * efx_nic_copy_stats - Copy stats from the DMA buffer in to an
+ * intermediate buffer. This is used to get a consistent
+ * set of stats while the DMA buffer can be written at any time
+ * by the NIC.
+ * @efx: The associated NIC.
+ * @dest: Destination buffer. Must be the same size as the DMA buffer.
+ */
+int efx_nic_copy_stats(struct efx_nic *efx, __le64 *dest)
+{
+ __le64 *dma_stats = efx->stats_buffer.addr;
+ __le64 generation_start, generation_end;
+ int rc = 0, retry;
+
+ if (!dest)
+ return 0;
+
+ if (!dma_stats)
+ goto return_zeroes;
+
+ /* If we're unlucky enough to read statistics during the DMA, wait
+ * up to 10ms for it to finish (typically takes <500us)
+ */
+ for (retry = 0; retry < 100; ++retry) {
+ generation_end = dma_stats[efx->num_mac_stats - 1];
+ if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
+ goto return_zeroes;
+ rmb();
+ memcpy(dest, dma_stats, efx->num_mac_stats * sizeof(__le64));
+ rmb();
+ generation_start = dma_stats[MC_CMD_MAC_GENERATION_START];
+ if (generation_end == generation_start)
+ return 0; /* return good data */
+ udelay(100);
+ }
+
+ rc = -EIO;
+
+return_zeroes:
+ memset(dest, 0, efx->num_mac_stats * sizeof(u64));
+ return rc;
+}
+
+/**
+ * efx_nic_update_stats - Convert statistics DMA buffer to array of u64
+ * @desc: Array of &struct efx_hw_stat_desc describing the DMA buffer
+ * layout. DMA widths of 0, 16, 32 and 64 are supported; where
+ * the width is specified as 0 the corresponding element of
+ * @stats is not updated.
+ * @count: Length of the @desc array
+ * @mask: Bitmask of which elements of @desc are enabled
+ * @stats: Buffer to update with the converted statistics. The length
+ * of this array must be at least @count.
+ * @dma_buf: DMA buffer containing hardware statistics
+ * @accumulate: If set, the converted values will be added rather than
+ * directly stored to the corresponding elements of @stats
+ */
+void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count,
+ const unsigned long *mask,
+ u64 *stats, const void *dma_buf, bool accumulate)
+{
+ size_t index;
+
+ for_each_set_bit(index, mask, count) {
+ if (desc[index].dma_width) {
+ const void *addr = dma_buf + desc[index].offset;
+ u64 val;
+
+ switch (desc[index].dma_width) {
+ case 16:
+ val = le16_to_cpup((__le16 *)addr);
+ break;
+ case 32:
+ val = le32_to_cpup((__le32 *)addr);
+ break;
+ case 64:
+ val = le64_to_cpup((__le64 *)addr);
+ break;
+ default:
+ WARN_ON(1);
+ val = 0;
+ break;
+ }
+
+ if (accumulate)
+ stats[index] += val;
+ else
+ stats[index] = val;
+ }
+ }
+}
+
+void efx_nic_fix_nodesc_drop_stat(struct efx_nic *efx, u64 *rx_nodesc_drops)
+{
+ /* if down, or this is the first update after coming up */
+ if (!(efx->net_dev->flags & IFF_UP) || !efx->rx_nodesc_drops_prev_state)
+ efx->rx_nodesc_drops_while_down +=
+ *rx_nodesc_drops - efx->rx_nodesc_drops_total;
+ efx->rx_nodesc_drops_total = *rx_nodesc_drops;
+ efx->rx_nodesc_drops_prev_state = !!(efx->net_dev->flags & IFF_UP);
+ *rx_nodesc_drops -= efx->rx_nodesc_drops_while_down;
+}
diff --git a/drivers/net/ethernet/sfc/siena/nic.h b/drivers/net/ethernet/sfc/siena/nic.h
new file mode 100644
index 000000000000..251868235ae4
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/nic.h
@@ -0,0 +1,392 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ */
+
+#ifndef EFX_NIC_H
+#define EFX_NIC_H
+
+#include "nic_common.h"
+#include "efx.h"
+
+u32 efx_farch_fpga_ver(struct efx_nic *efx);
+
+enum {
+ PHY_TYPE_NONE = 0,
+ PHY_TYPE_TXC43128 = 1,
+ PHY_TYPE_88E1111 = 2,
+ PHY_TYPE_SFX7101 = 3,
+ PHY_TYPE_QT2022C2 = 4,
+ PHY_TYPE_PM8358 = 6,
+ PHY_TYPE_SFT9001A = 8,
+ PHY_TYPE_QT2025C = 9,
+ PHY_TYPE_SFT9001B = 10,
+};
+
+enum {
+ SIENA_STAT_tx_bytes = GENERIC_STAT_COUNT,
+ SIENA_STAT_tx_good_bytes,
+ SIENA_STAT_tx_bad_bytes,
+ SIENA_STAT_tx_packets,
+ SIENA_STAT_tx_bad,
+ SIENA_STAT_tx_pause,
+ SIENA_STAT_tx_control,
+ SIENA_STAT_tx_unicast,
+ SIENA_STAT_tx_multicast,
+ SIENA_STAT_tx_broadcast,
+ SIENA_STAT_tx_lt64,
+ SIENA_STAT_tx_64,
+ SIENA_STAT_tx_65_to_127,
+ SIENA_STAT_tx_128_to_255,
+ SIENA_STAT_tx_256_to_511,
+ SIENA_STAT_tx_512_to_1023,
+ SIENA_STAT_tx_1024_to_15xx,
+ SIENA_STAT_tx_15xx_to_jumbo,
+ SIENA_STAT_tx_gtjumbo,
+ SIENA_STAT_tx_collision,
+ SIENA_STAT_tx_single_collision,
+ SIENA_STAT_tx_multiple_collision,
+ SIENA_STAT_tx_excessive_collision,
+ SIENA_STAT_tx_deferred,
+ SIENA_STAT_tx_late_collision,
+ SIENA_STAT_tx_excessive_deferred,
+ SIENA_STAT_tx_non_tcpudp,
+ SIENA_STAT_tx_mac_src_error,
+ SIENA_STAT_tx_ip_src_error,
+ SIENA_STAT_rx_bytes,
+ SIENA_STAT_rx_good_bytes,
+ SIENA_STAT_rx_bad_bytes,
+ SIENA_STAT_rx_packets,
+ SIENA_STAT_rx_good,
+ SIENA_STAT_rx_bad,
+ SIENA_STAT_rx_pause,
+ SIENA_STAT_rx_control,
+ SIENA_STAT_rx_unicast,
+ SIENA_STAT_rx_multicast,
+ SIENA_STAT_rx_broadcast,
+ SIENA_STAT_rx_lt64,
+ SIENA_STAT_rx_64,
+ SIENA_STAT_rx_65_to_127,
+ SIENA_STAT_rx_128_to_255,
+ SIENA_STAT_rx_256_to_511,
+ SIENA_STAT_rx_512_to_1023,
+ SIENA_STAT_rx_1024_to_15xx,
+ SIENA_STAT_rx_15xx_to_jumbo,
+ SIENA_STAT_rx_gtjumbo,
+ SIENA_STAT_rx_bad_gtjumbo,
+ SIENA_STAT_rx_overflow,
+ SIENA_STAT_rx_false_carrier,
+ SIENA_STAT_rx_symbol_error,
+ SIENA_STAT_rx_align_error,
+ SIENA_STAT_rx_length_error,
+ SIENA_STAT_rx_internal_error,
+ SIENA_STAT_rx_nodesc_drop_cnt,
+ SIENA_STAT_COUNT
+};
+
+/**
+ * struct siena_nic_data - Siena NIC state
+ * @efx: Pointer back to main interface structure
+ * @wol_filter_id: Wake-on-LAN packet filter id
+ * @stats: Hardware statistics
+ * @vf: Array of &struct siena_vf objects
+ * @vf_buftbl_base: The zeroth buffer table index used to back VF queues.
+ * @vfdi_status: Common VFDI status page to be dmad to VF address space.
+ * @local_addr_list: List of local addresses. Protected by %local_lock.
+ * @local_page_list: List of DMA addressable pages used to broadcast
+ * %local_addr_list. Protected by %local_lock.
+ * @local_lock: Mutex protecting %local_addr_list and %local_page_list.
+ * @peer_work: Work item to broadcast peer addresses to VMs.
+ */
+struct siena_nic_data {
+ struct efx_nic *efx;
+ int wol_filter_id;
+ u64 stats[SIENA_STAT_COUNT];
+#ifdef CONFIG_SFC_SRIOV
+ struct siena_vf *vf;
+ struct efx_channel *vfdi_channel;
+ unsigned vf_buftbl_base;
+ struct efx_buffer vfdi_status;
+ struct list_head local_addr_list;
+ struct list_head local_page_list;
+ struct mutex local_lock;
+ struct work_struct peer_work;
+#endif
+};
+
+enum {
+ EF10_STAT_port_tx_bytes = GENERIC_STAT_COUNT,
+ EF10_STAT_port_tx_packets,
+ EF10_STAT_port_tx_pause,
+ EF10_STAT_port_tx_control,
+ EF10_STAT_port_tx_unicast,
+ EF10_STAT_port_tx_multicast,
+ EF10_STAT_port_tx_broadcast,
+ EF10_STAT_port_tx_lt64,
+ EF10_STAT_port_tx_64,
+ EF10_STAT_port_tx_65_to_127,
+ EF10_STAT_port_tx_128_to_255,
+ EF10_STAT_port_tx_256_to_511,
+ EF10_STAT_port_tx_512_to_1023,
+ EF10_STAT_port_tx_1024_to_15xx,
+ EF10_STAT_port_tx_15xx_to_jumbo,
+ EF10_STAT_port_rx_bytes,
+ EF10_STAT_port_rx_bytes_minus_good_bytes,
+ EF10_STAT_port_rx_good_bytes,
+ EF10_STAT_port_rx_bad_bytes,
+ EF10_STAT_port_rx_packets,
+ EF10_STAT_port_rx_good,
+ EF10_STAT_port_rx_bad,
+ EF10_STAT_port_rx_pause,
+ EF10_STAT_port_rx_control,
+ EF10_STAT_port_rx_unicast,
+ EF10_STAT_port_rx_multicast,
+ EF10_STAT_port_rx_broadcast,
+ EF10_STAT_port_rx_lt64,
+ EF10_STAT_port_rx_64,
+ EF10_STAT_port_rx_65_to_127,
+ EF10_STAT_port_rx_128_to_255,
+ EF10_STAT_port_rx_256_to_511,
+ EF10_STAT_port_rx_512_to_1023,
+ EF10_STAT_port_rx_1024_to_15xx,
+ EF10_STAT_port_rx_15xx_to_jumbo,
+ EF10_STAT_port_rx_gtjumbo,
+ EF10_STAT_port_rx_bad_gtjumbo,
+ EF10_STAT_port_rx_overflow,
+ EF10_STAT_port_rx_align_error,
+ EF10_STAT_port_rx_length_error,
+ EF10_STAT_port_rx_nodesc_drops,
+ EF10_STAT_port_rx_pm_trunc_bb_overflow,
+ EF10_STAT_port_rx_pm_discard_bb_overflow,
+ EF10_STAT_port_rx_pm_trunc_vfifo_full,
+ EF10_STAT_port_rx_pm_discard_vfifo_full,
+ EF10_STAT_port_rx_pm_trunc_qbb,
+ EF10_STAT_port_rx_pm_discard_qbb,
+ EF10_STAT_port_rx_pm_discard_mapping,
+ EF10_STAT_port_rx_dp_q_disabled_packets,
+ EF10_STAT_port_rx_dp_di_dropped_packets,
+ EF10_STAT_port_rx_dp_streaming_packets,
+ EF10_STAT_port_rx_dp_hlb_fetch,
+ EF10_STAT_port_rx_dp_hlb_wait,
+ EF10_STAT_rx_unicast,
+ EF10_STAT_rx_unicast_bytes,
+ EF10_STAT_rx_multicast,
+ EF10_STAT_rx_multicast_bytes,
+ EF10_STAT_rx_broadcast,
+ EF10_STAT_rx_broadcast_bytes,
+ EF10_STAT_rx_bad,
+ EF10_STAT_rx_bad_bytes,
+ EF10_STAT_rx_overflow,
+ EF10_STAT_tx_unicast,
+ EF10_STAT_tx_unicast_bytes,
+ EF10_STAT_tx_multicast,
+ EF10_STAT_tx_multicast_bytes,
+ EF10_STAT_tx_broadcast,
+ EF10_STAT_tx_broadcast_bytes,
+ EF10_STAT_tx_bad,
+ EF10_STAT_tx_bad_bytes,
+ EF10_STAT_tx_overflow,
+ EF10_STAT_V1_COUNT,
+ EF10_STAT_fec_uncorrected_errors = EF10_STAT_V1_COUNT,
+ EF10_STAT_fec_corrected_errors,
+ EF10_STAT_fec_corrected_symbols_lane0,
+ EF10_STAT_fec_corrected_symbols_lane1,
+ EF10_STAT_fec_corrected_symbols_lane2,
+ EF10_STAT_fec_corrected_symbols_lane3,
+ EF10_STAT_ctpio_vi_busy_fallback,
+ EF10_STAT_ctpio_long_write_success,
+ EF10_STAT_ctpio_missing_dbell_fail,
+ EF10_STAT_ctpio_overflow_fail,
+ EF10_STAT_ctpio_underflow_fail,
+ EF10_STAT_ctpio_timeout_fail,
+ EF10_STAT_ctpio_noncontig_wr_fail,
+ EF10_STAT_ctpio_frm_clobber_fail,
+ EF10_STAT_ctpio_invalid_wr_fail,
+ EF10_STAT_ctpio_vi_clobber_fallback,
+ EF10_STAT_ctpio_unqualified_fallback,
+ EF10_STAT_ctpio_runt_fallback,
+ EF10_STAT_ctpio_success,
+ EF10_STAT_ctpio_fallback,
+ EF10_STAT_ctpio_poison,
+ EF10_STAT_ctpio_erase,
+ EF10_STAT_COUNT
+};
+
+/* Maximum number of TX PIO buffers we may allocate to a function.
+ * This matches the total number of buffers on each SFC9100-family
+ * controller.
+ */
+#define EF10_TX_PIOBUF_COUNT 16
+
+/**
+ * struct efx_ef10_nic_data - EF10 architecture NIC state
+ * @mcdi_buf: DMA buffer for MCDI
+ * @warm_boot_count: Last seen MC warm boot count
+ * @vi_base: Absolute index of first VI in this function
+ * @n_allocated_vis: Number of VIs allocated to this function
+ * @n_piobufs: Number of PIO buffers allocated to this function
+ * @wc_membase: Base address of write-combining mapping of the memory BAR
+ * @pio_write_base: Base address for writing PIO buffers
+ * @pio_write_vi_base: Relative VI number for @pio_write_base
+ * @piobuf_handle: Handle of each PIO buffer allocated
+ * @piobuf_size: size of a single PIO buffer
+ * @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC
+ * reboot
+ * @mc_stats: Scratch buffer for converting statistics to the kernel's format
+ * @stats: Hardware statistics
+ * @workaround_35388: Flag: firmware supports workaround for bug 35388
+ * @workaround_26807: Flag: firmware supports workaround for bug 26807
+ * @workaround_61265: Flag: firmware supports workaround for bug 61265
+ * @must_check_datapath_caps: Flag: @datapath_caps needs to be revalidated
+ * after MC reboot
+ * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of
+ * %MC_CMD_GET_CAPABILITIES response)
+ * @datapath_caps2: Further Capabilities of datapath firmware (FLAGS2 field of
+ * %MC_CMD_GET_CAPABILITIES response)
+ * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU
+ * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU
+ * @must_probe_vswitching: Flag: vswitching has yet to be setup after MC reboot
+ * @pf_index: The number for this PF, or the parent PF if this is a VF
+#ifdef CONFIG_SFC_SRIOV
+ * @vf: Pointer to VF data structure
+#endif
+ * @vport_mac: The MAC address on the vport, only for PFs; VFs will be zero
+ * @vlan_list: List of VLANs added over the interface. Serialised by vlan_lock.
+ * @vlan_lock: Lock to serialize access to vlan_list.
+ * @udp_tunnels: UDP tunnel port numbers and types.
+ * @udp_tunnels_dirty: flag indicating a reboot occurred while pushing
+ * @udp_tunnels to hardware and thus the push must be re-done.
+ * @udp_tunnels_lock: Serialises writes to @udp_tunnels and @udp_tunnels_dirty.
+ */
+struct efx_ef10_nic_data {
+ struct efx_buffer mcdi_buf;
+ u16 warm_boot_count;
+ unsigned int vi_base;
+ unsigned int n_allocated_vis;
+ unsigned int n_piobufs;
+ void __iomem *wc_membase, *pio_write_base;
+ unsigned int pio_write_vi_base;
+ unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT];
+ u16 piobuf_size;
+ bool must_restore_piobufs;
+ __le64 *mc_stats;
+ u64 stats[EF10_STAT_COUNT];
+ bool workaround_35388;
+ bool workaround_26807;
+ bool workaround_61265;
+ bool must_check_datapath_caps;
+ u32 datapath_caps;
+ u32 datapath_caps2;
+ unsigned int rx_dpcpu_fw_id;
+ unsigned int tx_dpcpu_fw_id;
+ bool must_probe_vswitching;
+ unsigned int pf_index;
+ u8 port_id[ETH_ALEN];
+#ifdef CONFIG_SFC_SRIOV
+ unsigned int vf_index;
+ struct ef10_vf *vf;
+#endif
+ u8 vport_mac[ETH_ALEN];
+ struct list_head vlan_list;
+ struct mutex vlan_lock;
+ struct efx_udp_tunnel udp_tunnels[16];
+ bool udp_tunnels_dirty;
+ struct mutex udp_tunnels_lock;
+ u64 licensed_features;
+};
+
+/* TSOv2 */
+int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ bool *data_mapped);
+
+extern const struct efx_nic_type efx_hunt_a0_nic_type;
+extern const struct efx_nic_type efx_hunt_a0_vf_nic_type;
+
+int falcon_probe_board(struct efx_nic *efx, u16 revision_info);
+
+/* Falcon/Siena queue operations */
+int efx_farch_tx_probe(struct efx_tx_queue *tx_queue);
+void efx_farch_tx_init(struct efx_tx_queue *tx_queue);
+void efx_farch_tx_fini(struct efx_tx_queue *tx_queue);
+void efx_farch_tx_remove(struct efx_tx_queue *tx_queue);
+void efx_farch_tx_write(struct efx_tx_queue *tx_queue);
+unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, unsigned int len);
+int efx_farch_rx_probe(struct efx_rx_queue *rx_queue);
+void efx_farch_rx_init(struct efx_rx_queue *rx_queue);
+void efx_farch_rx_fini(struct efx_rx_queue *rx_queue);
+void efx_farch_rx_remove(struct efx_rx_queue *rx_queue);
+void efx_farch_rx_write(struct efx_rx_queue *rx_queue);
+void efx_farch_rx_defer_refill(struct efx_rx_queue *rx_queue);
+int efx_farch_ev_probe(struct efx_channel *channel);
+int efx_farch_ev_init(struct efx_channel *channel);
+void efx_farch_ev_fini(struct efx_channel *channel);
+void efx_farch_ev_remove(struct efx_channel *channel);
+int efx_farch_ev_process(struct efx_channel *channel, int quota);
+void efx_farch_ev_read_ack(struct efx_channel *channel);
+void efx_farch_ev_test_generate(struct efx_channel *channel);
+
+/* Falcon/Siena filter operations */
+int efx_farch_filter_table_probe(struct efx_nic *efx);
+void efx_farch_filter_table_restore(struct efx_nic *efx);
+void efx_farch_filter_table_remove(struct efx_nic *efx);
+void efx_farch_filter_update_rx_scatter(struct efx_nic *efx);
+s32 efx_farch_filter_insert(struct efx_nic *efx, struct efx_filter_spec *spec,
+ bool replace);
+int efx_farch_filter_remove_safe(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 filter_id);
+int efx_farch_filter_get_safe(struct efx_nic *efx,
+ enum efx_filter_priority priority, u32 filter_id,
+ struct efx_filter_spec *);
+int efx_farch_filter_clear_rx(struct efx_nic *efx,
+ enum efx_filter_priority priority);
+u32 efx_farch_filter_count_rx_used(struct efx_nic *efx,
+ enum efx_filter_priority priority);
+u32 efx_farch_filter_get_rx_id_limit(struct efx_nic *efx);
+s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
+ enum efx_filter_priority priority, u32 *buf,
+ u32 size);
+#ifdef CONFIG_RFS_ACCEL
+bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
+ unsigned int index);
+#endif
+void efx_farch_filter_sync_rx_mode(struct efx_nic *efx);
+
+/* Falcon/Siena interrupts */
+void efx_farch_irq_enable_master(struct efx_nic *efx);
+int efx_farch_irq_test_generate(struct efx_nic *efx);
+void efx_farch_irq_disable_master(struct efx_nic *efx);
+irqreturn_t efx_farch_msi_interrupt(int irq, void *dev_id);
+irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id);
+irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx);
+
+/* Global Resources */
+void siena_prepare_flush(struct efx_nic *efx);
+int efx_farch_fini_dmaq(struct efx_nic *efx);
+void efx_farch_finish_flr(struct efx_nic *efx);
+void siena_finish_flush(struct efx_nic *efx);
+void falcon_start_nic_stats(struct efx_nic *efx);
+void falcon_stop_nic_stats(struct efx_nic *efx);
+int falcon_reset_xaui(struct efx_nic *efx);
+void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw);
+void efx_farch_init_common(struct efx_nic *efx);
+void efx_farch_rx_push_indir_table(struct efx_nic *efx);
+void efx_farch_rx_pull_indir_table(struct efx_nic *efx);
+
+/* Tests */
+struct efx_farch_register_test {
+ unsigned address;
+ efx_oword_t mask;
+};
+
+int efx_farch_test_registers(struct efx_nic *efx,
+ const struct efx_farch_register_test *regs,
+ size_t n_regs);
+
+void efx_farch_generate_event(struct efx_nic *efx, unsigned int evq,
+ efx_qword_t *event);
+
+#endif /* EFX_NIC_H */
diff --git a/drivers/net/ethernet/sfc/siena/nic_common.h b/drivers/net/ethernet/sfc/siena/nic_common.h
new file mode 100644
index 000000000000..0cef35c0c559
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/nic_common.h
@@ -0,0 +1,262 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ * Copyright 2019-2020 Xilinx Inc.
+ */
+
+#ifndef EFX_NIC_COMMON_H
+#define EFX_NIC_COMMON_H
+
+#include "net_driver.h"
+#include "efx_common.h"
+#include "mcdi.h"
+#include "ptp.h"
+
+enum {
+ /* Revisions 0-2 were Falcon A0, A1 and B0 respectively.
+ * They are not supported by this driver but these revision numbers
+ * form part of the ethtool API for register dumping.
+ */
+ EFX_REV_SIENA_A0 = 3,
+ EFX_REV_HUNT_A0 = 4,
+ EFX_REV_EF100 = 5,
+};
+
+static inline int efx_nic_rev(struct efx_nic *efx)
+{
+ return efx->type->revision;
+}
+
+/* Read the current event from the event queue */
+static inline efx_qword_t *efx_event(struct efx_channel *channel,
+ unsigned int index)
+{
+ return ((efx_qword_t *) (channel->eventq.buf.addr)) +
+ (index & channel->eventq_mask);
+}
+
+/* See if an event is present
+ *
+ * We check both the high and low dword of the event for all ones. We
+ * wrote all ones when we cleared the event, and no valid event can
+ * have all ones in either its high or low dwords. This approach is
+ * robust against reordering.
+ *
+ * Note that using a single 64-bit comparison is incorrect; even
+ * though the CPU read will be atomic, the DMA write may not be.
+ */
+static inline int efx_event_present(efx_qword_t *event)
+{
+ return !(EFX_DWORD_IS_ALL_ONES(event->dword[0]) |
+ EFX_DWORD_IS_ALL_ONES(event->dword[1]));
+}
+
+/* Returns a pointer to the specified transmit descriptor in the TX
+ * descriptor queue belonging to the specified channel.
+ */
+static inline efx_qword_t *
+efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index)
+{
+ return ((efx_qword_t *) (tx_queue->txd.buf.addr)) + index;
+}
+
+/* Report whether this TX queue would be empty for the given write_count.
+ * May return false negative.
+ */
+static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, unsigned int write_count)
+{
+ unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count);
+
+ if (empty_read_count == 0)
+ return false;
+
+ return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0;
+}
+
+int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ bool *data_mapped);
+
+/* Decide whether to push a TX descriptor to the NIC vs merely writing
+ * the doorbell. This can reduce latency when we are adding a single
+ * descriptor to an empty queue, but is otherwise pointless. Further,
+ * Falcon and Siena have hardware bugs (SF bug 33851) that may be
+ * triggered if we don't check this.
+ * We use the write_count used for the last doorbell push, to get the
+ * NIC's view of the tx queue.
+ */
+static inline bool efx_nic_may_push_tx_desc(struct efx_tx_queue *tx_queue,
+ unsigned int write_count)
+{
+ bool was_empty = efx_nic_tx_is_empty(tx_queue, write_count);
+
+ tx_queue->empty_read_count = 0;
+ return was_empty && tx_queue->write_count - write_count == 1;
+}
+
+/* Returns a pointer to the specified descriptor in the RX descriptor queue */
+static inline efx_qword_t *
+efx_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index)
+{
+ return ((efx_qword_t *) (rx_queue->rxd.buf.addr)) + index;
+}
+
+/* Alignment of PCIe DMA boundaries (4KB) */
+#define EFX_PAGE_SIZE 4096
+/* Size and alignment of buffer table entries (same) */
+#define EFX_BUF_SIZE EFX_PAGE_SIZE
+
+/* NIC-generic software stats */
+enum {
+ GENERIC_STAT_rx_noskb_drops,
+ GENERIC_STAT_rx_nodesc_trunc,
+ GENERIC_STAT_COUNT
+};
+
+#define EFX_GENERIC_SW_STAT(ext_name) \
+ [GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 }
+
+/* TX data path */
+static inline int efx_nic_probe_tx(struct efx_tx_queue *tx_queue)
+{
+ return tx_queue->efx->type->tx_probe(tx_queue);
+}
+static inline void efx_nic_init_tx(struct efx_tx_queue *tx_queue)
+{
+ tx_queue->efx->type->tx_init(tx_queue);
+}
+static inline void efx_nic_remove_tx(struct efx_tx_queue *tx_queue)
+{
+ if (tx_queue->efx->type->tx_remove)
+ tx_queue->efx->type->tx_remove(tx_queue);
+}
+static inline void efx_nic_push_buffers(struct efx_tx_queue *tx_queue)
+{
+ tx_queue->efx->type->tx_write(tx_queue);
+}
+
+/* RX data path */
+static inline int efx_nic_probe_rx(struct efx_rx_queue *rx_queue)
+{
+ return rx_queue->efx->type->rx_probe(rx_queue);
+}
+static inline void efx_nic_init_rx(struct efx_rx_queue *rx_queue)
+{
+ rx_queue->efx->type->rx_init(rx_queue);
+}
+static inline void efx_nic_remove_rx(struct efx_rx_queue *rx_queue)
+{
+ rx_queue->efx->type->rx_remove(rx_queue);
+}
+static inline void efx_nic_notify_rx_desc(struct efx_rx_queue *rx_queue)
+{
+ rx_queue->efx->type->rx_write(rx_queue);
+}
+static inline void efx_nic_generate_fill_event(struct efx_rx_queue *rx_queue)
+{
+ rx_queue->efx->type->rx_defer_refill(rx_queue);
+}
+
+/* Event data path */
+static inline int efx_nic_probe_eventq(struct efx_channel *channel)
+{
+ return channel->efx->type->ev_probe(channel);
+}
+static inline int efx_nic_init_eventq(struct efx_channel *channel)
+{
+ return channel->efx->type->ev_init(channel);
+}
+static inline void efx_nic_fini_eventq(struct efx_channel *channel)
+{
+ channel->efx->type->ev_fini(channel);
+}
+static inline void efx_nic_remove_eventq(struct efx_channel *channel)
+{
+ channel->efx->type->ev_remove(channel);
+}
+static inline int
+efx_nic_process_eventq(struct efx_channel *channel, int quota)
+{
+ return channel->efx->type->ev_process(channel, quota);
+}
+static inline void efx_nic_eventq_read_ack(struct efx_channel *channel)
+{
+ channel->efx->type->ev_read_ack(channel);
+}
+
+void efx_nic_event_test_start(struct efx_channel *channel);
+
+bool efx_nic_event_present(struct efx_channel *channel);
+
+static inline void efx_sensor_event(struct efx_nic *efx, efx_qword_t *ev)
+{
+ if (efx->type->sensor_event)
+ efx->type->sensor_event(efx, ev);
+}
+
+static inline unsigned int efx_rx_recycle_ring_size(const struct efx_nic *efx)
+{
+ return efx->type->rx_recycle_ring_size(efx);
+}
+
+/* Some statistics are computed as A - B where A and B each increase
+ * linearly with some hardware counter(s) and the counters are read
+ * asynchronously. If the counters contributing to B are always read
+ * after those contributing to A, the computed value may be lower than
+ * the true value by some variable amount, and may decrease between
+ * subsequent computations.
+ *
+ * We should never allow statistics to decrease or to exceed the true
+ * value. Since the computed value will never be greater than the
+ * true value, we can achieve this by only storing the computed value
+ * when it increases.
+ */
+static inline void efx_update_diff_stat(u64 *stat, u64 diff)
+{
+ if ((s64)(diff - *stat) > 0)
+ *stat = diff;
+}
+
+/* Interrupts */
+int efx_nic_init_interrupt(struct efx_nic *efx);
+int efx_nic_irq_test_start(struct efx_nic *efx);
+void efx_nic_fini_interrupt(struct efx_nic *efx);
+
+static inline int efx_nic_event_test_irq_cpu(struct efx_channel *channel)
+{
+ return READ_ONCE(channel->event_test_cpu);
+}
+static inline int efx_nic_irq_test_irq_cpu(struct efx_nic *efx)
+{
+ return READ_ONCE(efx->last_irq_cpu);
+}
+
+/* Global Resources */
+int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer,
+ unsigned int len, gfp_t gfp_flags);
+void efx_nic_free_buffer(struct efx_nic *efx, struct efx_buffer *buffer);
+
+size_t efx_nic_get_regs_len(struct efx_nic *efx);
+void efx_nic_get_regs(struct efx_nic *efx, void *buf);
+
+#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1))
+
+size_t efx_nic_describe_stats(const struct efx_hw_stat_desc *desc, size_t count,
+ const unsigned long *mask, u8 *names);
+int efx_nic_copy_stats(struct efx_nic *efx, __le64 *dest);
+void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count,
+ const unsigned long *mask, u64 *stats,
+ const void *dma_buf, bool accumulate);
+void efx_nic_fix_nodesc_drop_stat(struct efx_nic *efx, u64 *stat);
+static inline size_t efx_nic_update_stats_atomic(struct efx_nic *efx, u64 *full_stats,
+ struct rtnl_link_stats64 *core_stats)
+{
+ if (efx->type->update_stats_atomic)
+ return efx->type->update_stats_atomic(efx, full_stats, core_stats);
+ return efx->type->update_stats(efx, full_stats, core_stats);
+}
+
+#define EFX_MAX_FLUSH_TIME 5000
+
+#endif /* EFX_NIC_COMMON_H */
diff --git a/drivers/net/ethernet/sfc/siena/ptp.c b/drivers/net/ethernet/sfc/siena/ptp.c
new file mode 100644
index 000000000000..f0ef515e2ade
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/ptp.c
@@ -0,0 +1,2210 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2011-2013 Solarflare Communications Inc.
+ */
+
+/* Theory of operation:
+ *
+ * PTP support is assisted by firmware running on the MC, which provides
+ * the hardware timestamping capabilities. Both transmitted and received
+ * PTP event packets are queued onto internal queues for subsequent processing;
+ * this is because the MC operations are relatively long and would block
+ * block NAPI/interrupt operation.
+ *
+ * Receive event processing:
+ * The event contains the packet's UUID and sequence number, together
+ * with the hardware timestamp. The PTP receive packet queue is searched
+ * for this UUID/sequence number and, if found, put on a pending queue.
+ * Packets not matching are delivered without timestamps (MCDI events will
+ * always arrive after the actual packet).
+ * It is important for the operation of the PTP protocol that the ordering
+ * of packets between the event and general port is maintained.
+ *
+ * Work queue processing:
+ * If work waiting, synchronise host/hardware time
+ *
+ * Transmit: send packet through MC, which returns the transmission time
+ * that is converted to an appropriate timestamp.
+ *
+ * Receive: the packet's reception time is converted to an appropriate
+ * timestamp.
+ */
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/pps_kernel.h>
+#include <linux/ptp_clock_kernel.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+#include "io.h"
+#include "farch_regs.h"
+#include "tx.h"
+#include "nic.h" /* indirectly includes ptp.h */
+
+/* Maximum number of events expected to make up a PTP event */
+#define MAX_EVENT_FRAGS 3
+
+/* Maximum delay, ms, to begin synchronisation */
+#define MAX_SYNCHRONISE_WAIT_MS 2
+
+/* How long, at most, to spend synchronising */
+#define SYNCHRONISE_PERIOD_NS 250000
+
+/* How often to update the shared memory time */
+#define SYNCHRONISATION_GRANULARITY_NS 200
+
+/* Minimum permitted length of a (corrected) synchronisation time */
+#define DEFAULT_MIN_SYNCHRONISATION_NS 120
+
+/* Maximum permitted length of a (corrected) synchronisation time */
+#define MAX_SYNCHRONISATION_NS 1000
+
+/* How many (MC) receive events that can be queued */
+#define MAX_RECEIVE_EVENTS 8
+
+/* Length of (modified) moving average. */
+#define AVERAGE_LENGTH 16
+
+/* How long an unmatched event or packet can be held */
+#define PKT_EVENT_LIFETIME_MS 10
+
+/* Offsets into PTP packet for identification. These offsets are from the
+ * start of the IP header, not the MAC header. Note that neither PTP V1 nor
+ * PTP V2 permit the use of IPV4 options.
+ */
+#define PTP_DPORT_OFFSET 22
+
+#define PTP_V1_VERSION_LENGTH 2
+#define PTP_V1_VERSION_OFFSET 28
+
+#define PTP_V1_UUID_LENGTH 6
+#define PTP_V1_UUID_OFFSET 50
+
+#define PTP_V1_SEQUENCE_LENGTH 2
+#define PTP_V1_SEQUENCE_OFFSET 58
+
+/* The minimum length of a PTP V1 packet for offsets, etc. to be valid:
+ * includes IP header.
+ */
+#define PTP_V1_MIN_LENGTH 64
+
+#define PTP_V2_VERSION_LENGTH 1
+#define PTP_V2_VERSION_OFFSET 29
+
+#define PTP_V2_UUID_LENGTH 8
+#define PTP_V2_UUID_OFFSET 48
+
+/* Although PTP V2 UUIDs are comprised a ClockIdentity (8) and PortNumber (2),
+ * the MC only captures the last six bytes of the clock identity. These values
+ * reflect those, not the ones used in the standard. The standard permits
+ * mapping of V1 UUIDs to V2 UUIDs with these same values.
+ */
+#define PTP_V2_MC_UUID_LENGTH 6
+#define PTP_V2_MC_UUID_OFFSET 50
+
+#define PTP_V2_SEQUENCE_LENGTH 2
+#define PTP_V2_SEQUENCE_OFFSET 58
+
+/* The minimum length of a PTP V2 packet for offsets, etc. to be valid:
+ * includes IP header.
+ */
+#define PTP_V2_MIN_LENGTH 63
+
+#define PTP_MIN_LENGTH 63
+
+#define PTP_ADDRESS 0xe0000181 /* 224.0.1.129 */
+#define PTP_EVENT_PORT 319
+#define PTP_GENERAL_PORT 320
+
+/* Annoyingly the format of the version numbers are different between
+ * versions 1 and 2 so it isn't possible to simply look for 1 or 2.
+ */
+#define PTP_VERSION_V1 1
+
+#define PTP_VERSION_V2 2
+#define PTP_VERSION_V2_MASK 0x0f
+
+enum ptp_packet_state {
+ PTP_PACKET_STATE_UNMATCHED = 0,
+ PTP_PACKET_STATE_MATCHED,
+ PTP_PACKET_STATE_TIMED_OUT,
+ PTP_PACKET_STATE_MATCH_UNWANTED
+};
+
+/* NIC synchronised with single word of time only comprising
+ * partial seconds and full nanoseconds: 10^9 ~ 2^30 so 2 bits for seconds.
+ */
+#define MC_NANOSECOND_BITS 30
+#define MC_NANOSECOND_MASK ((1 << MC_NANOSECOND_BITS) - 1)
+#define MC_SECOND_MASK ((1 << (32 - MC_NANOSECOND_BITS)) - 1)
+
+/* Maximum parts-per-billion adjustment that is acceptable */
+#define MAX_PPB 1000000
+
+/* Precalculate scale word to avoid long long division at runtime */
+/* This is equivalent to 2^66 / 10^9. */
+#define PPB_SCALE_WORD ((1LL << (57)) / 1953125LL)
+
+/* How much to shift down after scaling to convert to FP40 */
+#define PPB_SHIFT_FP40 26
+/* ... and FP44. */
+#define PPB_SHIFT_FP44 22
+
+#define PTP_SYNC_ATTEMPTS 4
+
+/**
+ * struct efx_ptp_match - Matching structure, stored in sk_buff's cb area.
+ * @words: UUID and (partial) sequence number
+ * @expiry: Time after which the packet should be delivered irrespective of
+ * event arrival.
+ * @state: The state of the packet - whether it is ready for processing or
+ * whether that is of no interest.
+ */
+struct efx_ptp_match {
+ u32 words[DIV_ROUND_UP(PTP_V1_UUID_LENGTH, 4)];
+ unsigned long expiry;
+ enum ptp_packet_state state;
+};
+
+/**
+ * struct efx_ptp_event_rx - A PTP receive event (from MC)
+ * @link: list of events
+ * @seq0: First part of (PTP) UUID
+ * @seq1: Second part of (PTP) UUID and sequence number
+ * @hwtimestamp: Event timestamp
+ * @expiry: Time which the packet arrived
+ */
+struct efx_ptp_event_rx {
+ struct list_head link;
+ u32 seq0;
+ u32 seq1;
+ ktime_t hwtimestamp;
+ unsigned long expiry;
+};
+
+/**
+ * struct efx_ptp_timeset - Synchronisation between host and MC
+ * @host_start: Host time immediately before hardware timestamp taken
+ * @major: Hardware timestamp, major
+ * @minor: Hardware timestamp, minor
+ * @host_end: Host time immediately after hardware timestamp taken
+ * @wait: Number of NIC clock ticks between hardware timestamp being read and
+ * host end time being seen
+ * @window: Difference of host_end and host_start
+ * @valid: Whether this timeset is valid
+ */
+struct efx_ptp_timeset {
+ u32 host_start;
+ u32 major;
+ u32 minor;
+ u32 host_end;
+ u32 wait;
+ u32 window; /* Derived: end - start, allowing for wrap */
+};
+
+/**
+ * struct efx_ptp_data - Precision Time Protocol (PTP) state
+ * @efx: The NIC context
+ * @channel: The PTP channel (Siena only)
+ * @rx_ts_inline: Flag for whether RX timestamps are inline (else they are
+ * separate events)
+ * @rxq: Receive SKB queue (awaiting timestamps)
+ * @txq: Transmit SKB queue
+ * @evt_list: List of MC receive events awaiting packets
+ * @evt_free_list: List of free events
+ * @evt_lock: Lock for manipulating evt_list and evt_free_list
+ * @rx_evts: Instantiated events (on evt_list and evt_free_list)
+ * @workwq: Work queue for processing pending PTP operations
+ * @work: Work task
+ * @reset_required: A serious error has occurred and the PTP task needs to be
+ * reset (disable, enable).
+ * @rxfilter_event: Receive filter when operating
+ * @rxfilter_general: Receive filter when operating
+ * @rxfilter_installed: Receive filter installed
+ * @config: Current timestamp configuration
+ * @enabled: PTP operation enabled
+ * @mode: Mode in which PTP operating (PTP version)
+ * @ns_to_nic_time: Function to convert from scalar nanoseconds to NIC time
+ * @nic_to_kernel_time: Function to convert from NIC to kernel time
+ * @nic_time: contains time details
+ * @nic_time.minor_max: Wrap point for NIC minor times
+ * @nic_time.sync_event_diff_min: Minimum acceptable difference between time
+ * in packet prefix and last MCDI time sync event i.e. how much earlier than
+ * the last sync event time a packet timestamp can be.
+ * @nic_time.sync_event_diff_max: Maximum acceptable difference between time
+ * in packet prefix and last MCDI time sync event i.e. how much later than
+ * the last sync event time a packet timestamp can be.
+ * @nic_time.sync_event_minor_shift: Shift required to make minor time from
+ * field in MCDI time sync event.
+ * @min_synchronisation_ns: Minimum acceptable corrected sync window
+ * @capabilities: Capabilities flags from the NIC
+ * @ts_corrections: contains corrections details
+ * @ts_corrections.ptp_tx: Required driver correction of PTP packet transmit
+ * timestamps
+ * @ts_corrections.ptp_rx: Required driver correction of PTP packet receive
+ * timestamps
+ * @ts_corrections.pps_out: PPS output error (information only)
+ * @ts_corrections.pps_in: Required driver correction of PPS input timestamps
+ * @ts_corrections.general_tx: Required driver correction of general packet
+ * transmit timestamps
+ * @ts_corrections.general_rx: Required driver correction of general packet
+ * receive timestamps
+ * @evt_frags: Partly assembled PTP events
+ * @evt_frag_idx: Current fragment number
+ * @evt_code: Last event code
+ * @start: Address at which MC indicates ready for synchronisation
+ * @host_time_pps: Host time at last PPS
+ * @adjfreq_ppb_shift: Shift required to convert scaled parts-per-billion
+ * frequency adjustment into a fixed point fractional nanosecond format.
+ * @current_adjfreq: Current ppb adjustment.
+ * @phc_clock: Pointer to registered phc device (if primary function)
+ * @phc_clock_info: Registration structure for phc device
+ * @pps_work: pps work task for handling pps events
+ * @pps_workwq: pps work queue
+ * @nic_ts_enabled: Flag indicating if NIC generated TS events are handled
+ * @txbuf: Buffer for use when transmitting (PTP) packets to MC (avoids
+ * allocations in main data path).
+ * @good_syncs: Number of successful synchronisations.
+ * @fast_syncs: Number of synchronisations requiring short delay
+ * @bad_syncs: Number of failed synchronisations.
+ * @sync_timeouts: Number of synchronisation timeouts
+ * @no_time_syncs: Number of synchronisations with no good times.
+ * @invalid_sync_windows: Number of sync windows with bad durations.
+ * @undersize_sync_windows: Number of corrected sync windows that are too small
+ * @oversize_sync_windows: Number of corrected sync windows that are too large
+ * @rx_no_timestamp: Number of packets received without a timestamp.
+ * @timeset: Last set of synchronisation statistics.
+ * @xmit_skb: Transmit SKB function.
+ */
+struct efx_ptp_data {
+ struct efx_nic *efx;
+ struct efx_channel *channel;
+ bool rx_ts_inline;
+ struct sk_buff_head rxq;
+ struct sk_buff_head txq;
+ struct list_head evt_list;
+ struct list_head evt_free_list;
+ spinlock_t evt_lock;
+ struct efx_ptp_event_rx rx_evts[MAX_RECEIVE_EVENTS];
+ struct workqueue_struct *workwq;
+ struct work_struct work;
+ bool reset_required;
+ u32 rxfilter_event;
+ u32 rxfilter_general;
+ bool rxfilter_installed;
+ struct hwtstamp_config config;
+ bool enabled;
+ unsigned int mode;
+ void (*ns_to_nic_time)(s64 ns, u32 *nic_major, u32 *nic_minor);
+ ktime_t (*nic_to_kernel_time)(u32 nic_major, u32 nic_minor,
+ s32 correction);
+ struct {
+ u32 minor_max;
+ u32 sync_event_diff_min;
+ u32 sync_event_diff_max;
+ unsigned int sync_event_minor_shift;
+ } nic_time;
+ unsigned int min_synchronisation_ns;
+ unsigned int capabilities;
+ struct {
+ s32 ptp_tx;
+ s32 ptp_rx;
+ s32 pps_out;
+ s32 pps_in;
+ s32 general_tx;
+ s32 general_rx;
+ } ts_corrections;
+ efx_qword_t evt_frags[MAX_EVENT_FRAGS];
+ int evt_frag_idx;
+ int evt_code;
+ struct efx_buffer start;
+ struct pps_event_time host_time_pps;
+ unsigned int adjfreq_ppb_shift;
+ s64 current_adjfreq;
+ struct ptp_clock *phc_clock;
+ struct ptp_clock_info phc_clock_info;
+ struct work_struct pps_work;
+ struct workqueue_struct *pps_workwq;
+ bool nic_ts_enabled;
+ efx_dword_t txbuf[MCDI_TX_BUF_LEN(MC_CMD_PTP_IN_TRANSMIT_LENMAX)];
+
+ unsigned int good_syncs;
+ unsigned int fast_syncs;
+ unsigned int bad_syncs;
+ unsigned int sync_timeouts;
+ unsigned int no_time_syncs;
+ unsigned int invalid_sync_windows;
+ unsigned int undersize_sync_windows;
+ unsigned int oversize_sync_windows;
+ unsigned int rx_no_timestamp;
+ struct efx_ptp_timeset
+ timeset[MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_MAXNUM];
+ void (*xmit_skb)(struct efx_nic *efx, struct sk_buff *skb);
+};
+
+static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta);
+static int efx_phc_adjtime(struct ptp_clock_info *ptp, s64 delta);
+static int efx_phc_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts);
+static int efx_phc_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *e_ts);
+static int efx_phc_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *request, int on);
+
+bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx)
+{
+ return efx_has_cap(efx, TX_MAC_TIMESTAMPING);
+}
+
+/* PTP 'extra' channel is still a traffic channel, but we only create TX queues
+ * if PTP uses MAC TX timestamps, not if PTP uses the MC directly to transmit.
+ */
+static bool efx_ptp_want_txqs(struct efx_channel *channel)
+{
+ return efx_ptp_use_mac_tx_timestamps(channel->efx);
+}
+
+#define PTP_SW_STAT(ext_name, field_name) \
+ { #ext_name, 0, offsetof(struct efx_ptp_data, field_name) }
+#define PTP_MC_STAT(ext_name, mcdi_name) \
+ { #ext_name, 32, MC_CMD_PTP_OUT_STATUS_STATS_ ## mcdi_name ## _OFST }
+static const struct efx_hw_stat_desc efx_ptp_stat_desc[] = {
+ PTP_SW_STAT(ptp_good_syncs, good_syncs),
+ PTP_SW_STAT(ptp_fast_syncs, fast_syncs),
+ PTP_SW_STAT(ptp_bad_syncs, bad_syncs),
+ PTP_SW_STAT(ptp_sync_timeouts, sync_timeouts),
+ PTP_SW_STAT(ptp_no_time_syncs, no_time_syncs),
+ PTP_SW_STAT(ptp_invalid_sync_windows, invalid_sync_windows),
+ PTP_SW_STAT(ptp_undersize_sync_windows, undersize_sync_windows),
+ PTP_SW_STAT(ptp_oversize_sync_windows, oversize_sync_windows),
+ PTP_SW_STAT(ptp_rx_no_timestamp, rx_no_timestamp),
+ PTP_MC_STAT(ptp_tx_timestamp_packets, TX),
+ PTP_MC_STAT(ptp_rx_timestamp_packets, RX),
+ PTP_MC_STAT(ptp_timestamp_packets, TS),
+ PTP_MC_STAT(ptp_filter_matches, FM),
+ PTP_MC_STAT(ptp_non_filter_matches, NFM),
+};
+#define PTP_STAT_COUNT ARRAY_SIZE(efx_ptp_stat_desc)
+static const unsigned long efx_ptp_stat_mask[] = {
+ [0 ... BITS_TO_LONGS(PTP_STAT_COUNT) - 1] = ~0UL,
+};
+
+size_t efx_ptp_describe_stats(struct efx_nic *efx, u8 *strings)
+{
+ if (!efx->ptp_data)
+ return 0;
+
+ return efx_nic_describe_stats(efx_ptp_stat_desc, PTP_STAT_COUNT,
+ efx_ptp_stat_mask, strings);
+}
+
+size_t efx_ptp_update_stats(struct efx_nic *efx, u64 *stats)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_STATUS_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_STATUS_LEN);
+ size_t i;
+ int rc;
+
+ if (!efx->ptp_data)
+ return 0;
+
+ /* Copy software statistics */
+ for (i = 0; i < PTP_STAT_COUNT; i++) {
+ if (efx_ptp_stat_desc[i].dma_width)
+ continue;
+ stats[i] = *(unsigned int *)((char *)efx->ptp_data +
+ efx_ptp_stat_desc[i].offset);
+ }
+
+ /* Fetch MC statistics. We *must* fill in all statistics or
+ * risk leaking kernel memory to userland, so if the MCDI
+ * request fails we pretend we got zeroes.
+ */
+ MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_STATUS);
+ MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ memset(outbuf, 0, sizeof(outbuf));
+ efx_nic_update_stats(efx_ptp_stat_desc, PTP_STAT_COUNT,
+ efx_ptp_stat_mask,
+ stats, _MCDI_PTR(outbuf, 0), false);
+
+ return PTP_STAT_COUNT;
+}
+
+/* For Siena platforms NIC time is s and ns */
+static void efx_ptp_ns_to_s_ns(s64 ns, u32 *nic_major, u32 *nic_minor)
+{
+ struct timespec64 ts = ns_to_timespec64(ns);
+ *nic_major = (u32)ts.tv_sec;
+ *nic_minor = ts.tv_nsec;
+}
+
+static ktime_t efx_ptp_s_ns_to_ktime_correction(u32 nic_major, u32 nic_minor,
+ s32 correction)
+{
+ ktime_t kt = ktime_set(nic_major, nic_minor);
+ if (correction >= 0)
+ kt = ktime_add_ns(kt, (u64)correction);
+ else
+ kt = ktime_sub_ns(kt, (u64)-correction);
+ return kt;
+}
+
+/* To convert from s27 format to ns we multiply then divide by a power of 2.
+ * For the conversion from ns to s27, the operation is also converted to a
+ * multiply and shift.
+ */
+#define S27_TO_NS_SHIFT (27)
+#define NS_TO_S27_MULT (((1ULL << 63) + NSEC_PER_SEC / 2) / NSEC_PER_SEC)
+#define NS_TO_S27_SHIFT (63 - S27_TO_NS_SHIFT)
+#define S27_MINOR_MAX (1 << S27_TO_NS_SHIFT)
+
+/* For Huntington platforms NIC time is in seconds and fractions of a second
+ * where the minor register only uses 27 bits in units of 2^-27s.
+ */
+static void efx_ptp_ns_to_s27(s64 ns, u32 *nic_major, u32 *nic_minor)
+{
+ struct timespec64 ts = ns_to_timespec64(ns);
+ u32 maj = (u32)ts.tv_sec;
+ u32 min = (u32)(((u64)ts.tv_nsec * NS_TO_S27_MULT +
+ (1ULL << (NS_TO_S27_SHIFT - 1))) >> NS_TO_S27_SHIFT);
+
+ /* The conversion can result in the minor value exceeding the maximum.
+ * In this case, round up to the next second.
+ */
+ if (min >= S27_MINOR_MAX) {
+ min -= S27_MINOR_MAX;
+ maj++;
+ }
+
+ *nic_major = maj;
+ *nic_minor = min;
+}
+
+static inline ktime_t efx_ptp_s27_to_ktime(u32 nic_major, u32 nic_minor)
+{
+ u32 ns = (u32)(((u64)nic_minor * NSEC_PER_SEC +
+ (1ULL << (S27_TO_NS_SHIFT - 1))) >> S27_TO_NS_SHIFT);
+ return ktime_set(nic_major, ns);
+}
+
+static ktime_t efx_ptp_s27_to_ktime_correction(u32 nic_major, u32 nic_minor,
+ s32 correction)
+{
+ /* Apply the correction and deal with carry */
+ nic_minor += correction;
+ if ((s32)nic_minor < 0) {
+ nic_minor += S27_MINOR_MAX;
+ nic_major--;
+ } else if (nic_minor >= S27_MINOR_MAX) {
+ nic_minor -= S27_MINOR_MAX;
+ nic_major++;
+ }
+
+ return efx_ptp_s27_to_ktime(nic_major, nic_minor);
+}
+
+/* For Medford2 platforms the time is in seconds and quarter nanoseconds. */
+static void efx_ptp_ns_to_s_qns(s64 ns, u32 *nic_major, u32 *nic_minor)
+{
+ struct timespec64 ts = ns_to_timespec64(ns);
+
+ *nic_major = (u32)ts.tv_sec;
+ *nic_minor = ts.tv_nsec * 4;
+}
+
+static ktime_t efx_ptp_s_qns_to_ktime_correction(u32 nic_major, u32 nic_minor,
+ s32 correction)
+{
+ ktime_t kt;
+
+ nic_minor = DIV_ROUND_CLOSEST(nic_minor, 4);
+ correction = DIV_ROUND_CLOSEST(correction, 4);
+
+ kt = ktime_set(nic_major, nic_minor);
+
+ if (correction >= 0)
+ kt = ktime_add_ns(kt, (u64)correction);
+ else
+ kt = ktime_sub_ns(kt, (u64)-correction);
+ return kt;
+}
+
+struct efx_channel *efx_ptp_channel(struct efx_nic *efx)
+{
+ return efx->ptp_data ? efx->ptp_data->channel : NULL;
+}
+
+static u32 last_sync_timestamp_major(struct efx_nic *efx)
+{
+ struct efx_channel *channel = efx_ptp_channel(efx);
+ u32 major = 0;
+
+ if (channel)
+ major = channel->sync_timestamp_major;
+ return major;
+}
+
+/* The 8000 series and later can provide the time from the MAC, which is only
+ * 48 bits long and provides meta-information in the top 2 bits.
+ */
+static ktime_t
+efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx,
+ struct efx_ptp_data *ptp,
+ u32 nic_major, u32 nic_minor,
+ s32 correction)
+{
+ u32 sync_timestamp;
+ ktime_t kt = { 0 };
+ s16 delta;
+
+ if (!(nic_major & 0x80000000)) {
+ WARN_ON_ONCE(nic_major >> 16);
+
+ /* Medford provides 48 bits of timestamp, so we must get the top
+ * 16 bits from the timesync event state.
+ *
+ * We only have the lower 16 bits of the time now, but we do
+ * have a full resolution timestamp at some point in past. As
+ * long as the difference between the (real) now and the sync
+ * is less than 2^15, then we can reconstruct the difference
+ * between those two numbers using only the lower 16 bits of
+ * each.
+ *
+ * Put another way
+ *
+ * a - b = ((a mod k) - b) mod k
+ *
+ * when -k/2 < (a-b) < k/2. In our case k is 2^16. We know
+ * (a mod k) and b, so can calculate the delta, a - b.
+ *
+ */
+ sync_timestamp = last_sync_timestamp_major(efx);
+
+ /* Because delta is s16 this does an implicit mask down to
+ * 16 bits which is what we need, assuming
+ * MEDFORD_TX_SECS_EVENT_BITS is 16. delta is signed so that
+ * we can deal with the (unlikely) case of sync timestamps
+ * arriving from the future.
+ */
+ delta = nic_major - sync_timestamp;
+
+ /* Recover the fully specified time now, by applying the offset
+ * to the (fully specified) sync time.
+ */
+ nic_major = sync_timestamp + delta;
+
+ kt = ptp->nic_to_kernel_time(nic_major, nic_minor,
+ correction);
+ }
+ return kt;
+}
+
+ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ ktime_t kt;
+
+ if (efx_ptp_use_mac_tx_timestamps(efx))
+ kt = efx_ptp_mac_nic_to_ktime_correction(efx, ptp,
+ tx_queue->completed_timestamp_major,
+ tx_queue->completed_timestamp_minor,
+ ptp->ts_corrections.general_tx);
+ else
+ kt = ptp->nic_to_kernel_time(
+ tx_queue->completed_timestamp_major,
+ tx_queue->completed_timestamp_minor,
+ ptp->ts_corrections.general_tx);
+ return kt;
+}
+
+/* Get PTP attributes and set up time conversions */
+static int efx_ptp_get_attributes(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_GET_ATTRIBUTES_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN);
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ int rc;
+ u32 fmt;
+ size_t out_len;
+
+ /* Get the PTP attributes. If the NIC doesn't support the operation we
+ * use the default format for compatibility with older NICs i.e.
+ * seconds and nanoseconds.
+ */
+ MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_GET_ATTRIBUTES);
+ MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &out_len);
+ if (rc == 0) {
+ fmt = MCDI_DWORD(outbuf, PTP_OUT_GET_ATTRIBUTES_TIME_FORMAT);
+ } else if (rc == -EINVAL) {
+ fmt = MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS;
+ } else if (rc == -EPERM) {
+ pci_info(efx->pci_dev, "no PTP support\n");
+ return rc;
+ } else {
+ efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf),
+ outbuf, sizeof(outbuf), rc);
+ return rc;
+ }
+
+ switch (fmt) {
+ case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION:
+ ptp->ns_to_nic_time = efx_ptp_ns_to_s27;
+ ptp->nic_to_kernel_time = efx_ptp_s27_to_ktime_correction;
+ ptp->nic_time.minor_max = 1 << 27;
+ ptp->nic_time.sync_event_minor_shift = 19;
+ break;
+ case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS:
+ ptp->ns_to_nic_time = efx_ptp_ns_to_s_ns;
+ ptp->nic_to_kernel_time = efx_ptp_s_ns_to_ktime_correction;
+ ptp->nic_time.minor_max = 1000000000;
+ ptp->nic_time.sync_event_minor_shift = 22;
+ break;
+ case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_QTR_NANOSECONDS:
+ ptp->ns_to_nic_time = efx_ptp_ns_to_s_qns;
+ ptp->nic_to_kernel_time = efx_ptp_s_qns_to_ktime_correction;
+ ptp->nic_time.minor_max = 4000000000UL;
+ ptp->nic_time.sync_event_minor_shift = 24;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /* Precalculate acceptable difference between the minor time in the
+ * packet prefix and the last MCDI time sync event. We expect the
+ * packet prefix timestamp to be after of sync event by up to one
+ * sync event interval (0.25s) but we allow it to exceed this by a
+ * fuzz factor of (0.1s)
+ */
+ ptp->nic_time.sync_event_diff_min = ptp->nic_time.minor_max
+ - (ptp->nic_time.minor_max / 10);
+ ptp->nic_time.sync_event_diff_max = (ptp->nic_time.minor_max / 4)
+ + (ptp->nic_time.minor_max / 10);
+
+ /* MC_CMD_PTP_OP_GET_ATTRIBUTES has been extended twice from an older
+ * operation MC_CMD_PTP_OP_GET_TIME_FORMAT. The function now may return
+ * a value to use for the minimum acceptable corrected synchronization
+ * window and may return further capabilities.
+ * If we have the extra information store it. For older firmware that
+ * does not implement the extended command use the default value.
+ */
+ if (rc == 0 &&
+ out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_CAPABILITIES_OFST)
+ ptp->min_synchronisation_ns =
+ MCDI_DWORD(outbuf,
+ PTP_OUT_GET_ATTRIBUTES_SYNC_WINDOW_MIN);
+ else
+ ptp->min_synchronisation_ns = DEFAULT_MIN_SYNCHRONISATION_NS;
+
+ if (rc == 0 &&
+ out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN)
+ ptp->capabilities = MCDI_DWORD(outbuf,
+ PTP_OUT_GET_ATTRIBUTES_CAPABILITIES);
+ else
+ ptp->capabilities = 0;
+
+ /* Set up the shift for conversion between frequency
+ * adjustments in parts-per-billion and the fixed-point
+ * fractional ns format that the adapter uses.
+ */
+ if (ptp->capabilities & (1 << MC_CMD_PTP_OUT_GET_ATTRIBUTES_FP44_FREQ_ADJ_LBN))
+ ptp->adjfreq_ppb_shift = PPB_SHIFT_FP44;
+ else
+ ptp->adjfreq_ppb_shift = PPB_SHIFT_FP40;
+
+ return 0;
+}
+
+/* Get PTP timestamp corrections */
+static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_GET_TIMESTAMP_CORRECTIONS_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN);
+ int rc;
+ size_t out_len;
+
+ /* Get the timestamp corrections from the NIC. If this operation is
+ * not supported (older NICs) then no correction is required.
+ */
+ MCDI_SET_DWORD(inbuf, PTP_IN_OP,
+ MC_CMD_PTP_OP_GET_TIMESTAMP_CORRECTIONS);
+ MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &out_len);
+ if (rc == 0) {
+ efx->ptp_data->ts_corrections.ptp_tx = MCDI_DWORD(outbuf,
+ PTP_OUT_GET_TIMESTAMP_CORRECTIONS_TRANSMIT);
+ efx->ptp_data->ts_corrections.ptp_rx = MCDI_DWORD(outbuf,
+ PTP_OUT_GET_TIMESTAMP_CORRECTIONS_RECEIVE);
+ efx->ptp_data->ts_corrections.pps_out = MCDI_DWORD(outbuf,
+ PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_OUT);
+ efx->ptp_data->ts_corrections.pps_in = MCDI_DWORD(outbuf,
+ PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_IN);
+
+ if (out_len >= MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN) {
+ efx->ptp_data->ts_corrections.general_tx = MCDI_DWORD(
+ outbuf,
+ PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_TX);
+ efx->ptp_data->ts_corrections.general_rx = MCDI_DWORD(
+ outbuf,
+ PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_RX);
+ } else {
+ efx->ptp_data->ts_corrections.general_tx =
+ efx->ptp_data->ts_corrections.ptp_tx;
+ efx->ptp_data->ts_corrections.general_rx =
+ efx->ptp_data->ts_corrections.ptp_rx;
+ }
+ } else if (rc == -EINVAL) {
+ efx->ptp_data->ts_corrections.ptp_tx = 0;
+ efx->ptp_data->ts_corrections.ptp_rx = 0;
+ efx->ptp_data->ts_corrections.pps_out = 0;
+ efx->ptp_data->ts_corrections.pps_in = 0;
+ efx->ptp_data->ts_corrections.general_tx = 0;
+ efx->ptp_data->ts_corrections.general_rx = 0;
+ } else {
+ efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), outbuf,
+ sizeof(outbuf), rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+/* Enable MCDI PTP support. */
+static int efx_ptp_enable(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_ENABLE_LEN);
+ MCDI_DECLARE_BUF_ERR(outbuf);
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ENABLE);
+ MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0);
+ MCDI_SET_DWORD(inbuf, PTP_IN_ENABLE_QUEUE,
+ efx->ptp_data->channel ?
+ efx->ptp_data->channel->channel : 0);
+ MCDI_SET_DWORD(inbuf, PTP_IN_ENABLE_MODE, efx->ptp_data->mode);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), NULL);
+ rc = (rc == -EALREADY) ? 0 : rc;
+ if (rc)
+ efx_mcdi_display_error(efx, MC_CMD_PTP,
+ MC_CMD_PTP_IN_ENABLE_LEN,
+ outbuf, sizeof(outbuf), rc);
+ return rc;
+}
+
+/* Disable MCDI PTP support.
+ *
+ * Note that this function should never rely on the presence of ptp_data -
+ * may be called before that exists.
+ */
+static int efx_ptp_disable(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_DISABLE_LEN);
+ MCDI_DECLARE_BUF_ERR(outbuf);
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_DISABLE);
+ MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), NULL);
+ rc = (rc == -EALREADY) ? 0 : rc;
+ /* If we get ENOSYS, the NIC doesn't support PTP, and thus this function
+ * should only have been called during probe.
+ */
+ if (rc == -ENOSYS || rc == -EPERM)
+ pci_info(efx->pci_dev, "no PTP support\n");
+ else if (rc)
+ efx_mcdi_display_error(efx, MC_CMD_PTP,
+ MC_CMD_PTP_IN_DISABLE_LEN,
+ outbuf, sizeof(outbuf), rc);
+ return rc;
+}
+
+static void efx_ptp_deliver_rx_queue(struct sk_buff_head *q)
+{
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(q))) {
+ local_bh_disable();
+ netif_receive_skb(skb);
+ local_bh_enable();
+ }
+}
+
+static void efx_ptp_handle_no_channel(struct efx_nic *efx)
+{
+ netif_err(efx, drv, efx->net_dev,
+ "ERROR: PTP requires MSI-X and 1 additional interrupt"
+ "vector. PTP disabled\n");
+}
+
+/* Repeatedly send the host time to the MC which will capture the hardware
+ * time.
+ */
+static void efx_ptp_send_times(struct efx_nic *efx,
+ struct pps_event_time *last_time)
+{
+ struct pps_event_time now;
+ struct timespec64 limit;
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ int *mc_running = ptp->start.addr;
+
+ pps_get_ts(&now);
+ limit = now.ts_real;
+ timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS);
+
+ /* Write host time for specified period or until MC is done */
+ while ((timespec64_compare(&now.ts_real, &limit) < 0) &&
+ READ_ONCE(*mc_running)) {
+ struct timespec64 update_time;
+ unsigned int host_time;
+
+ /* Don't update continuously to avoid saturating the PCIe bus */
+ update_time = now.ts_real;
+ timespec64_add_ns(&update_time, SYNCHRONISATION_GRANULARITY_NS);
+ do {
+ pps_get_ts(&now);
+ } while ((timespec64_compare(&now.ts_real, &update_time) < 0) &&
+ READ_ONCE(*mc_running));
+
+ /* Synchronise NIC with single word of time only */
+ host_time = (now.ts_real.tv_sec << MC_NANOSECOND_BITS |
+ now.ts_real.tv_nsec);
+ /* Update host time in NIC memory */
+ efx->type->ptp_write_host_time(efx, host_time);
+ }
+ *last_time = now;
+}
+
+/* Read a timeset from the MC's results and partial process. */
+static void efx_ptp_read_timeset(MCDI_DECLARE_STRUCT_PTR(data),
+ struct efx_ptp_timeset *timeset)
+{
+ unsigned start_ns, end_ns;
+
+ timeset->host_start = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTSTART);
+ timeset->major = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_MAJOR);
+ timeset->minor = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_MINOR);
+ timeset->host_end = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTEND),
+ timeset->wait = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_WAITNS);
+
+ /* Ignore seconds */
+ start_ns = timeset->host_start & MC_NANOSECOND_MASK;
+ end_ns = timeset->host_end & MC_NANOSECOND_MASK;
+ /* Allow for rollover */
+ if (end_ns < start_ns)
+ end_ns += NSEC_PER_SEC;
+ /* Determine duration of operation */
+ timeset->window = end_ns - start_ns;
+}
+
+/* Process times received from MC.
+ *
+ * Extract times from returned results, and establish the minimum value
+ * seen. The minimum value represents the "best" possible time and events
+ * too much greater than this are rejected - the machine is, perhaps, too
+ * busy. A number of readings are taken so that, hopefully, at least one good
+ * synchronisation will be seen in the results.
+ */
+static int
+efx_ptp_process_times(struct efx_nic *efx, MCDI_DECLARE_STRUCT_PTR(synch_buf),
+ size_t response_length,
+ const struct pps_event_time *last_time)
+{
+ unsigned number_readings =
+ MCDI_VAR_ARRAY_LEN(response_length,
+ PTP_OUT_SYNCHRONIZE_TIMESET);
+ unsigned i;
+ unsigned ngood = 0;
+ unsigned last_good = 0;
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ u32 last_sec;
+ u32 start_sec;
+ struct timespec64 delta;
+ ktime_t mc_time;
+
+ if (number_readings == 0)
+ return -EAGAIN;
+
+ /* Read the set of results and find the last good host-MC
+ * synchronization result. The MC times when it finishes reading the
+ * host time so the corrected window time should be fairly constant
+ * for a given platform. Increment stats for any results that appear
+ * to be erroneous.
+ */
+ for (i = 0; i < number_readings; i++) {
+ s32 window, corrected;
+ struct timespec64 wait;
+
+ efx_ptp_read_timeset(
+ MCDI_ARRAY_STRUCT_PTR(synch_buf,
+ PTP_OUT_SYNCHRONIZE_TIMESET, i),
+ &ptp->timeset[i]);
+
+ wait = ktime_to_timespec64(
+ ptp->nic_to_kernel_time(0, ptp->timeset[i].wait, 0));
+ window = ptp->timeset[i].window;
+ corrected = window - wait.tv_nsec;
+
+ /* We expect the uncorrected synchronization window to be at
+ * least as large as the interval between host start and end
+ * times. If it is smaller than this then this is mostly likely
+ * to be a consequence of the host's time being adjusted.
+ * Check that the corrected sync window is in a reasonable
+ * range. If it is out of range it is likely to be because an
+ * interrupt or other delay occurred between reading the system
+ * time and writing it to MC memory.
+ */
+ if (window < SYNCHRONISATION_GRANULARITY_NS) {
+ ++ptp->invalid_sync_windows;
+ } else if (corrected >= MAX_SYNCHRONISATION_NS) {
+ ++ptp->oversize_sync_windows;
+ } else if (corrected < ptp->min_synchronisation_ns) {
+ ++ptp->undersize_sync_windows;
+ } else {
+ ngood++;
+ last_good = i;
+ }
+ }
+
+ if (ngood == 0) {
+ netif_warn(efx, drv, efx->net_dev,
+ "PTP no suitable synchronisations\n");
+ return -EAGAIN;
+ }
+
+ /* Calculate delay from last good sync (host time) to last_time.
+ * It is possible that the seconds rolled over between taking
+ * the start reading and the last value written by the host. The
+ * timescales are such that a gap of more than one second is never
+ * expected. delta is *not* normalised.
+ */
+ start_sec = ptp->timeset[last_good].host_start >> MC_NANOSECOND_BITS;
+ last_sec = last_time->ts_real.tv_sec & MC_SECOND_MASK;
+ if (start_sec != last_sec &&
+ ((start_sec + 1) & MC_SECOND_MASK) != last_sec) {
+ netif_warn(efx, hw, efx->net_dev,
+ "PTP bad synchronisation seconds\n");
+ return -EAGAIN;
+ }
+ delta.tv_sec = (last_sec - start_sec) & 1;
+ delta.tv_nsec =
+ last_time->ts_real.tv_nsec -
+ (ptp->timeset[last_good].host_start & MC_NANOSECOND_MASK);
+
+ /* Convert the NIC time at last good sync into kernel time.
+ * No correction is required - this time is the output of a
+ * firmware process.
+ */
+ mc_time = ptp->nic_to_kernel_time(ptp->timeset[last_good].major,
+ ptp->timeset[last_good].minor, 0);
+
+ /* Calculate delay from NIC top of second to last_time */
+ delta.tv_nsec += ktime_to_timespec64(mc_time).tv_nsec;
+
+ /* Set PPS timestamp to match NIC top of second */
+ ptp->host_time_pps = *last_time;
+ pps_sub_ts(&ptp->host_time_pps, delta);
+
+ return 0;
+}
+
+/* Synchronize times between the host and the MC */
+static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ MCDI_DECLARE_BUF(synch_buf, MC_CMD_PTP_OUT_SYNCHRONIZE_LENMAX);
+ size_t response_length;
+ int rc;
+ unsigned long timeout;
+ struct pps_event_time last_time = {};
+ unsigned int loops = 0;
+ int *start = ptp->start.addr;
+
+ MCDI_SET_DWORD(synch_buf, PTP_IN_OP, MC_CMD_PTP_OP_SYNCHRONIZE);
+ MCDI_SET_DWORD(synch_buf, PTP_IN_PERIPH_ID, 0);
+ MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_NUMTIMESETS,
+ num_readings);
+ MCDI_SET_QWORD(synch_buf, PTP_IN_SYNCHRONIZE_START_ADDR,
+ ptp->start.dma_addr);
+
+ /* Clear flag that signals MC ready */
+ WRITE_ONCE(*start, 0);
+ rc = efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf,
+ MC_CMD_PTP_IN_SYNCHRONIZE_LEN);
+ EFX_WARN_ON_ONCE_PARANOID(rc);
+
+ /* Wait for start from MCDI (or timeout) */
+ timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS);
+ while (!READ_ONCE(*start) && (time_before(jiffies, timeout))) {
+ udelay(20); /* Usually start MCDI execution quickly */
+ loops++;
+ }
+
+ if (loops <= 1)
+ ++ptp->fast_syncs;
+ if (!time_before(jiffies, timeout))
+ ++ptp->sync_timeouts;
+
+ if (READ_ONCE(*start))
+ efx_ptp_send_times(efx, &last_time);
+
+ /* Collect results */
+ rc = efx_mcdi_rpc_finish(efx, MC_CMD_PTP,
+ MC_CMD_PTP_IN_SYNCHRONIZE_LEN,
+ synch_buf, sizeof(synch_buf),
+ &response_length);
+ if (rc == 0) {
+ rc = efx_ptp_process_times(efx, synch_buf, response_length,
+ &last_time);
+ if (rc == 0)
+ ++ptp->good_syncs;
+ else
+ ++ptp->no_time_syncs;
+ }
+
+ /* Increment the bad syncs counter if the synchronize fails, whatever
+ * the reason.
+ */
+ if (rc != 0)
+ ++ptp->bad_syncs;
+
+ return rc;
+}
+
+/* Transmit a PTP packet via the dedicated hardware timestamped queue. */
+static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
+{
+ struct efx_ptp_data *ptp_data = efx->ptp_data;
+ u8 type = efx_tx_csum_type_skb(skb);
+ struct efx_tx_queue *tx_queue;
+
+ tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
+ if (tx_queue && tx_queue->timestamping) {
+ efx_enqueue_skb(tx_queue, skb);
+ } else {
+ WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
+ dev_kfree_skb_any(skb);
+ }
+}
+
+/* Transmit a PTP packet, via the MCDI interface, to the wire. */
+static void efx_ptp_xmit_skb_mc(struct efx_nic *efx, struct sk_buff *skb)
+{
+ struct efx_ptp_data *ptp_data = efx->ptp_data;
+ struct skb_shared_hwtstamps timestamps;
+ int rc = -EIO;
+ MCDI_DECLARE_BUF(txtime, MC_CMD_PTP_OUT_TRANSMIT_LEN);
+ size_t len;
+
+ MCDI_SET_DWORD(ptp_data->txbuf, PTP_IN_OP, MC_CMD_PTP_OP_TRANSMIT);
+ MCDI_SET_DWORD(ptp_data->txbuf, PTP_IN_PERIPH_ID, 0);
+ MCDI_SET_DWORD(ptp_data->txbuf, PTP_IN_TRANSMIT_LENGTH, skb->len);
+ if (skb_shinfo(skb)->nr_frags != 0) {
+ rc = skb_linearize(skb);
+ if (rc != 0)
+ goto fail;
+ }
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ rc = skb_checksum_help(skb);
+ if (rc != 0)
+ goto fail;
+ }
+ skb_copy_from_linear_data(skb,
+ MCDI_PTR(ptp_data->txbuf,
+ PTP_IN_TRANSMIT_PACKET),
+ skb->len);
+ rc = efx_mcdi_rpc(efx, MC_CMD_PTP,
+ ptp_data->txbuf, MC_CMD_PTP_IN_TRANSMIT_LEN(skb->len),
+ txtime, sizeof(txtime), &len);
+ if (rc != 0)
+ goto fail;
+
+ memset(&timestamps, 0, sizeof(timestamps));
+ timestamps.hwtstamp = ptp_data->nic_to_kernel_time(
+ MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MAJOR),
+ MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MINOR),
+ ptp_data->ts_corrections.ptp_tx);
+
+ skb_tstamp_tx(skb, &timestamps);
+
+ rc = 0;
+
+fail:
+ dev_kfree_skb_any(skb);
+
+ return;
+}
+
+static void efx_ptp_drop_time_expired_events(struct efx_nic *efx)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ struct list_head *cursor;
+ struct list_head *next;
+
+ if (ptp->rx_ts_inline)
+ return;
+
+ /* Drop time-expired events */
+ spin_lock_bh(&ptp->evt_lock);
+ list_for_each_safe(cursor, next, &ptp->evt_list) {
+ struct efx_ptp_event_rx *evt;
+
+ evt = list_entry(cursor, struct efx_ptp_event_rx,
+ link);
+ if (time_after(jiffies, evt->expiry)) {
+ list_move(&evt->link, &ptp->evt_free_list);
+ netif_warn(efx, hw, efx->net_dev,
+ "PTP rx event dropped\n");
+ }
+ }
+ spin_unlock_bh(&ptp->evt_lock);
+}
+
+static enum ptp_packet_state efx_ptp_match_rx(struct efx_nic *efx,
+ struct sk_buff *skb)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ bool evts_waiting;
+ struct list_head *cursor;
+ struct list_head *next;
+ struct efx_ptp_match *match;
+ enum ptp_packet_state rc = PTP_PACKET_STATE_UNMATCHED;
+
+ WARN_ON_ONCE(ptp->rx_ts_inline);
+
+ spin_lock_bh(&ptp->evt_lock);
+ evts_waiting = !list_empty(&ptp->evt_list);
+ spin_unlock_bh(&ptp->evt_lock);
+
+ if (!evts_waiting)
+ return PTP_PACKET_STATE_UNMATCHED;
+
+ match = (struct efx_ptp_match *)skb->cb;
+ /* Look for a matching timestamp in the event queue */
+ spin_lock_bh(&ptp->evt_lock);
+ list_for_each_safe(cursor, next, &ptp->evt_list) {
+ struct efx_ptp_event_rx *evt;
+
+ evt = list_entry(cursor, struct efx_ptp_event_rx, link);
+ if ((evt->seq0 == match->words[0]) &&
+ (evt->seq1 == match->words[1])) {
+ struct skb_shared_hwtstamps *timestamps;
+
+ /* Match - add in hardware timestamp */
+ timestamps = skb_hwtstamps(skb);
+ timestamps->hwtstamp = evt->hwtimestamp;
+
+ match->state = PTP_PACKET_STATE_MATCHED;
+ rc = PTP_PACKET_STATE_MATCHED;
+ list_move(&evt->link, &ptp->evt_free_list);
+ break;
+ }
+ }
+ spin_unlock_bh(&ptp->evt_lock);
+
+ return rc;
+}
+
+/* Process any queued receive events and corresponding packets
+ *
+ * q is returned with all the packets that are ready for delivery.
+ */
+static void efx_ptp_process_events(struct efx_nic *efx, struct sk_buff_head *q)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&ptp->rxq))) {
+ struct efx_ptp_match *match;
+
+ match = (struct efx_ptp_match *)skb->cb;
+ if (match->state == PTP_PACKET_STATE_MATCH_UNWANTED) {
+ __skb_queue_tail(q, skb);
+ } else if (efx_ptp_match_rx(efx, skb) ==
+ PTP_PACKET_STATE_MATCHED) {
+ __skb_queue_tail(q, skb);
+ } else if (time_after(jiffies, match->expiry)) {
+ match->state = PTP_PACKET_STATE_TIMED_OUT;
+ ++ptp->rx_no_timestamp;
+ __skb_queue_tail(q, skb);
+ } else {
+ /* Replace unprocessed entry and stop */
+ skb_queue_head(&ptp->rxq, skb);
+ break;
+ }
+ }
+}
+
+/* Complete processing of a received packet */
+static inline void efx_ptp_process_rx(struct efx_nic *efx, struct sk_buff *skb)
+{
+ local_bh_disable();
+ netif_receive_skb(skb);
+ local_bh_enable();
+}
+
+static void efx_ptp_remove_multicast_filters(struct efx_nic *efx)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+
+ if (ptp->rxfilter_installed) {
+ efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+ ptp->rxfilter_general);
+ efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+ ptp->rxfilter_event);
+ ptp->rxfilter_installed = false;
+ }
+}
+
+static int efx_ptp_insert_multicast_filters(struct efx_nic *efx)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ struct efx_filter_spec rxfilter;
+ int rc;
+
+ if (!ptp->channel || ptp->rxfilter_installed)
+ return 0;
+
+ /* Must filter on both event and general ports to ensure
+ * that there is no packet re-ordering.
+ */
+ efx_filter_init_rx(&rxfilter, EFX_FILTER_PRI_REQUIRED, 0,
+ efx_rx_queue_index(
+ efx_channel_get_rx_queue(ptp->channel)));
+ rc = efx_filter_set_ipv4_local(&rxfilter, IPPROTO_UDP,
+ htonl(PTP_ADDRESS),
+ htons(PTP_EVENT_PORT));
+ if (rc != 0)
+ return rc;
+
+ rc = efx_filter_insert_filter(efx, &rxfilter, true);
+ if (rc < 0)
+ return rc;
+ ptp->rxfilter_event = rc;
+
+ efx_filter_init_rx(&rxfilter, EFX_FILTER_PRI_REQUIRED, 0,
+ efx_rx_queue_index(
+ efx_channel_get_rx_queue(ptp->channel)));
+ rc = efx_filter_set_ipv4_local(&rxfilter, IPPROTO_UDP,
+ htonl(PTP_ADDRESS),
+ htons(PTP_GENERAL_PORT));
+ if (rc != 0)
+ goto fail;
+
+ rc = efx_filter_insert_filter(efx, &rxfilter, true);
+ if (rc < 0)
+ goto fail;
+ ptp->rxfilter_general = rc;
+
+ ptp->rxfilter_installed = true;
+ return 0;
+
+fail:
+ efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+ ptp->rxfilter_event);
+ return rc;
+}
+
+static int efx_ptp_start(struct efx_nic *efx)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ int rc;
+
+ ptp->reset_required = false;
+
+ rc = efx_ptp_insert_multicast_filters(efx);
+ if (rc)
+ return rc;
+
+ rc = efx_ptp_enable(efx);
+ if (rc != 0)
+ goto fail;
+
+ ptp->evt_frag_idx = 0;
+ ptp->current_adjfreq = 0;
+
+ return 0;
+
+fail:
+ efx_ptp_remove_multicast_filters(efx);
+ return rc;
+}
+
+static int efx_ptp_stop(struct efx_nic *efx)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ struct list_head *cursor;
+ struct list_head *next;
+ int rc;
+
+ if (ptp == NULL)
+ return 0;
+
+ rc = efx_ptp_disable(efx);
+
+ efx_ptp_remove_multicast_filters(efx);
+
+ /* Make sure RX packets are really delivered */
+ efx_ptp_deliver_rx_queue(&efx->ptp_data->rxq);
+ skb_queue_purge(&efx->ptp_data->txq);
+
+ /* Drop any pending receive events */
+ spin_lock_bh(&efx->ptp_data->evt_lock);
+ list_for_each_safe(cursor, next, &efx->ptp_data->evt_list) {
+ list_move(cursor, &efx->ptp_data->evt_free_list);
+ }
+ spin_unlock_bh(&efx->ptp_data->evt_lock);
+
+ return rc;
+}
+
+static int efx_ptp_restart(struct efx_nic *efx)
+{
+ if (efx->ptp_data && efx->ptp_data->enabled)
+ return efx_ptp_start(efx);
+ return 0;
+}
+
+static void efx_ptp_pps_worker(struct work_struct *work)
+{
+ struct efx_ptp_data *ptp =
+ container_of(work, struct efx_ptp_data, pps_work);
+ struct efx_nic *efx = ptp->efx;
+ struct ptp_clock_event ptp_evt;
+
+ if (efx_ptp_synchronize(efx, PTP_SYNC_ATTEMPTS))
+ return;
+
+ ptp_evt.type = PTP_CLOCK_PPSUSR;
+ ptp_evt.pps_times = ptp->host_time_pps;
+ ptp_clock_event(ptp->phc_clock, &ptp_evt);
+}
+
+static void efx_ptp_worker(struct work_struct *work)
+{
+ struct efx_ptp_data *ptp_data =
+ container_of(work, struct efx_ptp_data, work);
+ struct efx_nic *efx = ptp_data->efx;
+ struct sk_buff *skb;
+ struct sk_buff_head tempq;
+
+ if (ptp_data->reset_required) {
+ efx_ptp_stop(efx);
+ efx_ptp_start(efx);
+ return;
+ }
+
+ efx_ptp_drop_time_expired_events(efx);
+
+ __skb_queue_head_init(&tempq);
+ efx_ptp_process_events(efx, &tempq);
+
+ while ((skb = skb_dequeue(&ptp_data->txq)))
+ ptp_data->xmit_skb(efx, skb);
+
+ while ((skb = __skb_dequeue(&tempq)))
+ efx_ptp_process_rx(efx, skb);
+}
+
+static const struct ptp_clock_info efx_phc_clock_info = {
+ .owner = THIS_MODULE,
+ .name = "sfc",
+ .max_adj = MAX_PPB,
+ .n_alarm = 0,
+ .n_ext_ts = 0,
+ .n_per_out = 0,
+ .n_pins = 0,
+ .pps = 1,
+ .adjfreq = efx_phc_adjfreq,
+ .adjtime = efx_phc_adjtime,
+ .gettime64 = efx_phc_gettime,
+ .settime64 = efx_phc_settime,
+ .enable = efx_phc_enable,
+};
+
+/* Initialise PTP state. */
+int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel)
+{
+ struct efx_ptp_data *ptp;
+ int rc = 0;
+ unsigned int pos;
+
+ ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL);
+ efx->ptp_data = ptp;
+ if (!efx->ptp_data)
+ return -ENOMEM;
+
+ ptp->efx = efx;
+ ptp->channel = channel;
+ ptp->rx_ts_inline = efx_nic_rev(efx) >= EFX_REV_HUNT_A0;
+
+ rc = efx_nic_alloc_buffer(efx, &ptp->start, sizeof(int), GFP_KERNEL);
+ if (rc != 0)
+ goto fail1;
+
+ skb_queue_head_init(&ptp->rxq);
+ skb_queue_head_init(&ptp->txq);
+ ptp->workwq = create_singlethread_workqueue("sfc_ptp");
+ if (!ptp->workwq) {
+ rc = -ENOMEM;
+ goto fail2;
+ }
+
+ if (efx_ptp_use_mac_tx_timestamps(efx)) {
+ ptp->xmit_skb = efx_ptp_xmit_skb_queue;
+ /* Request sync events on this channel. */
+ channel->sync_events_state = SYNC_EVENTS_QUIESCENT;
+ } else {
+ ptp->xmit_skb = efx_ptp_xmit_skb_mc;
+ }
+
+ INIT_WORK(&ptp->work, efx_ptp_worker);
+ ptp->config.flags = 0;
+ ptp->config.tx_type = HWTSTAMP_TX_OFF;
+ ptp->config.rx_filter = HWTSTAMP_FILTER_NONE;
+ INIT_LIST_HEAD(&ptp->evt_list);
+ INIT_LIST_HEAD(&ptp->evt_free_list);
+ spin_lock_init(&ptp->evt_lock);
+ for (pos = 0; pos < MAX_RECEIVE_EVENTS; pos++)
+ list_add(&ptp->rx_evts[pos].link, &ptp->evt_free_list);
+
+ /* Get the NIC PTP attributes and set up time conversions */
+ rc = efx_ptp_get_attributes(efx);
+ if (rc < 0)
+ goto fail3;
+
+ /* Get the timestamp corrections */
+ rc = efx_ptp_get_timestamp_corrections(efx);
+ if (rc < 0)
+ goto fail3;
+
+ if (efx->mcdi->fn_flags &
+ (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_PRIMARY)) {
+ ptp->phc_clock_info = efx_phc_clock_info;
+ ptp->phc_clock = ptp_clock_register(&ptp->phc_clock_info,
+ &efx->pci_dev->dev);
+ if (IS_ERR(ptp->phc_clock)) {
+ rc = PTR_ERR(ptp->phc_clock);
+ goto fail3;
+ } else if (ptp->phc_clock) {
+ INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker);
+ ptp->pps_workwq = create_singlethread_workqueue("sfc_pps");
+ if (!ptp->pps_workwq) {
+ rc = -ENOMEM;
+ goto fail4;
+ }
+ }
+ }
+ ptp->nic_ts_enabled = false;
+
+ return 0;
+fail4:
+ ptp_clock_unregister(efx->ptp_data->phc_clock);
+
+fail3:
+ destroy_workqueue(efx->ptp_data->workwq);
+
+fail2:
+ efx_nic_free_buffer(efx, &ptp->start);
+
+fail1:
+ kfree(efx->ptp_data);
+ efx->ptp_data = NULL;
+
+ return rc;
+}
+
+/* Initialise PTP channel.
+ *
+ * Setting core_index to zero causes the queue to be initialised and doesn't
+ * overlap with 'rxq0' because ptp.c doesn't use skb_record_rx_queue.
+ */
+static int efx_ptp_probe_channel(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+ int rc;
+
+ channel->irq_moderation_us = 0;
+ channel->rx_queue.core_index = 0;
+
+ rc = efx_ptp_probe(efx, channel);
+ /* Failure to probe PTP is not fatal; this channel will just not be
+ * used for anything.
+ * In the case of EPERM, efx_ptp_probe will print its own message (in
+ * efx_ptp_get_attributes()), so we don't need to.
+ */
+ if (rc && rc != -EPERM)
+ netif_warn(efx, drv, efx->net_dev,
+ "Failed to probe PTP, rc=%d\n", rc);
+ return 0;
+}
+
+void efx_ptp_remove(struct efx_nic *efx)
+{
+ if (!efx->ptp_data)
+ return;
+
+ (void)efx_ptp_disable(efx);
+
+ cancel_work_sync(&efx->ptp_data->work);
+ if (efx->ptp_data->pps_workwq)
+ cancel_work_sync(&efx->ptp_data->pps_work);
+
+ skb_queue_purge(&efx->ptp_data->rxq);
+ skb_queue_purge(&efx->ptp_data->txq);
+
+ if (efx->ptp_data->phc_clock) {
+ destroy_workqueue(efx->ptp_data->pps_workwq);
+ ptp_clock_unregister(efx->ptp_data->phc_clock);
+ }
+
+ destroy_workqueue(efx->ptp_data->workwq);
+
+ efx_nic_free_buffer(efx, &efx->ptp_data->start);
+ kfree(efx->ptp_data);
+ efx->ptp_data = NULL;
+}
+
+static void efx_ptp_remove_channel(struct efx_channel *channel)
+{
+ efx_ptp_remove(channel->efx);
+}
+
+static void efx_ptp_get_channel_name(struct efx_channel *channel,
+ char *buf, size_t len)
+{
+ snprintf(buf, len, "%s-ptp", channel->efx->name);
+}
+
+/* Determine whether this packet should be processed by the PTP module
+ * or transmitted conventionally.
+ */
+bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb)
+{
+ return efx->ptp_data &&
+ efx->ptp_data->enabled &&
+ skb->len >= PTP_MIN_LENGTH &&
+ skb->len <= MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM &&
+ likely(skb->protocol == htons(ETH_P_IP)) &&
+ skb_transport_header_was_set(skb) &&
+ skb_network_header_len(skb) >= sizeof(struct iphdr) &&
+ ip_hdr(skb)->protocol == IPPROTO_UDP &&
+ skb_headlen(skb) >=
+ skb_transport_offset(skb) + sizeof(struct udphdr) &&
+ udp_hdr(skb)->dest == htons(PTP_EVENT_PORT);
+}
+
+/* Receive a PTP packet. Packets are queued until the arrival of
+ * the receive timestamp from the MC - this will probably occur after the
+ * packet arrival because of the processing in the MC.
+ */
+static bool efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb)
+{
+ struct efx_nic *efx = channel->efx;
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ struct efx_ptp_match *match = (struct efx_ptp_match *)skb->cb;
+ u8 *match_data_012, *match_data_345;
+ unsigned int version;
+ u8 *data;
+
+ match->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS);
+
+ /* Correct version? */
+ if (ptp->mode == MC_CMD_PTP_MODE_V1) {
+ if (!pskb_may_pull(skb, PTP_V1_MIN_LENGTH)) {
+ return false;
+ }
+ data = skb->data;
+ version = ntohs(*(__be16 *)&data[PTP_V1_VERSION_OFFSET]);
+ if (version != PTP_VERSION_V1) {
+ return false;
+ }
+
+ /* PTP V1 uses all six bytes of the UUID to match the packet
+ * to the timestamp
+ */
+ match_data_012 = data + PTP_V1_UUID_OFFSET;
+ match_data_345 = data + PTP_V1_UUID_OFFSET + 3;
+ } else {
+ if (!pskb_may_pull(skb, PTP_V2_MIN_LENGTH)) {
+ return false;
+ }
+ data = skb->data;
+ version = data[PTP_V2_VERSION_OFFSET];
+ if ((version & PTP_VERSION_V2_MASK) != PTP_VERSION_V2) {
+ return false;
+ }
+
+ /* The original V2 implementation uses bytes 2-7 of
+ * the UUID to match the packet to the timestamp. This
+ * discards two of the bytes of the MAC address used
+ * to create the UUID (SF bug 33070). The PTP V2
+ * enhanced mode fixes this issue and uses bytes 0-2
+ * and byte 5-7 of the UUID.
+ */
+ match_data_345 = data + PTP_V2_UUID_OFFSET + 5;
+ if (ptp->mode == MC_CMD_PTP_MODE_V2) {
+ match_data_012 = data + PTP_V2_UUID_OFFSET + 2;
+ } else {
+ match_data_012 = data + PTP_V2_UUID_OFFSET + 0;
+ BUG_ON(ptp->mode != MC_CMD_PTP_MODE_V2_ENHANCED);
+ }
+ }
+
+ /* Does this packet require timestamping? */
+ if (ntohs(*(__be16 *)&data[PTP_DPORT_OFFSET]) == PTP_EVENT_PORT) {
+ match->state = PTP_PACKET_STATE_UNMATCHED;
+
+ /* We expect the sequence number to be in the same position in
+ * the packet for PTP V1 and V2
+ */
+ BUILD_BUG_ON(PTP_V1_SEQUENCE_OFFSET != PTP_V2_SEQUENCE_OFFSET);
+ BUILD_BUG_ON(PTP_V1_SEQUENCE_LENGTH != PTP_V2_SEQUENCE_LENGTH);
+
+ /* Extract UUID/Sequence information */
+ match->words[0] = (match_data_012[0] |
+ (match_data_012[1] << 8) |
+ (match_data_012[2] << 16) |
+ (match_data_345[0] << 24));
+ match->words[1] = (match_data_345[1] |
+ (match_data_345[2] << 8) |
+ (data[PTP_V1_SEQUENCE_OFFSET +
+ PTP_V1_SEQUENCE_LENGTH - 1] <<
+ 16));
+ } else {
+ match->state = PTP_PACKET_STATE_MATCH_UNWANTED;
+ }
+
+ skb_queue_tail(&ptp->rxq, skb);
+ queue_work(ptp->workwq, &ptp->work);
+
+ return true;
+}
+
+/* Transmit a PTP packet. This has to be transmitted by the MC
+ * itself, through an MCDI call. MCDI calls aren't permitted
+ * in the transmit path so defer the actual transmission to a suitable worker.
+ */
+int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+
+ skb_queue_tail(&ptp->txq, skb);
+
+ if ((udp_hdr(skb)->dest == htons(PTP_EVENT_PORT)) &&
+ (skb->len <= MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM))
+ efx_xmit_hwtstamp_pending(skb);
+ queue_work(ptp->workwq, &ptp->work);
+
+ return NETDEV_TX_OK;
+}
+
+int efx_ptp_get_mode(struct efx_nic *efx)
+{
+ return efx->ptp_data->mode;
+}
+
+int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted,
+ unsigned int new_mode)
+{
+ if ((enable_wanted != efx->ptp_data->enabled) ||
+ (enable_wanted && (efx->ptp_data->mode != new_mode))) {
+ int rc = 0;
+
+ if (enable_wanted) {
+ /* Change of mode requires disable */
+ if (efx->ptp_data->enabled &&
+ (efx->ptp_data->mode != new_mode)) {
+ efx->ptp_data->enabled = false;
+ rc = efx_ptp_stop(efx);
+ if (rc != 0)
+ return rc;
+ }
+
+ /* Set new operating mode and establish
+ * baseline synchronisation, which must
+ * succeed.
+ */
+ efx->ptp_data->mode = new_mode;
+ if (netif_running(efx->net_dev))
+ rc = efx_ptp_start(efx);
+ if (rc == 0) {
+ rc = efx_ptp_synchronize(efx,
+ PTP_SYNC_ATTEMPTS * 2);
+ if (rc != 0)
+ efx_ptp_stop(efx);
+ }
+ } else {
+ rc = efx_ptp_stop(efx);
+ }
+
+ if (rc != 0)
+ return rc;
+
+ efx->ptp_data->enabled = enable_wanted;
+ }
+
+ return 0;
+}
+
+static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init)
+{
+ int rc;
+
+ if ((init->tx_type != HWTSTAMP_TX_OFF) &&
+ (init->tx_type != HWTSTAMP_TX_ON))
+ return -ERANGE;
+
+ rc = efx->type->ptp_set_ts_config(efx, init);
+ if (rc)
+ return rc;
+
+ efx->ptp_data->config = *init;
+ return 0;
+}
+
+void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ struct efx_nic *primary = efx->primary;
+
+ ASSERT_RTNL();
+
+ if (!ptp)
+ return;
+
+ ts_info->so_timestamping |= (SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE);
+ /* Check licensed features. If we don't have the license for TX
+ * timestamps, the NIC will not support them.
+ */
+ if (efx_ptp_use_mac_tx_timestamps(efx)) {
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
+ if (!(nic_data->licensed_features &
+ (1 << LICENSED_V3_FEATURES_TX_TIMESTAMPS_LBN)))
+ ts_info->so_timestamping &=
+ ~SOF_TIMESTAMPING_TX_HARDWARE;
+ }
+ if (primary && primary->ptp_data && primary->ptp_data->phc_clock)
+ ts_info->phc_index =
+ ptp_clock_index(primary->ptp_data->phc_clock);
+ ts_info->tx_types = 1 << HWTSTAMP_TX_OFF | 1 << HWTSTAMP_TX_ON;
+ ts_info->rx_filters = ptp->efx->type->hwtstamp_filters;
+}
+
+int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ int rc;
+
+ /* Not a PTP enabled port */
+ if (!efx->ptp_data)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ rc = efx_ptp_ts_init(efx, &config);
+ if (rc != 0)
+ return rc;
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config))
+ ? -EFAULT : 0;
+}
+
+int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr)
+{
+ if (!efx->ptp_data)
+ return -EOPNOTSUPP;
+
+ return copy_to_user(ifr->ifr_data, &efx->ptp_data->config,
+ sizeof(efx->ptp_data->config)) ? -EFAULT : 0;
+}
+
+static void ptp_event_failure(struct efx_nic *efx, int expected_frag_len)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+
+ netif_err(efx, hw, efx->net_dev,
+ "PTP unexpected event length: got %d expected %d\n",
+ ptp->evt_frag_idx, expected_frag_len);
+ ptp->reset_required = true;
+ queue_work(ptp->workwq, &ptp->work);
+}
+
+/* Process a completed receive event. Put it on the event queue and
+ * start worker thread. This is required because event and their
+ * correspoding packets may come in either order.
+ */
+static void ptp_event_rx(struct efx_nic *efx, struct efx_ptp_data *ptp)
+{
+ struct efx_ptp_event_rx *evt = NULL;
+
+ if (WARN_ON_ONCE(ptp->rx_ts_inline))
+ return;
+
+ if (ptp->evt_frag_idx != 3) {
+ ptp_event_failure(efx, 3);
+ return;
+ }
+
+ spin_lock_bh(&ptp->evt_lock);
+ if (!list_empty(&ptp->evt_free_list)) {
+ evt = list_first_entry(&ptp->evt_free_list,
+ struct efx_ptp_event_rx, link);
+ list_del(&evt->link);
+
+ evt->seq0 = EFX_QWORD_FIELD(ptp->evt_frags[2], MCDI_EVENT_DATA);
+ evt->seq1 = (EFX_QWORD_FIELD(ptp->evt_frags[2],
+ MCDI_EVENT_SRC) |
+ (EFX_QWORD_FIELD(ptp->evt_frags[1],
+ MCDI_EVENT_SRC) << 8) |
+ (EFX_QWORD_FIELD(ptp->evt_frags[0],
+ MCDI_EVENT_SRC) << 16));
+ evt->hwtimestamp = efx->ptp_data->nic_to_kernel_time(
+ EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA),
+ EFX_QWORD_FIELD(ptp->evt_frags[1], MCDI_EVENT_DATA),
+ ptp->ts_corrections.ptp_rx);
+ evt->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS);
+ list_add_tail(&evt->link, &ptp->evt_list);
+
+ queue_work(ptp->workwq, &ptp->work);
+ } else if (net_ratelimit()) {
+ /* Log a rate-limited warning message. */
+ netif_err(efx, rx_err, efx->net_dev, "PTP event queue overflow\n");
+ }
+ spin_unlock_bh(&ptp->evt_lock);
+}
+
+static void ptp_event_fault(struct efx_nic *efx, struct efx_ptp_data *ptp)
+{
+ int code = EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA);
+ if (ptp->evt_frag_idx != 1) {
+ ptp_event_failure(efx, 1);
+ return;
+ }
+
+ netif_err(efx, hw, efx->net_dev, "PTP error %d\n", code);
+}
+
+static void ptp_event_pps(struct efx_nic *efx, struct efx_ptp_data *ptp)
+{
+ if (ptp->nic_ts_enabled)
+ queue_work(ptp->pps_workwq, &ptp->pps_work);
+}
+
+void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev)
+{
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ int code = EFX_QWORD_FIELD(*ev, MCDI_EVENT_CODE);
+
+ if (!ptp) {
+ if (!efx->ptp_warned) {
+ netif_warn(efx, drv, efx->net_dev,
+ "Received PTP event but PTP not set up\n");
+ efx->ptp_warned = true;
+ }
+ return;
+ }
+
+ if (!ptp->enabled)
+ return;
+
+ if (ptp->evt_frag_idx == 0) {
+ ptp->evt_code = code;
+ } else if (ptp->evt_code != code) {
+ netif_err(efx, hw, efx->net_dev,
+ "PTP out of sequence event %d\n", code);
+ ptp->evt_frag_idx = 0;
+ }
+
+ ptp->evt_frags[ptp->evt_frag_idx++] = *ev;
+ if (!MCDI_EVENT_FIELD(*ev, CONT)) {
+ /* Process resulting event */
+ switch (code) {
+ case MCDI_EVENT_CODE_PTP_RX:
+ ptp_event_rx(efx, ptp);
+ break;
+ case MCDI_EVENT_CODE_PTP_FAULT:
+ ptp_event_fault(efx, ptp);
+ break;
+ case MCDI_EVENT_CODE_PTP_PPS:
+ ptp_event_pps(efx, ptp);
+ break;
+ default:
+ netif_err(efx, hw, efx->net_dev,
+ "PTP unknown event %d\n", code);
+ break;
+ }
+ ptp->evt_frag_idx = 0;
+ } else if (MAX_EVENT_FRAGS == ptp->evt_frag_idx) {
+ netif_err(efx, hw, efx->net_dev,
+ "PTP too many event fragments\n");
+ ptp->evt_frag_idx = 0;
+ }
+}
+
+void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev)
+{
+ struct efx_nic *efx = channel->efx;
+ struct efx_ptp_data *ptp = efx->ptp_data;
+
+ /* When extracting the sync timestamp minor value, we should discard
+ * the least significant two bits. These are not required in order
+ * to reconstruct full-range timestamps and they are optionally used
+ * to report status depending on the options supplied when subscribing
+ * for sync events.
+ */
+ channel->sync_timestamp_major = MCDI_EVENT_FIELD(*ev, PTP_TIME_MAJOR);
+ channel->sync_timestamp_minor =
+ (MCDI_EVENT_FIELD(*ev, PTP_TIME_MINOR_MS_8BITS) & 0xFC)
+ << ptp->nic_time.sync_event_minor_shift;
+
+ /* if sync events have been disabled then we want to silently ignore
+ * this event, so throw away result.
+ */
+ (void) cmpxchg(&channel->sync_events_state, SYNC_EVENTS_REQUESTED,
+ SYNC_EVENTS_VALID);
+}
+
+static inline u32 efx_rx_buf_timestamp_minor(struct efx_nic *efx, const u8 *eh)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_ts_offset));
+#else
+ const u8 *data = eh + efx->rx_packet_ts_offset;
+ return (u32)data[0] |
+ (u32)data[1] << 8 |
+ (u32)data[2] << 16 |
+ (u32)data[3] << 24;
+#endif
+}
+
+void __efx_rx_skb_attach_timestamp(struct efx_channel *channel,
+ struct sk_buff *skb)
+{
+ struct efx_nic *efx = channel->efx;
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ u32 pkt_timestamp_major, pkt_timestamp_minor;
+ u32 diff, carry;
+ struct skb_shared_hwtstamps *timestamps;
+
+ if (channel->sync_events_state != SYNC_EVENTS_VALID)
+ return;
+
+ pkt_timestamp_minor = efx_rx_buf_timestamp_minor(efx, skb_mac_header(skb));
+
+ /* get the difference between the packet and sync timestamps,
+ * modulo one second
+ */
+ diff = pkt_timestamp_minor - channel->sync_timestamp_minor;
+ if (pkt_timestamp_minor < channel->sync_timestamp_minor)
+ diff += ptp->nic_time.minor_max;
+
+ /* do we roll over a second boundary and need to carry the one? */
+ carry = (channel->sync_timestamp_minor >= ptp->nic_time.minor_max - diff) ?
+ 1 : 0;
+
+ if (diff <= ptp->nic_time.sync_event_diff_max) {
+ /* packet is ahead of the sync event by a quarter of a second or
+ * less (allowing for fuzz)
+ */
+ pkt_timestamp_major = channel->sync_timestamp_major + carry;
+ } else if (diff >= ptp->nic_time.sync_event_diff_min) {
+ /* packet is behind the sync event but within the fuzz factor.
+ * This means the RX packet and sync event crossed as they were
+ * placed on the event queue, which can sometimes happen.
+ */
+ pkt_timestamp_major = channel->sync_timestamp_major - 1 + carry;
+ } else {
+ /* it's outside tolerance in both directions. this might be
+ * indicative of us missing sync events for some reason, so
+ * we'll call it an error rather than risk giving a bogus
+ * timestamp.
+ */
+ netif_vdbg(efx, drv, efx->net_dev,
+ "packet timestamp %x too far from sync event %x:%x\n",
+ pkt_timestamp_minor, channel->sync_timestamp_major,
+ channel->sync_timestamp_minor);
+ return;
+ }
+
+ /* attach the timestamps to the skb */
+ timestamps = skb_hwtstamps(skb);
+ timestamps->hwtstamp =
+ ptp->nic_to_kernel_time(pkt_timestamp_major,
+ pkt_timestamp_minor,
+ ptp->ts_corrections.general_rx);
+}
+
+static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+ struct efx_ptp_data *ptp_data = container_of(ptp,
+ struct efx_ptp_data,
+ phc_clock_info);
+ struct efx_nic *efx = ptp_data->efx;
+ MCDI_DECLARE_BUF(inadj, MC_CMD_PTP_IN_ADJUST_LEN);
+ s64 adjustment_ns;
+ int rc;
+
+ if (delta > MAX_PPB)
+ delta = MAX_PPB;
+ else if (delta < -MAX_PPB)
+ delta = -MAX_PPB;
+
+ /* Convert ppb to fixed point ns taking care to round correctly. */
+ adjustment_ns = ((s64)delta * PPB_SCALE_WORD +
+ (1 << (ptp_data->adjfreq_ppb_shift - 1))) >>
+ ptp_data->adjfreq_ppb_shift;
+
+ MCDI_SET_DWORD(inadj, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST);
+ MCDI_SET_DWORD(inadj, PTP_IN_PERIPH_ID, 0);
+ MCDI_SET_QWORD(inadj, PTP_IN_ADJUST_FREQ, adjustment_ns);
+ MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_SECONDS, 0);
+ MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_NANOSECONDS, 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inadj, sizeof(inadj),
+ NULL, 0, NULL);
+ if (rc != 0)
+ return rc;
+
+ ptp_data->current_adjfreq = adjustment_ns;
+ return 0;
+}
+
+static int efx_phc_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ u32 nic_major, nic_minor;
+ struct efx_ptp_data *ptp_data = container_of(ptp,
+ struct efx_ptp_data,
+ phc_clock_info);
+ struct efx_nic *efx = ptp_data->efx;
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_ADJUST_LEN);
+
+ efx->ptp_data->ns_to_nic_time(delta, &nic_major, &nic_minor);
+
+ MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST);
+ MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0);
+ MCDI_SET_QWORD(inbuf, PTP_IN_ADJUST_FREQ, ptp_data->current_adjfreq);
+ MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_MAJOR, nic_major);
+ MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_MINOR, nic_minor);
+ return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+}
+
+static int efx_phc_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+ struct efx_ptp_data *ptp_data = container_of(ptp,
+ struct efx_ptp_data,
+ phc_clock_info);
+ struct efx_nic *efx = ptp_data->efx;
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_READ_NIC_TIME_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_READ_NIC_TIME_LEN);
+ int rc;
+ ktime_t kt;
+
+ MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_READ_NIC_TIME);
+ MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), NULL);
+ if (rc != 0)
+ return rc;
+
+ kt = ptp_data->nic_to_kernel_time(
+ MCDI_DWORD(outbuf, PTP_OUT_READ_NIC_TIME_MAJOR),
+ MCDI_DWORD(outbuf, PTP_OUT_READ_NIC_TIME_MINOR), 0);
+ *ts = ktime_to_timespec64(kt);
+ return 0;
+}
+
+static int efx_phc_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *e_ts)
+{
+ /* Get the current NIC time, efx_phc_gettime.
+ * Subtract from the desired time to get the offset
+ * call efx_phc_adjtime with the offset
+ */
+ int rc;
+ struct timespec64 time_now;
+ struct timespec64 delta;
+
+ rc = efx_phc_gettime(ptp, &time_now);
+ if (rc != 0)
+ return rc;
+
+ delta = timespec64_sub(*e_ts, time_now);
+
+ rc = efx_phc_adjtime(ptp, timespec64_to_ns(&delta));
+ if (rc != 0)
+ return rc;
+
+ return 0;
+}
+
+static int efx_phc_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *request,
+ int enable)
+{
+ struct efx_ptp_data *ptp_data = container_of(ptp,
+ struct efx_ptp_data,
+ phc_clock_info);
+ if (request->type != PTP_CLK_REQ_PPS)
+ return -EOPNOTSUPP;
+
+ ptp_data->nic_ts_enabled = !!enable;
+ return 0;
+}
+
+static const struct efx_channel_type efx_ptp_channel_type = {
+ .handle_no_channel = efx_ptp_handle_no_channel,
+ .pre_probe = efx_ptp_probe_channel,
+ .post_remove = efx_ptp_remove_channel,
+ .get_name = efx_ptp_get_channel_name,
+ /* no copy operation; there is no need to reallocate this channel */
+ .receive_skb = efx_ptp_rx,
+ .want_txqs = efx_ptp_want_txqs,
+ .keep_eventq = false,
+};
+
+void efx_ptp_defer_probe_with_channel(struct efx_nic *efx)
+{
+ /* Check whether PTP is implemented on this NIC. The DISABLE
+ * operation will succeed if and only if it is implemented.
+ */
+ if (efx_ptp_disable(efx) == 0)
+ efx->extra_channel_type[EFX_EXTRA_CHANNEL_PTP] =
+ &efx_ptp_channel_type;
+}
+
+void efx_ptp_start_datapath(struct efx_nic *efx)
+{
+ if (efx_ptp_restart(efx))
+ netif_err(efx, drv, efx->net_dev, "Failed to restart PTP.\n");
+ /* re-enable timestamping if it was previously enabled */
+ if (efx->type->ptp_set_ts_sync_events)
+ efx->type->ptp_set_ts_sync_events(efx, true, true);
+}
+
+void efx_ptp_stop_datapath(struct efx_nic *efx)
+{
+ /* temporarily disable timestamping */
+ if (efx->type->ptp_set_ts_sync_events)
+ efx->type->ptp_set_ts_sync_events(efx, false, true);
+ efx_ptp_stop(efx);
+}
diff --git a/drivers/net/ethernet/sfc/siena/ptp.h b/drivers/net/ethernet/sfc/siena/ptp.h
new file mode 100644
index 000000000000..9855e8c9e544
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/ptp.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ * Copyright 2019-2020 Xilinx Inc.
+ */
+
+#ifndef EFX_PTP_H
+#define EFX_PTP_H
+
+#include <linux/net_tstamp.h>
+#include "net_driver.h"
+
+struct ethtool_ts_info;
+int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
+void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
+struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
+void efx_ptp_remove(struct efx_nic *efx);
+int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
+int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);
+void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info);
+bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
+int efx_ptp_get_mode(struct efx_nic *efx);
+int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted,
+ unsigned int new_mode);
+int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
+void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev);
+size_t efx_ptp_describe_stats(struct efx_nic *efx, u8 *strings);
+size_t efx_ptp_update_stats(struct efx_nic *efx, u64 *stats);
+void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev);
+void __efx_rx_skb_attach_timestamp(struct efx_channel *channel,
+ struct sk_buff *skb);
+static inline void efx_rx_skb_attach_timestamp(struct efx_channel *channel,
+ struct sk_buff *skb)
+{
+ if (channel->sync_events_state == SYNC_EVENTS_VALID)
+ __efx_rx_skb_attach_timestamp(channel, skb);
+}
+void efx_ptp_start_datapath(struct efx_nic *efx);
+void efx_ptp_stop_datapath(struct efx_nic *efx);
+bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx);
+ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue);
+
+#endif /* EFX_PTP_H */
diff --git a/drivers/net/ethernet/sfc/siena/rx.c b/drivers/net/ethernet/sfc/siena/rx.c
new file mode 100644
index 000000000000..2375cef577e4
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/rx.c
@@ -0,0 +1,399 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ */
+
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/prefetch.h>
+#include <linux/moduleparam.h>
+#include <linux/iommu.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <net/xdp.h>
+#include <linux/bpf_trace.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "rx_common.h"
+#include "filter.h"
+#include "nic.h"
+#include "selftest.h"
+#include "workarounds.h"
+
+/* Preferred number of descriptors to fill at once */
+#define EFX_RX_PREFERRED_BATCH 8U
+
+/* Maximum rx prefix used by any architecture. */
+#define EFX_MAX_RX_PREFIX_SIZE 16
+
+/* Size of buffer allocated for skb header area. */
+#define EFX_SKB_HEADERS 128u
+
+/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
+#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
+ EFX_RX_USR_BUF_SIZE)
+
+static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf,
+ int len)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding;
+
+ if (likely(len <= max_len))
+ return;
+
+ /* The packet must be discarded, but this is only a fatal error
+ * if the caller indicated it was
+ */
+ rx_buf->flags |= EFX_RX_PKT_DISCARD;
+
+ if (net_ratelimit())
+ netif_err(efx, rx_err, efx->net_dev,
+ "RX queue %d overlength RX event (%#x > %#x)\n",
+ efx_rx_queue_index(rx_queue), len, max_len);
+
+ efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
+}
+
+/* Allocate and construct an SKB around page fragments */
+static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags,
+ u8 *eh, int hdr_len)
+{
+ struct efx_nic *efx = channel->efx;
+ struct sk_buff *skb;
+
+ /* Allocate an SKB to store the headers */
+ skb = netdev_alloc_skb(efx->net_dev,
+ efx->rx_ip_align + efx->rx_prefix_size +
+ hdr_len);
+ if (unlikely(skb == NULL)) {
+ atomic_inc(&efx->n_rx_noskb_drops);
+ return NULL;
+ }
+
+ EFX_WARN_ON_ONCE_PARANOID(rx_buf->len < hdr_len);
+
+ memcpy(skb->data + efx->rx_ip_align, eh - efx->rx_prefix_size,
+ efx->rx_prefix_size + hdr_len);
+ skb_reserve(skb, efx->rx_ip_align + efx->rx_prefix_size);
+ __skb_put(skb, hdr_len);
+
+ /* Append the remaining page(s) onto the frag list */
+ if (rx_buf->len > hdr_len) {
+ rx_buf->page_offset += hdr_len;
+ rx_buf->len -= hdr_len;
+
+ for (;;) {
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ rx_buf->page, rx_buf->page_offset,
+ rx_buf->len, efx->rx_buffer_truesize);
+ rx_buf->page = NULL;
+
+ if (skb_shinfo(skb)->nr_frags == n_frags)
+ break;
+
+ rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
+ }
+ } else {
+ __free_pages(rx_buf->page, efx->rx_buffer_order);
+ rx_buf->page = NULL;
+ n_frags = 0;
+ }
+
+ /* Move past the ethernet header */
+ skb->protocol = eth_type_trans(skb, efx->net_dev);
+
+ skb_mark_napi_id(skb, &channel->napi_str);
+
+ return skb;
+}
+
+void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
+ unsigned int n_frags, unsigned int len, u16 flags)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
+ struct efx_rx_buffer *rx_buf;
+
+ rx_queue->rx_packets++;
+
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ rx_buf->flags |= flags;
+
+ /* Validate the number of fragments and completed length */
+ if (n_frags == 1) {
+ if (!(flags & EFX_RX_PKT_PREFIX_LEN))
+ efx_rx_packet__check_len(rx_queue, rx_buf, len);
+ } else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) ||
+ unlikely(len <= (n_frags - 1) * efx->rx_dma_len) ||
+ unlikely(len > n_frags * efx->rx_dma_len) ||
+ unlikely(!efx->rx_scatter)) {
+ /* If this isn't an explicit discard request, either
+ * the hardware or the driver is broken.
+ */
+ WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD));
+ rx_buf->flags |= EFX_RX_PKT_DISCARD;
+ }
+
+ netif_vdbg(efx, rx_status, efx->net_dev,
+ "RX queue %d received ids %x-%x len %d %s%s\n",
+ efx_rx_queue_index(rx_queue), index,
+ (index + n_frags - 1) & rx_queue->ptr_mask, len,
+ (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
+ (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : "");
+
+ /* Discard packet, if instructed to do so. Process the
+ * previous receive first.
+ */
+ if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
+ efx_rx_flush_packet(channel);
+ efx_discard_rx_packet(channel, rx_buf, n_frags);
+ return;
+ }
+
+ if (n_frags == 1 && !(flags & EFX_RX_PKT_PREFIX_LEN))
+ rx_buf->len = len;
+
+ /* Release and/or sync the DMA mapping - assumes all RX buffers
+ * consumed in-order per RX queue.
+ */
+ efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);
+
+ /* Prefetch nice and early so data will (hopefully) be in cache by
+ * the time we look at it.
+ */
+ prefetch(efx_rx_buf_va(rx_buf));
+
+ rx_buf->page_offset += efx->rx_prefix_size;
+ rx_buf->len -= efx->rx_prefix_size;
+
+ if (n_frags > 1) {
+ /* Release/sync DMA mapping for additional fragments.
+ * Fix length for last fragment.
+ */
+ unsigned int tail_frags = n_frags - 1;
+
+ for (;;) {
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+ if (--tail_frags == 0)
+ break;
+ efx_sync_rx_buffer(efx, rx_buf, efx->rx_dma_len);
+ }
+ rx_buf->len = len - (n_frags - 1) * efx->rx_dma_len;
+ efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);
+ }
+
+ /* All fragments have been DMA-synced, so recycle pages. */
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ efx_recycle_rx_pages(channel, rx_buf, n_frags);
+
+ /* Pipeline receives so that we give time for packet headers to be
+ * prefetched into cache.
+ */
+ efx_rx_flush_packet(channel);
+ channel->rx_pkt_n_frags = n_frags;
+ channel->rx_pkt_index = index;
+}
+
+static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
+{
+ struct sk_buff *skb;
+ u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS);
+
+ skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
+ if (unlikely(skb == NULL)) {
+ struct efx_rx_queue *rx_queue;
+
+ rx_queue = efx_channel_get_rx_queue(channel);
+ efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
+ return;
+ }
+ skb_record_rx_queue(skb, channel->rx_queue.core_index);
+
+ /* Set the SKB flags */
+ skb_checksum_none_assert(skb);
+ if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
+ }
+
+ efx_rx_skb_attach_timestamp(channel, skb);
+
+ if (channel->type->receive_skb)
+ if (channel->type->receive_skb(channel, skb))
+ return;
+
+ /* Pass the packet up */
+ if (channel->rx_list != NULL)
+ /* Add to list, will pass up later */
+ list_add_tail(&skb->list, channel->rx_list);
+ else
+ /* No list, so pass it up now */
+ netif_receive_skb(skb);
+}
+
+/** efx_do_xdp: perform XDP processing on a received packet
+ *
+ * Returns true if packet should still be delivered.
+ */
+static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf, u8 **ehp)
+{
+ u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE];
+ struct efx_rx_queue *rx_queue;
+ struct bpf_prog *xdp_prog;
+ struct xdp_frame *xdpf;
+ struct xdp_buff xdp;
+ u32 xdp_act;
+ s16 offset;
+ int err;
+
+ xdp_prog = rcu_dereference_bh(efx->xdp_prog);
+ if (!xdp_prog)
+ return true;
+
+ rx_queue = efx_channel_get_rx_queue(channel);
+
+ if (unlikely(channel->rx_pkt_n_frags > 1)) {
+ /* We can't do XDP on fragmented packets - drop. */
+ efx_free_rx_buffers(rx_queue, rx_buf,
+ channel->rx_pkt_n_frags);
+ if (net_ratelimit())
+ netif_err(efx, rx_err, efx->net_dev,
+ "XDP is not possible with multiple receive fragments (%d)\n",
+ channel->rx_pkt_n_frags);
+ channel->n_rx_xdp_bad_drops++;
+ return false;
+ }
+
+ dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr,
+ rx_buf->len, DMA_FROM_DEVICE);
+
+ /* Save the rx prefix. */
+ EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE);
+ memcpy(rx_prefix, *ehp - efx->rx_prefix_size,
+ efx->rx_prefix_size);
+
+ xdp_init_buff(&xdp, efx->rx_page_buf_step, &rx_queue->xdp_rxq_info);
+ /* No support yet for XDP metadata */
+ xdp_prepare_buff(&xdp, *ehp - EFX_XDP_HEADROOM, EFX_XDP_HEADROOM,
+ rx_buf->len, false);
+
+ xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ offset = (u8 *)xdp.data - *ehp;
+
+ switch (xdp_act) {
+ case XDP_PASS:
+ /* Fix up rx prefix. */
+ if (offset) {
+ *ehp += offset;
+ rx_buf->page_offset += offset;
+ rx_buf->len -= offset;
+ memcpy(*ehp - efx->rx_prefix_size, rx_prefix,
+ efx->rx_prefix_size);
+ }
+ break;
+
+ case XDP_TX:
+ /* Buffer ownership passes to tx on success. */
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+ err = efx_xdp_tx_buffers(efx, 1, &xdpf, true);
+ if (unlikely(err != 1)) {
+ efx_free_rx_buffers(rx_queue, rx_buf, 1);
+ if (net_ratelimit())
+ netif_err(efx, rx_err, efx->net_dev,
+ "XDP TX failed (%d)\n", err);
+ channel->n_rx_xdp_bad_drops++;
+ trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+ } else {
+ channel->n_rx_xdp_tx++;
+ }
+ break;
+
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog);
+ if (unlikely(err)) {
+ efx_free_rx_buffers(rx_queue, rx_buf, 1);
+ if (net_ratelimit())
+ netif_err(efx, rx_err, efx->net_dev,
+ "XDP redirect failed (%d)\n", err);
+ channel->n_rx_xdp_bad_drops++;
+ trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+ } else {
+ channel->n_rx_xdp_redirect++;
+ }
+ break;
+
+ default:
+ bpf_warn_invalid_xdp_action(efx->net_dev, xdp_prog, xdp_act);
+ efx_free_rx_buffers(rx_queue, rx_buf, 1);
+ channel->n_rx_xdp_bad_drops++;
+ trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+ break;
+
+ case XDP_ABORTED:
+ trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
+ fallthrough;
+ case XDP_DROP:
+ efx_free_rx_buffers(rx_queue, rx_buf, 1);
+ channel->n_rx_xdp_drops++;
+ break;
+ }
+
+ return xdp_act == XDP_PASS;
+}
+
+/* Handle a received packet. Second half: Touches packet payload. */
+void __efx_rx_packet(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+ struct efx_rx_buffer *rx_buf =
+ efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
+ u8 *eh = efx_rx_buf_va(rx_buf);
+
+ /* Read length from the prefix if necessary. This already
+ * excludes the length of the prefix itself.
+ */
+ if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN)
+ rx_buf->len = le16_to_cpup((__le16 *)
+ (eh + efx->rx_packet_len_offset));
+
+ /* If we're in loopback test, then pass the packet directly to the
+ * loopback layer, and free the rx_buf here
+ */
+ if (unlikely(efx->loopback_selftest)) {
+ struct efx_rx_queue *rx_queue;
+
+ efx_loopback_rx_packet(efx, eh, rx_buf->len);
+ rx_queue = efx_channel_get_rx_queue(channel);
+ efx_free_rx_buffers(rx_queue, rx_buf,
+ channel->rx_pkt_n_frags);
+ goto out;
+ }
+
+ if (!efx_do_xdp(efx, channel, rx_buf, &eh))
+ goto out;
+
+ if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
+ rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
+
+ if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
+ efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh, 0);
+ else
+ efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
+out:
+ channel->rx_pkt_n_frags = 0;
+}
diff --git a/drivers/net/ethernet/sfc/siena/rx_common.c b/drivers/net/ethernet/sfc/siena/rx_common.c
new file mode 100644
index 000000000000..fa8b9aacca11
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/rx_common.c
@@ -0,0 +1,1086 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include <linux/module.h>
+#include <linux/iommu.h>
+#include "efx.h"
+#include "nic.h"
+#include "rx_common.h"
+
+/* This is the percentage fill level below which new RX descriptors
+ * will be added to the RX descriptor ring.
+ */
+static unsigned int rx_refill_threshold;
+module_param(rx_refill_threshold, uint, 0444);
+MODULE_PARM_DESC(rx_refill_threshold,
+ "RX descriptor ring refill threshold (%)");
+
+/* RX maximum head room required.
+ *
+ * This must be at least 1 to prevent overflow, plus one packet-worth
+ * to allow pipelined receives.
+ */
+#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
+
+/* Check the RX page recycle ring for a page that can be reused. */
+static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ struct efx_rx_page_state *state;
+ unsigned int index;
+ struct page *page;
+
+ if (unlikely(!rx_queue->page_ring))
+ return NULL;
+ index = rx_queue->page_remove & rx_queue->page_ptr_mask;
+ page = rx_queue->page_ring[index];
+ if (page == NULL)
+ return NULL;
+
+ rx_queue->page_ring[index] = NULL;
+ /* page_remove cannot exceed page_add. */
+ if (rx_queue->page_remove != rx_queue->page_add)
+ ++rx_queue->page_remove;
+
+ /* If page_count is 1 then we hold the only reference to this page. */
+ if (page_count(page) == 1) {
+ ++rx_queue->page_recycle_count;
+ return page;
+ } else {
+ state = page_address(page);
+ dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ put_page(page);
+ ++rx_queue->page_recycle_failed;
+ }
+
+ return NULL;
+}
+
+/* Attempt to recycle the page if there is an RX recycle ring; the page can
+ * only be added if this is the final RX buffer, to prevent pages being used in
+ * the descriptor ring and appearing in the recycle ring simultaneously.
+ */
+static void efx_recycle_rx_page(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+ struct efx_nic *efx = rx_queue->efx;
+ struct page *page = rx_buf->page;
+ unsigned int index;
+
+ /* Only recycle the page after processing the final buffer. */
+ if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
+ return;
+
+ index = rx_queue->page_add & rx_queue->page_ptr_mask;
+ if (rx_queue->page_ring[index] == NULL) {
+ unsigned int read_index = rx_queue->page_remove &
+ rx_queue->page_ptr_mask;
+
+ /* The next slot in the recycle ring is available, but
+ * increment page_remove if the read pointer currently
+ * points here.
+ */
+ if (read_index == index)
+ ++rx_queue->page_remove;
+ rx_queue->page_ring[index] = page;
+ ++rx_queue->page_add;
+ return;
+ }
+ ++rx_queue->page_recycle_full;
+ efx_unmap_rx_buffer(efx, rx_buf);
+ put_page(rx_buf->page);
+}
+
+/* Recycle the pages that are used by buffers that have just been received. */
+void efx_recycle_rx_pages(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+
+ if (unlikely(!rx_queue->page_ring))
+ return;
+
+ do {
+ efx_recycle_rx_page(channel, rx_buf);
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+ } while (--n_frags);
+}
+
+void efx_discard_rx_packet(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+
+ efx_recycle_rx_pages(channel, rx_buf, n_frags);
+
+ efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
+}
+
+static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
+{
+ unsigned int bufs_in_recycle_ring, page_ring_size;
+ struct efx_nic *efx = rx_queue->efx;
+
+ bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx);
+ page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
+ efx->rx_bufs_per_page);
+ rx_queue->page_ring = kcalloc(page_ring_size,
+ sizeof(*rx_queue->page_ring), GFP_KERNEL);
+ if (!rx_queue->page_ring)
+ rx_queue->page_ptr_mask = 0;
+ else
+ rx_queue->page_ptr_mask = page_ring_size - 1;
+}
+
+static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ int i;
+
+ if (unlikely(!rx_queue->page_ring))
+ return;
+
+ /* Unmap and release the pages in the recycle ring. Remove the ring. */
+ for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
+ struct page *page = rx_queue->page_ring[i];
+ struct efx_rx_page_state *state;
+
+ if (page == NULL)
+ continue;
+
+ state = page_address(page);
+ dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ put_page(page);
+ }
+ kfree(rx_queue->page_ring);
+ rx_queue->page_ring = NULL;
+}
+
+static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf)
+{
+ /* Release the page reference we hold for the buffer. */
+ if (rx_buf->page)
+ put_page(rx_buf->page);
+
+ /* If this is the last buffer in a page, unmap and free it. */
+ if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
+ efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
+ efx_free_rx_buffers(rx_queue, rx_buf, 1);
+ }
+ rx_buf->page = NULL;
+}
+
+int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ unsigned int entries;
+ int rc;
+
+ /* Create the smallest power-of-two aligned ring */
+ entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
+ EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+ rx_queue->ptr_mask = entries - 1;
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "creating RX queue %d size %#x mask %#x\n",
+ efx_rx_queue_index(rx_queue), efx->rxq_entries,
+ rx_queue->ptr_mask);
+
+ /* Allocate RX buffers */
+ rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
+ GFP_KERNEL);
+ if (!rx_queue->buffer)
+ return -ENOMEM;
+
+ rc = efx_nic_probe_rx(rx_queue);
+ if (rc) {
+ kfree(rx_queue->buffer);
+ rx_queue->buffer = NULL;
+ }
+
+ return rc;
+}
+
+void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ unsigned int max_fill, trigger, max_trigger;
+ struct efx_nic *efx = rx_queue->efx;
+ int rc = 0;
+
+ netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+ "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
+
+ /* Initialise ptr fields */
+ rx_queue->added_count = 0;
+ rx_queue->notified_count = 0;
+ rx_queue->removed_count = 0;
+ rx_queue->min_fill = -1U;
+ efx_init_rx_recycle_ring(rx_queue);
+
+ rx_queue->page_remove = 0;
+ rx_queue->page_add = rx_queue->page_ptr_mask + 1;
+ rx_queue->page_recycle_count = 0;
+ rx_queue->page_recycle_failed = 0;
+ rx_queue->page_recycle_full = 0;
+
+ /* Initialise limit fields */
+ max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
+ max_trigger =
+ max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
+ if (rx_refill_threshold != 0) {
+ trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
+ if (trigger > max_trigger)
+ trigger = max_trigger;
+ } else {
+ trigger = max_trigger;
+ }
+
+ rx_queue->max_fill = max_fill;
+ rx_queue->fast_fill_trigger = trigger;
+ rx_queue->refill_enabled = true;
+
+ /* Initialise XDP queue information */
+ rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
+ rx_queue->core_index, 0);
+
+ if (rc) {
+ netif_err(efx, rx_err, efx->net_dev,
+ "Failure to initialise XDP queue information rc=%d\n",
+ rc);
+ efx->xdp_rxq_info_failed = true;
+ } else {
+ rx_queue->xdp_rxq_info_valid = true;
+ }
+
+ /* Set up RX descriptor ring */
+ efx_nic_init_rx(rx_queue);
+}
+
+void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ struct efx_rx_buffer *rx_buf;
+ int i;
+
+ netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+ "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
+
+ del_timer_sync(&rx_queue->slow_fill);
+
+ /* Release RX buffers from the current read ptr to the write ptr */
+ if (rx_queue->buffer) {
+ for (i = rx_queue->removed_count; i < rx_queue->added_count;
+ i++) {
+ unsigned int index = i & rx_queue->ptr_mask;
+
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ efx_fini_rx_buffer(rx_queue, rx_buf);
+ }
+ }
+
+ efx_fini_rx_recycle_ring(rx_queue);
+
+ if (rx_queue->xdp_rxq_info_valid)
+ xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
+
+ rx_queue->xdp_rxq_info_valid = false;
+}
+
+void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+ "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
+
+ efx_nic_remove_rx(rx_queue);
+
+ kfree(rx_queue->buffer);
+ rx_queue->buffer = NULL;
+}
+
+/* Unmap a DMA-mapped page. This function is only called for the final RX
+ * buffer in a page.
+ */
+void efx_unmap_rx_buffer(struct efx_nic *efx,
+ struct efx_rx_buffer *rx_buf)
+{
+ struct page *page = rx_buf->page;
+
+ if (page) {
+ struct efx_rx_page_state *state = page_address(page);
+
+ dma_unmap_page(&efx->pci_dev->dev,
+ state->dma_addr,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ }
+}
+
+void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int num_bufs)
+{
+ do {
+ if (rx_buf->page) {
+ put_page(rx_buf->page);
+ rx_buf->page = NULL;
+ }
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+ } while (--num_bufs);
+}
+
+void efx_rx_slow_fill(struct timer_list *t)
+{
+ struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
+
+ /* Post an event to cause NAPI to run and refill the queue */
+ efx_nic_generate_fill_event(rx_queue);
+ ++rx_queue->slow_fill_count;
+}
+
+void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
+{
+ mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
+}
+
+/* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
+ *
+ * @rx_queue: Efx RX queue
+ *
+ * This allocates a batch of pages, maps them for DMA, and populates
+ * struct efx_rx_buffers for each one. Return a negative error code or
+ * 0 on success. If a single page can be used for multiple buffers,
+ * then the page will either be inserted fully, or not at all.
+ */
+static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
+{
+ unsigned int page_offset, index, count;
+ struct efx_nic *efx = rx_queue->efx;
+ struct efx_rx_page_state *state;
+ struct efx_rx_buffer *rx_buf;
+ dma_addr_t dma_addr;
+ struct page *page;
+
+ count = 0;
+ do {
+ page = efx_reuse_page(rx_queue);
+ if (page == NULL) {
+ page = alloc_pages(__GFP_COMP |
+ (atomic ? GFP_ATOMIC : GFP_KERNEL),
+ efx->rx_buffer_order);
+ if (unlikely(page == NULL))
+ return -ENOMEM;
+ dma_addr =
+ dma_map_page(&efx->pci_dev->dev, page, 0,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
+ dma_addr))) {
+ __free_pages(page, efx->rx_buffer_order);
+ return -EIO;
+ }
+ state = page_address(page);
+ state->dma_addr = dma_addr;
+ } else {
+ state = page_address(page);
+ dma_addr = state->dma_addr;
+ }
+
+ dma_addr += sizeof(struct efx_rx_page_state);
+ page_offset = sizeof(struct efx_rx_page_state);
+
+ do {
+ index = rx_queue->added_count & rx_queue->ptr_mask;
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ rx_buf->dma_addr = dma_addr + efx->rx_ip_align +
+ EFX_XDP_HEADROOM;
+ rx_buf->page = page;
+ rx_buf->page_offset = page_offset + efx->rx_ip_align +
+ EFX_XDP_HEADROOM;
+ rx_buf->len = efx->rx_dma_len;
+ rx_buf->flags = 0;
+ ++rx_queue->added_count;
+ get_page(page);
+ dma_addr += efx->rx_page_buf_step;
+ page_offset += efx->rx_page_buf_step;
+ } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
+
+ rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
+ } while (++count < efx->rx_pages_per_batch);
+
+ return 0;
+}
+
+void efx_rx_config_page_split(struct efx_nic *efx)
+{
+ efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align +
+ EFX_XDP_HEADROOM + EFX_XDP_TAILROOM,
+ EFX_RX_BUF_ALIGNMENT);
+ efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
+ ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
+ efx->rx_page_buf_step);
+ efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
+ efx->rx_bufs_per_page;
+ efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
+ efx->rx_bufs_per_page);
+}
+
+/* efx_fast_push_rx_descriptors - push new RX descriptors quickly
+ * @rx_queue: RX descriptor queue
+ *
+ * This will aim to fill the RX descriptor queue up to
+ * @rx_queue->@max_fill. If there is insufficient atomic
+ * memory to do so, a slow fill will be scheduled.
+ *
+ * The caller must provide serialisation (none is used here). In practise,
+ * this means this function must run from the NAPI handler, or be called
+ * when NAPI is disabled.
+ */
+void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ unsigned int fill_level, batch_size;
+ int space, rc = 0;
+
+ if (!rx_queue->refill_enabled)
+ return;
+
+ /* Calculate current fill level, and exit if we don't need to fill */
+ fill_level = (rx_queue->added_count - rx_queue->removed_count);
+ EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
+ if (fill_level >= rx_queue->fast_fill_trigger)
+ goto out;
+
+ /* Record minimum fill level */
+ if (unlikely(fill_level < rx_queue->min_fill)) {
+ if (fill_level)
+ rx_queue->min_fill = fill_level;
+ }
+
+ batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
+ space = rx_queue->max_fill - fill_level;
+ EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
+
+ netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
+ "RX queue %d fast-filling descriptor ring from"
+ " level %d to level %d\n",
+ efx_rx_queue_index(rx_queue), fill_level,
+ rx_queue->max_fill);
+
+ do {
+ rc = efx_init_rx_buffers(rx_queue, atomic);
+ if (unlikely(rc)) {
+ /* Ensure that we don't leave the rx queue empty */
+ efx_schedule_slow_fill(rx_queue);
+ goto out;
+ }
+ } while ((space -= batch_size) >= batch_size);
+
+ netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
+ "RX queue %d fast-filled descriptor ring "
+ "to level %d\n", efx_rx_queue_index(rx_queue),
+ rx_queue->added_count - rx_queue->removed_count);
+
+ out:
+ if (rx_queue->notified_count != rx_queue->added_count)
+ efx_nic_notify_rx_desc(rx_queue);
+}
+
+/* Pass a received packet up through GRO. GRO can handle pages
+ * regardless of checksum state and skbs with a good checksum.
+ */
+void
+efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags, u8 *eh, __wsum csum)
+{
+ struct napi_struct *napi = &channel->napi_str;
+ struct efx_nic *efx = channel->efx;
+ struct sk_buff *skb;
+
+ skb = napi_get_frags(napi);
+ if (unlikely(!skb)) {
+ struct efx_rx_queue *rx_queue;
+
+ rx_queue = efx_channel_get_rx_queue(channel);
+ efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
+ return;
+ }
+
+ if (efx->net_dev->features & NETIF_F_RXHASH &&
+ efx_rx_buf_hash_valid(efx, eh))
+ skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
+ PKT_HASH_TYPE_L3);
+ if (csum) {
+ skb->csum = csum;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ } else {
+ skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
+ }
+ skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
+
+ for (;;) {
+ skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+ rx_buf->page, rx_buf->page_offset,
+ rx_buf->len);
+ rx_buf->page = NULL;
+ skb->len += rx_buf->len;
+ if (skb_shinfo(skb)->nr_frags == n_frags)
+ break;
+
+ rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
+ }
+
+ skb->data_len = skb->len;
+ skb->truesize += n_frags * efx->rx_buffer_truesize;
+
+ skb_record_rx_queue(skb, channel->rx_queue.core_index);
+
+ napi_gro_frags(napi);
+}
+
+/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because
+ * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
+ */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
+{
+ struct list_head *head = &efx->rss_context.list;
+ struct efx_rss_context *ctx, *new;
+ u32 id = 1; /* Don't use zero, that refers to the master RSS context */
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ /* Search for first gap in the numbering */
+ list_for_each_entry(ctx, head, list) {
+ if (ctx->user_id != id)
+ break;
+ id++;
+ /* Check for wrap. If this happens, we have nearly 2^32
+ * allocated RSS contexts, which seems unlikely.
+ */
+ if (WARN_ON_ONCE(!id))
+ return NULL;
+ }
+
+ /* Create the new entry */
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+ new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
+ new->rx_hash_udp_4tuple = false;
+
+ /* Insert the new entry into the gap */
+ new->user_id = id;
+ list_add_tail(&new->list, &ctx->list);
+ return new;
+}
+
+struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
+{
+ struct list_head *head = &efx->rss_context.list;
+ struct efx_rss_context *ctx;
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ list_for_each_entry(ctx, head, list)
+ if (ctx->user_id == id)
+ return ctx;
+ return NULL;
+}
+
+void efx_free_rss_context_entry(struct efx_rss_context *ctx)
+{
+ list_del(&ctx->list);
+ kfree(ctx);
+}
+
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+ ctx->rx_indir_table[i] =
+ ethtool_rxfh_indir_default(i, efx->rss_spread);
+}
+
+/**
+ * efx_filter_is_mc_recipient - test whether spec is a multicast recipient
+ * @spec: Specification to test
+ *
+ * Return: %true if the specification is a non-drop RX filter that
+ * matches a local MAC address I/G bit value of 1 or matches a local
+ * IPv4 or IPv6 address value in the respective multicast address
+ * range. Otherwise %false.
+ */
+bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec)
+{
+ if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
+ spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
+ return false;
+
+ if (spec->match_flags &
+ (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
+ is_multicast_ether_addr(spec->loc_mac))
+ return true;
+
+ if ((spec->match_flags &
+ (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
+ (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
+ if (spec->ether_type == htons(ETH_P_IP) &&
+ ipv4_is_multicast(spec->loc_host[0]))
+ return true;
+ if (spec->ether_type == htons(ETH_P_IPV6) &&
+ ((const u8 *)spec->loc_host)[0] == 0xff)
+ return true;
+ }
+
+ return false;
+}
+
+bool efx_filter_spec_equal(const struct efx_filter_spec *left,
+ const struct efx_filter_spec *right)
+{
+ if ((left->match_flags ^ right->match_flags) |
+ ((left->flags ^ right->flags) &
+ (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
+ return false;
+
+ return memcmp(&left->outer_vid, &right->outer_vid,
+ sizeof(struct efx_filter_spec) -
+ offsetof(struct efx_filter_spec, outer_vid)) == 0;
+}
+
+u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
+{
+ BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
+ return jhash2((const u32 *)&spec->outer_vid,
+ (sizeof(struct efx_filter_spec) -
+ offsetof(struct efx_filter_spec, outer_vid)) / 4,
+ 0);
+}
+
+#ifdef CONFIG_RFS_ACCEL
+bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
+ bool *force)
+{
+ if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
+ /* ARFS is currently updating this entry, leave it */
+ return false;
+ }
+ if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
+ /* ARFS tried and failed to update this, so it's probably out
+ * of date. Remove the filter and the ARFS rule entry.
+ */
+ rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
+ *force = true;
+ return true;
+ } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
+ /* ARFS has moved on, so old filter is not needed. Since we did
+ * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
+ * not be removed by efx_rps_hash_del() subsequently.
+ */
+ *force = true;
+ return true;
+ }
+ /* Remove it iff ARFS wants to. */
+ return true;
+}
+
+static
+struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
+ const struct efx_filter_spec *spec)
+{
+ u32 hash = efx_filter_spec_hash(spec);
+
+ lockdep_assert_held(&efx->rps_hash_lock);
+ if (!efx->rps_hash_table)
+ return NULL;
+ return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
+}
+
+struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
+ const struct efx_filter_spec *spec)
+{
+ struct efx_arfs_rule *rule;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ head = efx_rps_hash_bucket(efx, spec);
+ if (!head)
+ return NULL;
+ hlist_for_each(node, head) {
+ rule = container_of(node, struct efx_arfs_rule, node);
+ if (efx_filter_spec_equal(spec, &rule->spec))
+ return rule;
+ }
+ return NULL;
+}
+
+struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
+ const struct efx_filter_spec *spec,
+ bool *new)
+{
+ struct efx_arfs_rule *rule;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ head = efx_rps_hash_bucket(efx, spec);
+ if (!head)
+ return NULL;
+ hlist_for_each(node, head) {
+ rule = container_of(node, struct efx_arfs_rule, node);
+ if (efx_filter_spec_equal(spec, &rule->spec)) {
+ *new = false;
+ return rule;
+ }
+ }
+ rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
+ *new = true;
+ if (rule) {
+ memcpy(&rule->spec, spec, sizeof(rule->spec));
+ hlist_add_head(&rule->node, head);
+ }
+ return rule;
+}
+
+void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec)
+{
+ struct efx_arfs_rule *rule;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ head = efx_rps_hash_bucket(efx, spec);
+ if (WARN_ON(!head))
+ return;
+ hlist_for_each(node, head) {
+ rule = container_of(node, struct efx_arfs_rule, node);
+ if (efx_filter_spec_equal(spec, &rule->spec)) {
+ /* Someone already reused the entry. We know that if
+ * this check doesn't fire (i.e. filter_id == REMOVING)
+ * then the REMOVING mark was put there by our caller,
+ * because caller is holding a lock on filter table and
+ * only holders of that lock set REMOVING.
+ */
+ if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
+ return;
+ hlist_del(node);
+ kfree(rule);
+ return;
+ }
+ }
+ /* We didn't find it. */
+ WARN_ON(1);
+}
+#endif
+
+int efx_probe_filters(struct efx_nic *efx)
+{
+ int rc;
+
+ mutex_lock(&efx->mac_lock);
+ down_write(&efx->filter_sem);
+ rc = efx->type->filter_table_probe(efx);
+ if (rc)
+ goto out_unlock;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (efx->type->offload_features & NETIF_F_NTUPLE) {
+ struct efx_channel *channel;
+ int i, success = 1;
+
+ efx_for_each_channel(channel, efx) {
+ channel->rps_flow_id =
+ kcalloc(efx->type->max_rx_ip_filters,
+ sizeof(*channel->rps_flow_id),
+ GFP_KERNEL);
+ if (!channel->rps_flow_id)
+ success = 0;
+ else
+ for (i = 0;
+ i < efx->type->max_rx_ip_filters;
+ ++i)
+ channel->rps_flow_id[i] =
+ RPS_FLOW_ID_INVALID;
+ channel->rfs_expire_index = 0;
+ channel->rfs_filter_count = 0;
+ }
+
+ if (!success) {
+ efx_for_each_channel(channel, efx)
+ kfree(channel->rps_flow_id);
+ efx->type->filter_table_remove(efx);
+ rc = -ENOMEM;
+ goto out_unlock;
+ }
+ }
+#endif
+out_unlock:
+ up_write(&efx->filter_sem);
+ mutex_unlock(&efx->mac_lock);
+ return rc;
+}
+
+void efx_remove_filters(struct efx_nic *efx)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx) {
+ cancel_delayed_work_sync(&channel->filter_work);
+ kfree(channel->rps_flow_id);
+ channel->rps_flow_id = NULL;
+ }
+#endif
+ down_write(&efx->filter_sem);
+ efx->type->filter_table_remove(efx);
+ up_write(&efx->filter_sem);
+}
+
+#ifdef CONFIG_RFS_ACCEL
+
+static void efx_filter_rfs_work(struct work_struct *data)
+{
+ struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
+ work);
+ struct efx_nic *efx = netdev_priv(req->net_dev);
+ struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
+ int slot_idx = req - efx->rps_slot;
+ struct efx_arfs_rule *rule;
+ u16 arfs_id = 0;
+ int rc;
+
+ rc = efx->type->filter_insert(efx, &req->spec, true);
+ if (rc >= 0)
+ /* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */
+ rc %= efx->type->max_rx_ip_filters;
+ if (efx->rps_hash_table) {
+ spin_lock_bh(&efx->rps_hash_lock);
+ rule = efx_rps_hash_find(efx, &req->spec);
+ /* The rule might have already gone, if someone else's request
+ * for the same spec was already worked and then expired before
+ * we got around to our work. In that case we have nothing
+ * tying us to an arfs_id, meaning that as soon as the filter
+ * is considered for expiry it will be removed.
+ */
+ if (rule) {
+ if (rc < 0)
+ rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
+ else
+ rule->filter_id = rc;
+ arfs_id = rule->arfs_id;
+ }
+ spin_unlock_bh(&efx->rps_hash_lock);
+ }
+ if (rc >= 0) {
+ /* Remember this so we can check whether to expire the filter
+ * later.
+ */
+ mutex_lock(&efx->rps_mutex);
+ if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID)
+ channel->rfs_filter_count++;
+ channel->rps_flow_id[rc] = req->flow_id;
+ mutex_unlock(&efx->rps_mutex);
+
+ if (req->spec.ether_type == htons(ETH_P_IP))
+ netif_info(efx, rx_status, efx->net_dev,
+ "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
+ (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+ req->spec.rem_host, ntohs(req->spec.rem_port),
+ req->spec.loc_host, ntohs(req->spec.loc_port),
+ req->rxq_index, req->flow_id, rc, arfs_id);
+ else
+ netif_info(efx, rx_status, efx->net_dev,
+ "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
+ (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+ req->spec.rem_host, ntohs(req->spec.rem_port),
+ req->spec.loc_host, ntohs(req->spec.loc_port),
+ req->rxq_index, req->flow_id, rc, arfs_id);
+ channel->n_rfs_succeeded++;
+ } else {
+ if (req->spec.ether_type == htons(ETH_P_IP))
+ netif_dbg(efx, rx_status, efx->net_dev,
+ "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
+ (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+ req->spec.rem_host, ntohs(req->spec.rem_port),
+ req->spec.loc_host, ntohs(req->spec.loc_port),
+ req->rxq_index, req->flow_id, rc, arfs_id);
+ else
+ netif_dbg(efx, rx_status, efx->net_dev,
+ "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
+ (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+ req->spec.rem_host, ntohs(req->spec.rem_port),
+ req->spec.loc_host, ntohs(req->spec.loc_port),
+ req->rxq_index, req->flow_id, rc, arfs_id);
+ channel->n_rfs_failed++;
+ /* We're overloading the NIC's filter tables, so let's do a
+ * chunk of extra expiry work.
+ */
+ __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count,
+ 100u));
+ }
+
+ /* Release references */
+ clear_bit(slot_idx, &efx->rps_slot_map);
+ dev_put(req->net_dev);
+}
+
+int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ struct efx_async_filter_insertion *req;
+ struct efx_arfs_rule *rule;
+ struct flow_keys fk;
+ int slot_idx;
+ bool new;
+ int rc;
+
+ /* find a free slot */
+ for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
+ if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
+ break;
+ if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
+ return -EBUSY;
+
+ if (flow_id == RPS_FLOW_ID_INVALID) {
+ rc = -EINVAL;
+ goto out_clear;
+ }
+
+ if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
+ rc = -EPROTONOSUPPORT;
+ goto out_clear;
+ }
+
+ if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
+ rc = -EPROTONOSUPPORT;
+ goto out_clear;
+ }
+ if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
+ rc = -EPROTONOSUPPORT;
+ goto out_clear;
+ }
+
+ req = efx->rps_slot + slot_idx;
+ efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
+ efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
+ rxq_index);
+ req->spec.match_flags =
+ EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
+ EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
+ EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
+ req->spec.ether_type = fk.basic.n_proto;
+ req->spec.ip_proto = fk.basic.ip_proto;
+
+ if (fk.basic.n_proto == htons(ETH_P_IP)) {
+ req->spec.rem_host[0] = fk.addrs.v4addrs.src;
+ req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
+ } else {
+ memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
+ sizeof(struct in6_addr));
+ memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
+ sizeof(struct in6_addr));
+ }
+
+ req->spec.rem_port = fk.ports.src;
+ req->spec.loc_port = fk.ports.dst;
+
+ if (efx->rps_hash_table) {
+ /* Add it to ARFS hash table */
+ spin_lock(&efx->rps_hash_lock);
+ rule = efx_rps_hash_add(efx, &req->spec, &new);
+ if (!rule) {
+ rc = -ENOMEM;
+ goto out_unlock;
+ }
+ if (new)
+ rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
+ rc = rule->arfs_id;
+ /* Skip if existing or pending filter already does the right thing */
+ if (!new && rule->rxq_index == rxq_index &&
+ rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
+ goto out_unlock;
+ rule->rxq_index = rxq_index;
+ rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
+ spin_unlock(&efx->rps_hash_lock);
+ } else {
+ /* Without an ARFS hash table, we just use arfs_id 0 for all
+ * filters. This means if multiple flows hash to the same
+ * flow_id, all but the most recently touched will be eligible
+ * for expiry.
+ */
+ rc = 0;
+ }
+
+ /* Queue the request */
+ dev_hold(req->net_dev = net_dev);
+ INIT_WORK(&req->work, efx_filter_rfs_work);
+ req->rxq_index = rxq_index;
+ req->flow_id = flow_id;
+ schedule_work(&req->work);
+ return rc;
+out_unlock:
+ spin_unlock(&efx->rps_hash_lock);
+out_clear:
+ clear_bit(slot_idx, &efx->rps_slot_map);
+ return rc;
+}
+
+bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
+{
+ bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
+ struct efx_nic *efx = channel->efx;
+ unsigned int index, size, start;
+ u32 flow_id;
+
+ if (!mutex_trylock(&efx->rps_mutex))
+ return false;
+ expire_one = efx->type->filter_rfs_expire_one;
+ index = channel->rfs_expire_index;
+ start = index;
+ size = efx->type->max_rx_ip_filters;
+ while (quota) {
+ flow_id = channel->rps_flow_id[index];
+
+ if (flow_id != RPS_FLOW_ID_INVALID) {
+ quota--;
+ if (expire_one(efx, flow_id, index)) {
+ netif_info(efx, rx_status, efx->net_dev,
+ "expired filter %d [channel %u flow %u]\n",
+ index, channel->channel, flow_id);
+ channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
+ channel->rfs_filter_count--;
+ }
+ }
+ if (++index == size)
+ index = 0;
+ /* If we were called with a quota that exceeds the total number
+ * of filters in the table (which shouldn't happen, but could
+ * if two callers race), ensure that we don't loop forever -
+ * stop when we've examined every row of the table.
+ */
+ if (index == start)
+ break;
+ }
+
+ channel->rfs_expire_index = index;
+ mutex_unlock(&efx->rps_mutex);
+ return true;
+}
+
+#endif /* CONFIG_RFS_ACCEL */
diff --git a/drivers/net/ethernet/sfc/siena/rx_common.h b/drivers/net/ethernet/sfc/siena/rx_common.h
new file mode 100644
index 000000000000..fbd2769307f9
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/rx_common.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_RX_COMMON_H
+#define EFX_RX_COMMON_H
+
+/* Preferred number of descriptors to fill at once */
+#define EFX_RX_PREFERRED_BATCH 8U
+
+/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
+#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
+ EFX_RX_USR_BUF_SIZE)
+
+/* Number of RX buffers to recycle pages for. When creating the RX page recycle
+ * ring, this number is divided by the number of buffers per page to calculate
+ * the number of pages to store in the RX page recycle ring.
+ */
+#define EFX_RECYCLE_RING_SIZE_10G 256
+
+static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
+{
+ return page_address(buf->page) + buf->page_offset;
+}
+
+static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
+#else
+ const u8 *data = eh + efx->rx_packet_hash_offset;
+
+ return (u32)data[0] |
+ (u32)data[1] << 8 |
+ (u32)data[2] << 16 |
+ (u32)data[3] << 24;
+#endif
+}
+
+void efx_rx_slow_fill(struct timer_list *t);
+
+void efx_recycle_rx_pages(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags);
+void efx_discard_rx_packet(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags);
+
+int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_destroy_rx_queue(struct efx_rx_queue *rx_queue);
+
+void efx_init_rx_buffer(struct efx_rx_queue *rx_queue,
+ struct page *page,
+ unsigned int page_offset,
+ u16 flags);
+void efx_unmap_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf);
+
+static inline void efx_sync_rx_buffer(struct efx_nic *efx,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int len)
+{
+ dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len,
+ DMA_FROM_DEVICE);
+}
+
+void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int num_bufs);
+
+void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
+void efx_rx_config_page_split(struct efx_nic *efx);
+void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic);
+
+void
+efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags, u8 *eh, __wsum csum);
+
+struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
+struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
+void efx_free_rss_context_entry(struct efx_rss_context *ctx);
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+ struct efx_rss_context *ctx);
+
+bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
+bool efx_filter_spec_equal(const struct efx_filter_spec *left,
+ const struct efx_filter_spec *right);
+u32 efx_filter_spec_hash(const struct efx_filter_spec *spec);
+
+#ifdef CONFIG_RFS_ACCEL
+bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
+ bool *force);
+struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
+ const struct efx_filter_spec *spec);
+struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
+ const struct efx_filter_spec *spec,
+ bool *new);
+void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec);
+
+int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id);
+bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota);
+#endif
+
+int efx_probe_filters(struct efx_nic *efx);
+void efx_remove_filters(struct efx_nic *efx);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/siena/selftest.c b/drivers/net/ethernet/sfc/siena/selftest.c
new file mode 100644
index 000000000000..3c5227afd497
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/selftest.c
@@ -0,0 +1,807 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2012 Solarflare Communications Inc.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/kernel_stat.h>
+#include <linux/pci.h>
+#include <linux/ethtool.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "efx_common.h"
+#include "efx_channels.h"
+#include "nic.h"
+#include "mcdi_port_common.h"
+#include "selftest.h"
+#include "workarounds.h"
+
+/* IRQ latency can be enormous because:
+ * - All IRQs may be disabled on a CPU for a *long* time by e.g. a
+ * slow serial console or an old IDE driver doing error recovery
+ * - The PREEMPT_RT patches mostly deal with this, but also allow a
+ * tasklet or normal task to be given higher priority than our IRQ
+ * threads
+ * Try to avoid blaming the hardware for this.
+ */
+#define IRQ_TIMEOUT HZ
+
+/*
+ * Loopback test packet structure
+ *
+ * The self-test should stress every RSS vector, and unfortunately
+ * Falcon only performs RSS on TCP/UDP packets.
+ */
+struct efx_loopback_payload {
+ struct ethhdr header;
+ struct iphdr ip;
+ struct udphdr udp;
+ __be16 iteration;
+ char msg[64];
+} __packed;
+
+/* Loopback test source MAC address */
+static const u8 payload_source[ETH_ALEN] __aligned(2) = {
+ 0x00, 0x0f, 0x53, 0x1b, 0x1b, 0x1b,
+};
+
+static const char payload_msg[] =
+ "Hello world! This is an Efx loopback test in progress!";
+
+/* Interrupt mode names */
+static const unsigned int efx_interrupt_mode_max = EFX_INT_MODE_MAX;
+static const char *const efx_interrupt_mode_names[] = {
+ [EFX_INT_MODE_MSIX] = "MSI-X",
+ [EFX_INT_MODE_MSI] = "MSI",
+ [EFX_INT_MODE_LEGACY] = "legacy",
+};
+#define INT_MODE(efx) \
+ STRING_TABLE_LOOKUP(efx->interrupt_mode, efx_interrupt_mode)
+
+/**
+ * struct efx_loopback_state - persistent state during a loopback selftest
+ * @flush: Drop all packets in efx_loopback_rx_packet
+ * @packet_count: Number of packets being used in this test
+ * @skbs: An array of skbs transmitted
+ * @offload_csum: Checksums are being offloaded
+ * @rx_good: RX good packet count
+ * @rx_bad: RX bad packet count
+ * @payload: Payload used in tests
+ */
+struct efx_loopback_state {
+ bool flush;
+ int packet_count;
+ struct sk_buff **skbs;
+ bool offload_csum;
+ atomic_t rx_good;
+ atomic_t rx_bad;
+ struct efx_loopback_payload payload;
+};
+
+/* How long to wait for all the packets to arrive (in ms) */
+#define LOOPBACK_TIMEOUT_MS 1000
+
+/**************************************************************************
+ *
+ * MII, NVRAM and register tests
+ *
+ **************************************************************************/
+
+static int efx_test_phy_alive(struct efx_nic *efx, struct efx_self_tests *tests)
+{
+ int rc = 0;
+
+ rc = efx_mcdi_phy_test_alive(efx);
+ tests->phy_alive = rc ? -1 : 1;
+
+ return rc;
+}
+
+static int efx_test_nvram(struct efx_nic *efx, struct efx_self_tests *tests)
+{
+ int rc = 0;
+
+ if (efx->type->test_nvram) {
+ rc = efx->type->test_nvram(efx);
+ if (rc == -EPERM)
+ rc = 0;
+ else
+ tests->nvram = rc ? -1 : 1;
+ }
+
+ return rc;
+}
+
+/**************************************************************************
+ *
+ * Interrupt and event queue testing
+ *
+ **************************************************************************/
+
+/* Test generation and receipt of interrupts */
+static int efx_test_interrupts(struct efx_nic *efx,
+ struct efx_self_tests *tests)
+{
+ unsigned long timeout, wait;
+ int cpu;
+ int rc;
+
+ netif_dbg(efx, drv, efx->net_dev, "testing interrupts\n");
+ tests->interrupt = -1;
+
+ rc = efx_nic_irq_test_start(efx);
+ if (rc == -ENOTSUPP) {
+ netif_dbg(efx, drv, efx->net_dev,
+ "direct interrupt testing not supported\n");
+ tests->interrupt = 0;
+ return 0;
+ }
+
+ timeout = jiffies + IRQ_TIMEOUT;
+ wait = 1;
+
+ /* Wait for arrival of test interrupt. */
+ netif_dbg(efx, drv, efx->net_dev, "waiting for test interrupt\n");
+ do {
+ schedule_timeout_uninterruptible(wait);
+ cpu = efx_nic_irq_test_irq_cpu(efx);
+ if (cpu >= 0)
+ goto success;
+ wait *= 2;
+ } while (time_before(jiffies, timeout));
+
+ netif_err(efx, drv, efx->net_dev, "timed out waiting for interrupt\n");
+ return -ETIMEDOUT;
+
+ success:
+ netif_dbg(efx, drv, efx->net_dev, "%s test interrupt seen on CPU%d\n",
+ INT_MODE(efx), cpu);
+ tests->interrupt = 1;
+ return 0;
+}
+
+/* Test generation and receipt of interrupting events */
+static int efx_test_eventq_irq(struct efx_nic *efx,
+ struct efx_self_tests *tests)
+{
+ struct efx_channel *channel;
+ unsigned int read_ptr[EFX_MAX_CHANNELS];
+ unsigned long napi_ran = 0, dma_pend = 0, int_pend = 0;
+ unsigned long timeout, wait;
+
+ BUILD_BUG_ON(EFX_MAX_CHANNELS > BITS_PER_LONG);
+
+ efx_for_each_channel(channel, efx) {
+ read_ptr[channel->channel] = channel->eventq_read_ptr;
+ set_bit(channel->channel, &dma_pend);
+ set_bit(channel->channel, &int_pend);
+ efx_nic_event_test_start(channel);
+ }
+
+ timeout = jiffies + IRQ_TIMEOUT;
+ wait = 1;
+
+ /* Wait for arrival of interrupts. NAPI processing may or may
+ * not complete in time, but we can cope in any case.
+ */
+ do {
+ schedule_timeout_uninterruptible(wait);
+
+ efx_for_each_channel(channel, efx) {
+ efx_stop_eventq(channel);
+ if (channel->eventq_read_ptr !=
+ read_ptr[channel->channel]) {
+ set_bit(channel->channel, &napi_ran);
+ clear_bit(channel->channel, &dma_pend);
+ clear_bit(channel->channel, &int_pend);
+ } else {
+ if (efx_nic_event_present(channel))
+ clear_bit(channel->channel, &dma_pend);
+ if (efx_nic_event_test_irq_cpu(channel) >= 0)
+ clear_bit(channel->channel, &int_pend);
+ }
+ efx_start_eventq(channel);
+ }
+
+ wait *= 2;
+ } while ((dma_pend || int_pend) && time_before(jiffies, timeout));
+
+ efx_for_each_channel(channel, efx) {
+ bool dma_seen = !test_bit(channel->channel, &dma_pend);
+ bool int_seen = !test_bit(channel->channel, &int_pend);
+
+ tests->eventq_dma[channel->channel] = dma_seen ? 1 : -1;
+ tests->eventq_int[channel->channel] = int_seen ? 1 : -1;
+
+ if (dma_seen && int_seen) {
+ netif_dbg(efx, drv, efx->net_dev,
+ "channel %d event queue passed (with%s NAPI)\n",
+ channel->channel,
+ test_bit(channel->channel, &napi_ran) ?
+ "" : "out");
+ } else {
+ /* Report failure and whether either interrupt or DMA
+ * worked
+ */
+ netif_err(efx, drv, efx->net_dev,
+ "channel %d timed out waiting for event queue\n",
+ channel->channel);
+ if (int_seen)
+ netif_err(efx, drv, efx->net_dev,
+ "channel %d saw interrupt "
+ "during event queue test\n",
+ channel->channel);
+ if (dma_seen)
+ netif_err(efx, drv, efx->net_dev,
+ "channel %d event was generated, but "
+ "failed to trigger an interrupt\n",
+ channel->channel);
+ }
+ }
+
+ return (dma_pend || int_pend) ? -ETIMEDOUT : 0;
+}
+
+static int efx_test_phy(struct efx_nic *efx, struct efx_self_tests *tests,
+ unsigned flags)
+{
+ int rc;
+
+ mutex_lock(&efx->mac_lock);
+ rc = efx_mcdi_phy_run_tests(efx, tests->phy_ext, flags);
+ mutex_unlock(&efx->mac_lock);
+ if (rc == -EPERM)
+ rc = 0;
+ else
+ netif_info(efx, drv, efx->net_dev,
+ "%s phy selftest\n", rc ? "Failed" : "Passed");
+
+ return rc;
+}
+
+/**************************************************************************
+ *
+ * Loopback testing
+ * NB Only one loopback test can be executing concurrently.
+ *
+ **************************************************************************/
+
+/* Loopback test RX callback
+ * This is called for each received packet during loopback testing.
+ */
+void efx_loopback_rx_packet(struct efx_nic *efx,
+ const char *buf_ptr, int pkt_len)
+{
+ struct efx_loopback_state *state = efx->loopback_selftest;
+ struct efx_loopback_payload *received;
+ struct efx_loopback_payload *payload;
+
+ BUG_ON(!buf_ptr);
+
+ /* If we are just flushing, then drop the packet */
+ if ((state == NULL) || state->flush)
+ return;
+
+ payload = &state->payload;
+
+ received = (struct efx_loopback_payload *) buf_ptr;
+ received->ip.saddr = payload->ip.saddr;
+ if (state->offload_csum)
+ received->ip.check = payload->ip.check;
+
+ /* Check that header exists */
+ if (pkt_len < sizeof(received->header)) {
+ netif_err(efx, drv, efx->net_dev,
+ "saw runt RX packet (length %d) in %s loopback "
+ "test\n", pkt_len, LOOPBACK_MODE(efx));
+ goto err;
+ }
+
+ /* Check that the ethernet header exists */
+ if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) {
+ netif_err(efx, drv, efx->net_dev,
+ "saw non-loopback RX packet in %s loopback test\n",
+ LOOPBACK_MODE(efx));
+ goto err;
+ }
+
+ /* Check packet length */
+ if (pkt_len != sizeof(*payload)) {
+ netif_err(efx, drv, efx->net_dev,
+ "saw incorrect RX packet length %d (wanted %d) in "
+ "%s loopback test\n", pkt_len, (int)sizeof(*payload),
+ LOOPBACK_MODE(efx));
+ goto err;
+ }
+
+ /* Check that IP header matches */
+ if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) {
+ netif_err(efx, drv, efx->net_dev,
+ "saw corrupted IP header in %s loopback test\n",
+ LOOPBACK_MODE(efx));
+ goto err;
+ }
+
+ /* Check that msg and padding matches */
+ if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) {
+ netif_err(efx, drv, efx->net_dev,
+ "saw corrupted RX packet in %s loopback test\n",
+ LOOPBACK_MODE(efx));
+ goto err;
+ }
+
+ /* Check that iteration matches */
+ if (received->iteration != payload->iteration) {
+ netif_err(efx, drv, efx->net_dev,
+ "saw RX packet from iteration %d (wanted %d) in "
+ "%s loopback test\n", ntohs(received->iteration),
+ ntohs(payload->iteration), LOOPBACK_MODE(efx));
+ goto err;
+ }
+
+ /* Increase correct RX count */
+ netif_vdbg(efx, drv, efx->net_dev,
+ "got loopback RX in %s loopback test\n", LOOPBACK_MODE(efx));
+
+ atomic_inc(&state->rx_good);
+ return;
+
+ err:
+#ifdef DEBUG
+ if (atomic_read(&state->rx_bad) == 0) {
+ netif_err(efx, drv, efx->net_dev, "received packet:\n");
+ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
+ buf_ptr, pkt_len, 0);
+ netif_err(efx, drv, efx->net_dev, "expected packet:\n");
+ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
+ &state->payload, sizeof(state->payload), 0);
+ }
+#endif
+ atomic_inc(&state->rx_bad);
+}
+
+/* Initialise an efx_selftest_state for a new iteration */
+static void efx_iterate_state(struct efx_nic *efx)
+{
+ struct efx_loopback_state *state = efx->loopback_selftest;
+ struct net_device *net_dev = efx->net_dev;
+ struct efx_loopback_payload *payload = &state->payload;
+
+ /* Initialise the layerII header */
+ ether_addr_copy((u8 *)&payload->header.h_dest, net_dev->dev_addr);
+ ether_addr_copy((u8 *)&payload->header.h_source, payload_source);
+ payload->header.h_proto = htons(ETH_P_IP);
+
+ /* saddr set later and used as incrementing count */
+ payload->ip.daddr = htonl(INADDR_LOOPBACK);
+ payload->ip.ihl = 5;
+ payload->ip.check = (__force __sum16) htons(0xdead);
+ payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr));
+ payload->ip.version = IPVERSION;
+ payload->ip.protocol = IPPROTO_UDP;
+
+ /* Initialise udp header */
+ payload->udp.source = 0;
+ payload->udp.len = htons(sizeof(*payload) - sizeof(struct ethhdr) -
+ sizeof(struct iphdr));
+ payload->udp.check = 0; /* checksum ignored */
+
+ /* Fill out payload */
+ payload->iteration = htons(ntohs(payload->iteration) + 1);
+ memcpy(&payload->msg, payload_msg, sizeof(payload_msg));
+
+ /* Fill out remaining state members */
+ atomic_set(&state->rx_good, 0);
+ atomic_set(&state->rx_bad, 0);
+ smp_wmb();
+}
+
+static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ struct efx_loopback_state *state = efx->loopback_selftest;
+ struct efx_loopback_payload *payload;
+ struct sk_buff *skb;
+ int i;
+ netdev_tx_t rc;
+
+ /* Transmit N copies of buffer */
+ for (i = 0; i < state->packet_count; i++) {
+ /* Allocate an skb, holding an extra reference for
+ * transmit completion counting */
+ skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ state->skbs[i] = skb;
+ skb_get(skb);
+
+ /* Copy the payload in, incrementing the source address to
+ * exercise the rss vectors */
+ payload = skb_put(skb, sizeof(state->payload));
+ memcpy(payload, &state->payload, sizeof(state->payload));
+ payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
+
+ /* Ensure everything we've written is visible to the
+ * interrupt handler. */
+ smp_wmb();
+
+ netif_tx_lock_bh(efx->net_dev);
+ rc = efx_enqueue_skb(tx_queue, skb);
+ netif_tx_unlock_bh(efx->net_dev);
+
+ if (rc != NETDEV_TX_OK) {
+ netif_err(efx, drv, efx->net_dev,
+ "TX queue %d could not transmit packet %d of "
+ "%d in %s loopback test\n", tx_queue->label,
+ i + 1, state->packet_count,
+ LOOPBACK_MODE(efx));
+
+ /* Defer cleaning up the other skbs for the caller */
+ kfree_skb(skb);
+ return -EPIPE;
+ }
+ }
+
+ return 0;
+}
+
+static int efx_poll_loopback(struct efx_nic *efx)
+{
+ struct efx_loopback_state *state = efx->loopback_selftest;
+
+ return atomic_read(&state->rx_good) == state->packet_count;
+}
+
+static int efx_end_loopback(struct efx_tx_queue *tx_queue,
+ struct efx_loopback_self_tests *lb_tests)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ struct efx_loopback_state *state = efx->loopback_selftest;
+ struct sk_buff *skb;
+ int tx_done = 0, rx_good, rx_bad;
+ int i, rc = 0;
+
+ netif_tx_lock_bh(efx->net_dev);
+
+ /* Count the number of tx completions, and decrement the refcnt. Any
+ * skbs not already completed will be free'd when the queue is flushed */
+ for (i = 0; i < state->packet_count; i++) {
+ skb = state->skbs[i];
+ if (skb && !skb_shared(skb))
+ ++tx_done;
+ dev_kfree_skb(skb);
+ }
+
+ netif_tx_unlock_bh(efx->net_dev);
+
+ /* Check TX completion and received packet counts */
+ rx_good = atomic_read(&state->rx_good);
+ rx_bad = atomic_read(&state->rx_bad);
+ if (tx_done != state->packet_count) {
+ /* Don't free the skbs; they will be picked up on TX
+ * overflow or channel teardown.
+ */
+ netif_err(efx, drv, efx->net_dev,
+ "TX queue %d saw only %d out of an expected %d "
+ "TX completion events in %s loopback test\n",
+ tx_queue->label, tx_done, state->packet_count,
+ LOOPBACK_MODE(efx));
+ rc = -ETIMEDOUT;
+ /* Allow to fall through so we see the RX errors as well */
+ }
+
+ /* We may always be up to a flush away from our desired packet total */
+ if (rx_good != state->packet_count) {
+ netif_dbg(efx, drv, efx->net_dev,
+ "TX queue %d saw only %d out of an expected %d "
+ "received packets in %s loopback test\n",
+ tx_queue->label, rx_good, state->packet_count,
+ LOOPBACK_MODE(efx));
+ rc = -ETIMEDOUT;
+ /* Fall through */
+ }
+
+ /* Update loopback test structure */
+ lb_tests->tx_sent[tx_queue->label] += state->packet_count;
+ lb_tests->tx_done[tx_queue->label] += tx_done;
+ lb_tests->rx_good += rx_good;
+ lb_tests->rx_bad += rx_bad;
+
+ return rc;
+}
+
+static int
+efx_test_loopback(struct efx_tx_queue *tx_queue,
+ struct efx_loopback_self_tests *lb_tests)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ struct efx_loopback_state *state = efx->loopback_selftest;
+ int i, begin_rc, end_rc;
+
+ for (i = 0; i < 3; i++) {
+ /* Determine how many packets to send */
+ state->packet_count = efx->txq_entries / 3;
+ state->packet_count = min(1 << (i << 2), state->packet_count);
+ state->skbs = kcalloc(state->packet_count,
+ sizeof(state->skbs[0]), GFP_KERNEL);
+ if (!state->skbs)
+ return -ENOMEM;
+ state->flush = false;
+
+ netif_dbg(efx, drv, efx->net_dev,
+ "TX queue %d (hw %d) testing %s loopback with %d packets\n",
+ tx_queue->label, tx_queue->queue, LOOPBACK_MODE(efx),
+ state->packet_count);
+
+ efx_iterate_state(efx);
+ begin_rc = efx_begin_loopback(tx_queue);
+
+ /* This will normally complete very quickly, but be
+ * prepared to wait much longer. */
+ msleep(1);
+ if (!efx_poll_loopback(efx)) {
+ msleep(LOOPBACK_TIMEOUT_MS);
+ efx_poll_loopback(efx);
+ }
+
+ end_rc = efx_end_loopback(tx_queue, lb_tests);
+ kfree(state->skbs);
+
+ if (begin_rc || end_rc) {
+ /* Wait a while to ensure there are no packets
+ * floating around after a failure. */
+ schedule_timeout_uninterruptible(HZ / 10);
+ return begin_rc ? begin_rc : end_rc;
+ }
+ }
+
+ netif_dbg(efx, drv, efx->net_dev,
+ "TX queue %d passed %s loopback test with a burst length "
+ "of %d packets\n", tx_queue->label, LOOPBACK_MODE(efx),
+ state->packet_count);
+
+ return 0;
+}
+
+/* Wait for link up. On Falcon, we would prefer to rely on efx_monitor, but
+ * any contention on the mac lock (via e.g. efx_mac_mcast_work) causes it
+ * to delay and retry. Therefore, it's safer to just poll directly. Wait
+ * for link up and any faults to dissipate. */
+static int efx_wait_for_link(struct efx_nic *efx)
+{
+ struct efx_link_state *link_state = &efx->link_state;
+ int count, link_up_count = 0;
+ bool link_up;
+
+ for (count = 0; count < 40; count++) {
+ schedule_timeout_uninterruptible(HZ / 10);
+
+ if (efx->type->monitor != NULL) {
+ mutex_lock(&efx->mac_lock);
+ efx->type->monitor(efx);
+ mutex_unlock(&efx->mac_lock);
+ }
+
+ mutex_lock(&efx->mac_lock);
+ link_up = link_state->up;
+ if (link_up)
+ link_up = !efx->type->check_mac_fault(efx);
+ mutex_unlock(&efx->mac_lock);
+
+ if (link_up) {
+ if (++link_up_count == 2)
+ return 0;
+ } else {
+ link_up_count = 0;
+ }
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int efx_test_loopbacks(struct efx_nic *efx, struct efx_self_tests *tests,
+ unsigned int loopback_modes)
+{
+ enum efx_loopback_mode mode;
+ struct efx_loopback_state *state;
+ struct efx_channel *channel =
+ efx_get_channel(efx, efx->tx_channel_offset);
+ struct efx_tx_queue *tx_queue;
+ int rc = 0;
+
+ /* Set the port loopback_selftest member. From this point on
+ * all received packets will be dropped. Mark the state as
+ * "flushing" so all inflight packets are dropped */
+ state = kzalloc(sizeof(*state), GFP_KERNEL);
+ if (state == NULL)
+ return -ENOMEM;
+ BUG_ON(efx->loopback_selftest);
+ state->flush = true;
+ efx->loopback_selftest = state;
+
+ /* Test all supported loopback modes */
+ for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) {
+ if (!(loopback_modes & (1 << mode)))
+ continue;
+
+ /* Move the port into the specified loopback mode. */
+ state->flush = true;
+ mutex_lock(&efx->mac_lock);
+ efx->loopback_mode = mode;
+ rc = __efx_reconfigure_port(efx);
+ mutex_unlock(&efx->mac_lock);
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev,
+ "unable to move into %s loopback\n",
+ LOOPBACK_MODE(efx));
+ goto out;
+ }
+
+ rc = efx_wait_for_link(efx);
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev,
+ "loopback %s never came up\n",
+ LOOPBACK_MODE(efx));
+ goto out;
+ }
+
+ /* Test all enabled types of TX queue */
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ state->offload_csum = (tx_queue->type &
+ EFX_TXQ_TYPE_OUTER_CSUM);
+ rc = efx_test_loopback(tx_queue,
+ &tests->loopback[mode]);
+ if (rc)
+ goto out;
+ }
+ }
+
+ out:
+ /* Remove the flush. The caller will remove the loopback setting */
+ state->flush = true;
+ efx->loopback_selftest = NULL;
+ wmb();
+ kfree(state);
+
+ if (rc == -EPERM)
+ rc = 0;
+
+ return rc;
+}
+
+/**************************************************************************
+ *
+ * Entry point
+ *
+ *************************************************************************/
+
+int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests,
+ unsigned flags)
+{
+ enum efx_loopback_mode loopback_mode = efx->loopback_mode;
+ int phy_mode = efx->phy_mode;
+ int rc_test = 0, rc_reset, rc;
+
+ efx_selftest_async_cancel(efx);
+
+ /* Online (i.e. non-disruptive) testing
+ * This checks interrupt generation, event delivery and PHY presence. */
+
+ rc = efx_test_phy_alive(efx, tests);
+ if (rc && !rc_test)
+ rc_test = rc;
+
+ rc = efx_test_nvram(efx, tests);
+ if (rc && !rc_test)
+ rc_test = rc;
+
+ rc = efx_test_interrupts(efx, tests);
+ if (rc && !rc_test)
+ rc_test = rc;
+
+ rc = efx_test_eventq_irq(efx, tests);
+ if (rc && !rc_test)
+ rc_test = rc;
+
+ if (rc_test)
+ return rc_test;
+
+ if (!(flags & ETH_TEST_FL_OFFLINE))
+ return efx_test_phy(efx, tests, flags);
+
+ /* Offline (i.e. disruptive) testing
+ * This checks MAC and PHY loopback on the specified port. */
+
+ /* Detach the device so the kernel doesn't transmit during the
+ * loopback test and the watchdog timeout doesn't fire.
+ */
+ efx_device_detach_sync(efx);
+
+ if (efx->type->test_chip) {
+ rc_reset = efx->type->test_chip(efx, tests);
+ if (rc_reset) {
+ netif_err(efx, hw, efx->net_dev,
+ "Unable to recover from chip test\n");
+ efx_schedule_reset(efx, RESET_TYPE_DISABLE);
+ return rc_reset;
+ }
+
+ if ((tests->memory < 0 || tests->registers < 0) && !rc_test)
+ rc_test = -EIO;
+ }
+
+ /* Ensure that the phy is powered and out of loopback
+ * for the bist and loopback tests */
+ mutex_lock(&efx->mac_lock);
+ efx->phy_mode &= ~PHY_MODE_LOW_POWER;
+ efx->loopback_mode = LOOPBACK_NONE;
+ __efx_reconfigure_port(efx);
+ mutex_unlock(&efx->mac_lock);
+
+ rc = efx_test_phy(efx, tests, flags);
+ if (rc && !rc_test)
+ rc_test = rc;
+
+ rc = efx_test_loopbacks(efx, tests, efx->loopback_modes);
+ if (rc && !rc_test)
+ rc_test = rc;
+
+ /* restore the PHY to the previous state */
+ mutex_lock(&efx->mac_lock);
+ efx->phy_mode = phy_mode;
+ efx->loopback_mode = loopback_mode;
+ __efx_reconfigure_port(efx);
+ mutex_unlock(&efx->mac_lock);
+
+ efx_device_attach_if_not_resetting(efx);
+
+ return rc_test;
+}
+
+void efx_selftest_async_start(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ efx_nic_event_test_start(channel);
+ schedule_delayed_work(&efx->selftest_work, IRQ_TIMEOUT);
+}
+
+void efx_selftest_async_cancel(struct efx_nic *efx)
+{
+ cancel_delayed_work_sync(&efx->selftest_work);
+}
+
+static void efx_selftest_async_work(struct work_struct *data)
+{
+ struct efx_nic *efx = container_of(data, struct efx_nic,
+ selftest_work.work);
+ struct efx_channel *channel;
+ int cpu;
+
+ efx_for_each_channel(channel, efx) {
+ cpu = efx_nic_event_test_irq_cpu(channel);
+ if (cpu < 0)
+ netif_err(efx, ifup, efx->net_dev,
+ "channel %d failed to trigger an interrupt\n",
+ channel->channel);
+ else
+ netif_dbg(efx, ifup, efx->net_dev,
+ "channel %d triggered interrupt on CPU %d\n",
+ channel->channel, cpu);
+ }
+}
+
+void efx_selftest_async_init(struct efx_nic *efx)
+{
+ INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
+}
diff --git a/drivers/net/ethernet/sfc/siena/selftest.h b/drivers/net/ethernet/sfc/siena/selftest.h
new file mode 100644
index 000000000000..a23f085bf298
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/selftest.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2012 Solarflare Communications Inc.
+ */
+
+#ifndef EFX_SELFTEST_H
+#define EFX_SELFTEST_H
+
+#include "net_driver.h"
+
+/*
+ * Self tests
+ */
+
+struct efx_loopback_self_tests {
+ int tx_sent[EFX_MAX_TXQ_PER_CHANNEL];
+ int tx_done[EFX_MAX_TXQ_PER_CHANNEL];
+ int rx_good;
+ int rx_bad;
+};
+
+#define EFX_MAX_PHY_TESTS 20
+
+/* Efx self test results
+ * For fields which are not counters, 1 indicates success and -1
+ * indicates failure; 0 indicates test could not be run.
+ */
+struct efx_self_tests {
+ /* online tests */
+ int phy_alive;
+ int nvram;
+ int interrupt;
+ int eventq_dma[EFX_MAX_CHANNELS];
+ int eventq_int[EFX_MAX_CHANNELS];
+ /* offline tests */
+ int memory;
+ int registers;
+ int phy_ext[EFX_MAX_PHY_TESTS];
+ struct efx_loopback_self_tests loopback[LOOPBACK_TEST_MAX + 1];
+};
+
+void efx_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr,
+ int pkt_len);
+int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests,
+ unsigned flags);
+void efx_selftest_async_init(struct efx_nic *efx);
+void efx_selftest_async_start(struct efx_nic *efx);
+void efx_selftest_async_cancel(struct efx_nic *efx);
+
+#endif /* EFX_SELFTEST_H */
diff --git a/drivers/net/ethernet/sfc/siena/sriov.c b/drivers/net/ethernet/sfc/siena/sriov.c
new file mode 100644
index 000000000000..3f241e6c881a
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/sriov.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2014-2015 Solarflare Communications Inc.
+ */
+#include <linux/module.h>
+#include "net_driver.h"
+#include "nic.h"
+#include "sriov.h"
+
+int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ if (efx->type->sriov_set_vf_mac)
+ return efx->type->sriov_set_vf_mac(efx, vf_i, mac);
+ else
+ return -EOPNOTSUPP;
+}
+
+int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan,
+ u8 qos, __be16 vlan_proto)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ if (efx->type->sriov_set_vf_vlan) {
+ if ((vlan & ~VLAN_VID_MASK) ||
+ (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT)))
+ return -EINVAL;
+
+ if (vlan_proto != htons(ETH_P_8021Q))
+ return -EPROTONOSUPPORT;
+
+ return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos);
+ } else {
+ return -EOPNOTSUPP;
+ }
+}
+
+int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i,
+ bool spoofchk)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ if (efx->type->sriov_set_vf_spoofchk)
+ return efx->type->sriov_set_vf_spoofchk(efx, vf_i, spoofchk);
+ else
+ return -EOPNOTSUPP;
+}
+
+int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
+ struct ifla_vf_info *ivi)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ if (efx->type->sriov_get_vf_config)
+ return efx->type->sriov_get_vf_config(efx, vf_i, ivi);
+ else
+ return -EOPNOTSUPP;
+}
+
+int efx_sriov_set_vf_link_state(struct net_device *net_dev, int vf_i,
+ int link_state)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ if (efx->type->sriov_set_vf_link_state)
+ return efx->type->sriov_set_vf_link_state(efx, vf_i,
+ link_state);
+ else
+ return -EOPNOTSUPP;
+}
diff --git a/drivers/net/ethernet/sfc/siena/sriov.h b/drivers/net/ethernet/sfc/siena/sriov.h
new file mode 100644
index 000000000000..747707bee483
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/sriov.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2014-2015 Solarflare Communications Inc.
+ */
+
+#ifndef EFX_SRIOV_H
+#define EFX_SRIOV_H
+
+#include "net_driver.h"
+
+#ifdef CONFIG_SFC_SRIOV
+
+int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac);
+int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan,
+ u8 qos, __be16 vlan_proto);
+int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i,
+ bool spoofchk);
+int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
+ struct ifla_vf_info *ivi);
+int efx_sriov_set_vf_link_state(struct net_device *net_dev, int vf_i,
+ int link_state);
+#endif /* CONFIG_SFC_SRIOV */
+
+#endif /* EFX_SRIOV_H */
diff --git a/drivers/net/ethernet/sfc/siena/tx.c b/drivers/net/ethernet/sfc/siena/tx.c
new file mode 100644
index 000000000000..138bca611341
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/tx.c
@@ -0,0 +1,643 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ */
+
+#include <linux/pci.h>
+#include <linux/tcp.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/ipv6.h>
+#include <linux/if_ether.h>
+#include <linux/highmem.h>
+#include <linux/cache.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "io.h"
+#include "nic.h"
+#include "tx.h"
+#include "tx_common.h"
+#include "workarounds.h"
+#include "ef10_regs.h"
+
+#ifdef EFX_USE_PIO
+
+#define EFX_PIOBUF_SIZE_DEF ALIGN(256, L1_CACHE_BYTES)
+unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF;
+
+#endif /* EFX_USE_PIO */
+
+static inline u8 *efx_tx_get_copy_buffer(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer)
+{
+ unsigned int index = efx_tx_queue_get_insert_index(tx_queue);
+ struct efx_buffer *page_buf =
+ &tx_queue->cb_page[index >> (PAGE_SHIFT - EFX_TX_CB_ORDER)];
+ unsigned int offset =
+ ((index << EFX_TX_CB_ORDER) + NET_IP_ALIGN) & (PAGE_SIZE - 1);
+
+ if (unlikely(!page_buf->addr) &&
+ efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
+ GFP_ATOMIC))
+ return NULL;
+ buffer->dma_addr = page_buf->dma_addr + offset;
+ buffer->unmap_len = 0;
+ return (u8 *)page_buf->addr + offset;
+}
+
+u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer, size_t len)
+{
+ if (len > EFX_TX_CB_SIZE)
+ return NULL;
+ return efx_tx_get_copy_buffer(tx_queue, buffer);
+}
+
+static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
+{
+ /* We need to consider all queues that the net core sees as one */
+ struct efx_nic *efx = txq1->efx;
+ struct efx_tx_queue *txq2;
+ unsigned int fill_level;
+
+ fill_level = efx_channel_tx_old_fill_level(txq1->channel);
+ if (likely(fill_level < efx->txq_stop_thresh))
+ return;
+
+ /* We used the stale old_read_count above, which gives us a
+ * pessimistic estimate of the fill level (which may even
+ * validly be >= efx->txq_entries). Now try again using
+ * read_count (more likely to be a cache miss).
+ *
+ * If we read read_count and then conditionally stop the
+ * queue, it is possible for the completion path to race with
+ * us and complete all outstanding descriptors in the middle,
+ * after which there will be no more completions to wake it.
+ * Therefore we stop the queue first, then read read_count
+ * (with a memory barrier to ensure the ordering), then
+ * restart the queue if the fill level turns out to be low
+ * enough.
+ */
+ netif_tx_stop_queue(txq1->core_txq);
+ smp_mb();
+ efx_for_each_channel_tx_queue(txq2, txq1->channel)
+ txq2->old_read_count = READ_ONCE(txq2->read_count);
+
+ fill_level = efx_channel_tx_old_fill_level(txq1->channel);
+ EFX_WARN_ON_ONCE_PARANOID(fill_level >= efx->txq_entries);
+ if (likely(fill_level < efx->txq_stop_thresh)) {
+ smp_mb();
+ if (likely(!efx->loopback_selftest))
+ netif_tx_start_queue(txq1->core_txq);
+ }
+}
+
+static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue,
+ struct sk_buff *skb)
+{
+ unsigned int copy_len = skb->len;
+ struct efx_tx_buffer *buffer;
+ u8 *copy_buffer;
+ int rc;
+
+ EFX_WARN_ON_ONCE_PARANOID(copy_len > EFX_TX_CB_SIZE);
+
+ buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+
+ copy_buffer = efx_tx_get_copy_buffer(tx_queue, buffer);
+ if (unlikely(!copy_buffer))
+ return -ENOMEM;
+
+ rc = skb_copy_bits(skb, 0, copy_buffer, copy_len);
+ EFX_WARN_ON_PARANOID(rc);
+ buffer->len = copy_len;
+
+ buffer->skb = skb;
+ buffer->flags = EFX_TX_BUF_SKB;
+
+ ++tx_queue->insert_count;
+ return rc;
+}
+
+#ifdef EFX_USE_PIO
+
+struct efx_short_copy_buffer {
+ int used;
+ u8 buf[L1_CACHE_BYTES];
+};
+
+/* Copy to PIO, respecting that writes to PIO buffers must be dword aligned.
+ * Advances piobuf pointer. Leaves additional data in the copy buffer.
+ */
+static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf,
+ u8 *data, int len,
+ struct efx_short_copy_buffer *copy_buf)
+{
+ int block_len = len & ~(sizeof(copy_buf->buf) - 1);
+
+ __iowrite64_copy(*piobuf, data, block_len >> 3);
+ *piobuf += block_len;
+ len -= block_len;
+
+ if (len) {
+ data += block_len;
+ BUG_ON(copy_buf->used);
+ BUG_ON(len > sizeof(copy_buf->buf));
+ memcpy(copy_buf->buf, data, len);
+ copy_buf->used = len;
+ }
+}
+
+/* Copy to PIO, respecting dword alignment, popping data from copy buffer first.
+ * Advances piobuf pointer. Leaves additional data in the copy buffer.
+ */
+static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf,
+ u8 *data, int len,
+ struct efx_short_copy_buffer *copy_buf)
+{
+ if (copy_buf->used) {
+ /* if the copy buffer is partially full, fill it up and write */
+ int copy_to_buf =
+ min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len);
+
+ memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf);
+ copy_buf->used += copy_to_buf;
+
+ /* if we didn't fill it up then we're done for now */
+ if (copy_buf->used < sizeof(copy_buf->buf))
+ return;
+
+ __iowrite64_copy(*piobuf, copy_buf->buf,
+ sizeof(copy_buf->buf) >> 3);
+ *piobuf += sizeof(copy_buf->buf);
+ data += copy_to_buf;
+ len -= copy_to_buf;
+ copy_buf->used = 0;
+ }
+
+ efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf);
+}
+
+static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf,
+ struct efx_short_copy_buffer *copy_buf)
+{
+ /* if there's anything in it, write the whole buffer, including junk */
+ if (copy_buf->used)
+ __iowrite64_copy(piobuf, copy_buf->buf,
+ sizeof(copy_buf->buf) >> 3);
+}
+
+/* Traverse skb structure and copy fragments in to PIO buffer.
+ * Advances piobuf pointer.
+ */
+static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
+ u8 __iomem **piobuf,
+ struct efx_short_copy_buffer *copy_buf)
+{
+ int i;
+
+ efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb),
+ copy_buf);
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+ skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+ u8 *vaddr;
+
+ vaddr = kmap_atomic(skb_frag_page(f));
+
+ efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + skb_frag_off(f),
+ skb_frag_size(f), copy_buf);
+ kunmap_atomic(vaddr);
+ }
+
+ EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->frag_list);
+}
+
+static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue,
+ struct sk_buff *skb)
+{
+ struct efx_tx_buffer *buffer =
+ efx_tx_queue_get_insert_buffer(tx_queue);
+ u8 __iomem *piobuf = tx_queue->piobuf;
+
+ /* Copy to PIO buffer. Ensure the writes are padded to the end
+ * of a cache line, as this is required for write-combining to be
+ * effective on at least x86.
+ */
+
+ if (skb_shinfo(skb)->nr_frags) {
+ /* The size of the copy buffer will ensure all writes
+ * are the size of a cache line.
+ */
+ struct efx_short_copy_buffer copy_buf;
+
+ copy_buf.used = 0;
+
+ efx_skb_copy_bits_to_pio(tx_queue->efx, skb,
+ &piobuf, &copy_buf);
+ efx_flush_copy_buffer(tx_queue->efx, piobuf, &copy_buf);
+ } else {
+ /* Pad the write to the size of a cache line.
+ * We can do this because we know the skb_shared_info struct is
+ * after the source, and the destination buffer is big enough.
+ */
+ BUILD_BUG_ON(L1_CACHE_BYTES >
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+ __iowrite64_copy(tx_queue->piobuf, skb->data,
+ ALIGN(skb->len, L1_CACHE_BYTES) >> 3);
+ }
+
+ buffer->skb = skb;
+ buffer->flags = EFX_TX_BUF_SKB | EFX_TX_BUF_OPTION;
+
+ EFX_POPULATE_QWORD_5(buffer->option,
+ ESF_DZ_TX_DESC_IS_OPT, 1,
+ ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO,
+ ESF_DZ_TX_PIO_CONT, 0,
+ ESF_DZ_TX_PIO_BYTE_CNT, skb->len,
+ ESF_DZ_TX_PIO_BUF_ADDR,
+ tx_queue->piobuf_offset);
+ ++tx_queue->insert_count;
+ return 0;
+}
+
+/* Decide whether we can use TX PIO, ie. write packet data directly into
+ * a buffer on the device. This can reduce latency at the expense of
+ * throughput, so we only do this if both hardware and software TX rings
+ * are empty, including all queues for the channel. This also ensures that
+ * only one packet at a time can be using the PIO buffer. If the xmit_more
+ * flag is set then we don't use this - there'll be another packet along
+ * shortly and we want to hold off the doorbell.
+ */
+static bool efx_tx_may_pio(struct efx_tx_queue *tx_queue)
+{
+ struct efx_channel *channel = tx_queue->channel;
+
+ if (!tx_queue->piobuf)
+ return false;
+
+ EFX_WARN_ON_ONCE_PARANOID(!channel->efx->type->option_descriptors);
+
+ efx_for_each_channel_tx_queue(tx_queue, channel)
+ if (!efx_nic_tx_is_empty(tx_queue, tx_queue->packet_write_count))
+ return false;
+
+ return true;
+}
+#endif /* EFX_USE_PIO */
+
+/* Send any pending traffic for a channel. xmit_more is shared across all
+ * queues for a channel, so we must check all of them.
+ */
+static void efx_tx_send_pending(struct efx_channel *channel)
+{
+ struct efx_tx_queue *q;
+
+ efx_for_each_channel_tx_queue(q, channel) {
+ if (q->xmit_pending)
+ efx_nic_push_buffers(q);
+ }
+}
+
+/*
+ * Add a socket buffer to a TX queue
+ *
+ * This maps all fragments of a socket buffer for DMA and adds them to
+ * the TX queue. The queue's insert pointer will be incremented by
+ * the number of fragments in the socket buffer.
+ *
+ * If any DMA mapping fails, any mapped fragments will be unmapped,
+ * the queue's insert pointer will be restored to its original value.
+ *
+ * This function is split out from efx_hard_start_xmit to allow the
+ * loopback test to direct packets via specific TX queues.
+ *
+ * Returns NETDEV_TX_OK.
+ * You must hold netif_tx_lock() to call this function.
+ */
+netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+{
+ unsigned int old_insert_count = tx_queue->insert_count;
+ bool xmit_more = netdev_xmit_more();
+ bool data_mapped = false;
+ unsigned int segments;
+ unsigned int skb_len;
+ int rc;
+
+ skb_len = skb->len;
+ segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0;
+ if (segments == 1)
+ segments = 0; /* Don't use TSO for a single segment. */
+
+ /* Handle TSO first - it's *possible* (although unlikely) that we might
+ * be passed a packet to segment that's smaller than the copybreak/PIO
+ * size limit.
+ */
+ if (segments) {
+ switch (tx_queue->tso_version) {
+ case 1:
+ rc = efx_enqueue_skb_tso(tx_queue, skb, &data_mapped);
+ break;
+ case 2:
+ rc = efx_ef10_tx_tso_desc(tx_queue, skb, &data_mapped);
+ break;
+ case 0: /* No TSO on this queue, SW fallback needed */
+ default:
+ rc = -EINVAL;
+ break;
+ }
+ if (rc == -EINVAL) {
+ rc = efx_tx_tso_fallback(tx_queue, skb);
+ tx_queue->tso_fallbacks++;
+ if (rc == 0)
+ return 0;
+ }
+ if (rc)
+ goto err;
+#ifdef EFX_USE_PIO
+ } else if (skb_len <= efx_piobuf_size && !xmit_more &&
+ efx_tx_may_pio(tx_queue)) {
+ /* Use PIO for short packets with an empty queue. */
+ if (efx_enqueue_skb_pio(tx_queue, skb))
+ goto err;
+ tx_queue->pio_packets++;
+ data_mapped = true;
+#endif
+ } else if (skb->data_len && skb_len <= EFX_TX_CB_SIZE) {
+ /* Pad short packets or coalesce short fragmented packets. */
+ if (efx_enqueue_skb_copy(tx_queue, skb))
+ goto err;
+ tx_queue->cb_packets++;
+ data_mapped = true;
+ }
+
+ /* Map for DMA and create descriptors if we haven't done so already. */
+ if (!data_mapped && (efx_tx_map_data(tx_queue, skb, segments)))
+ goto err;
+
+ efx_tx_maybe_stop_queue(tx_queue);
+
+ tx_queue->xmit_pending = true;
+
+ /* Pass off to hardware */
+ if (__netdev_tx_sent_queue(tx_queue->core_txq, skb_len, xmit_more))
+ efx_tx_send_pending(tx_queue->channel);
+
+ if (segments) {
+ tx_queue->tso_bursts++;
+ tx_queue->tso_packets += segments;
+ tx_queue->tx_packets += segments;
+ } else {
+ tx_queue->tx_packets++;
+ }
+
+ return NETDEV_TX_OK;
+
+
+err:
+ efx_enqueue_unwind(tx_queue, old_insert_count);
+ dev_kfree_skb_any(skb);
+
+ /* If we're not expecting another transmit and we had something to push
+ * on this queue or a partner queue then we need to push here to get the
+ * previous packets out.
+ */
+ if (!xmit_more)
+ efx_tx_send_pending(tx_queue->channel);
+
+ return NETDEV_TX_OK;
+}
+
+/* Transmit a packet from an XDP buffer
+ *
+ * Returns number of packets sent on success, error code otherwise.
+ * Runs in NAPI context, either in our poll (for XDP TX) or a different NIC
+ * (for XDP redirect).
+ */
+int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
+ bool flush)
+{
+ struct efx_tx_buffer *tx_buffer;
+ struct efx_tx_queue *tx_queue;
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+ unsigned int len;
+ int space;
+ int cpu;
+ int i = 0;
+
+ if (unlikely(n && !xdpfs))
+ return -EINVAL;
+ if (unlikely(!n))
+ return 0;
+
+ cpu = raw_smp_processor_id();
+ if (unlikely(cpu >= efx->xdp_tx_queue_count))
+ return -EINVAL;
+
+ tx_queue = efx->xdp_tx_queues[cpu];
+ if (unlikely(!tx_queue))
+ return -EINVAL;
+
+ if (!tx_queue->initialised)
+ return -EINVAL;
+
+ if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED)
+ HARD_TX_LOCK(efx->net_dev, tx_queue->core_txq, cpu);
+
+ /* If we're borrowing net stack queues we have to handle stop-restart
+ * or we might block the queue and it will be considered as frozen
+ */
+ if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) {
+ if (netif_tx_queue_stopped(tx_queue->core_txq))
+ goto unlock;
+ efx_tx_maybe_stop_queue(tx_queue);
+ }
+
+ /* Check for available space. We should never need multiple
+ * descriptors per frame.
+ */
+ space = efx->txq_entries +
+ tx_queue->read_count - tx_queue->insert_count;
+
+ for (i = 0; i < n; i++) {
+ xdpf = xdpfs[i];
+
+ if (i >= space)
+ break;
+
+ /* We'll want a descriptor for this tx. */
+ prefetchw(__efx_tx_queue_get_insert_buffer(tx_queue));
+
+ len = xdpf->len;
+
+ /* Map for DMA. */
+ dma_addr = dma_map_single(&efx->pci_dev->dev,
+ xdpf->data, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&efx->pci_dev->dev, dma_addr))
+ break;
+
+ /* Create descriptor and set up for unmapping DMA. */
+ tx_buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
+ tx_buffer->xdpf = xdpf;
+ tx_buffer->flags = EFX_TX_BUF_XDP |
+ EFX_TX_BUF_MAP_SINGLE;
+ tx_buffer->dma_offset = 0;
+ tx_buffer->unmap_len = len;
+ tx_queue->tx_packets++;
+ }
+
+ /* Pass mapped frames to hardware. */
+ if (flush && i > 0)
+ efx_nic_push_buffers(tx_queue);
+
+unlock:
+ if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED)
+ HARD_TX_UNLOCK(efx->net_dev, tx_queue->core_txq);
+
+ return i == 0 ? -EIO : i;
+}
+
+/* Initiate a packet transmission. We use one channel per CPU
+ * (sharing when we have more CPUs than channels).
+ *
+ * Context: non-blocking.
+ * Should always return NETDEV_TX_OK and consume the skb.
+ */
+netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
+ struct net_device *net_dev)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ struct efx_tx_queue *tx_queue;
+ unsigned index, type;
+
+ EFX_WARN_ON_PARANOID(!netif_device_present(net_dev));
+
+ index = skb_get_queue_mapping(skb);
+ type = efx_tx_csum_type_skb(skb);
+ if (index >= efx->n_tx_channels) {
+ index -= efx->n_tx_channels;
+ type |= EFX_TXQ_TYPE_HIGHPRI;
+ }
+
+ /* PTP "event" packet */
+ if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
+ ((efx_ptp_use_mac_tx_timestamps(efx) && efx->ptp_data) ||
+ unlikely(efx_ptp_is_ptp_tx(efx, skb)))) {
+ /* There may be existing transmits on the channel that are
+ * waiting for this packet to trigger the doorbell write.
+ * We need to send the packets at this point.
+ */
+ efx_tx_send_pending(efx_get_tx_channel(efx, index));
+ return efx_ptp_tx(efx, skb);
+ }
+
+ tx_queue = efx_get_tx_queue(efx, index, type);
+ if (WARN_ON_ONCE(!tx_queue)) {
+ /* We don't have a TXQ of the right type.
+ * This should never happen, as we don't advertise offload
+ * features unless we can support them.
+ */
+ dev_kfree_skb_any(skb);
+ /* If we're not expecting another transmit and we had something to push
+ * on this queue or a partner queue then we need to push here to get the
+ * previous packets out.
+ */
+ if (!netdev_xmit_more())
+ efx_tx_send_pending(tx_queue->channel);
+ return NETDEV_TX_OK;
+ }
+
+ return __efx_enqueue_skb(tx_queue, skb);
+}
+
+void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
+{
+ unsigned int pkts_compl = 0, bytes_compl = 0;
+ unsigned int read_ptr;
+ bool finished = false;
+
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+
+ while (!finished) {
+ struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
+
+ if (!efx_tx_buffer_in_use(buffer)) {
+ struct efx_nic *efx = tx_queue->efx;
+
+ netif_err(efx, hw, efx->net_dev,
+ "TX queue %d spurious single TX completion\n",
+ tx_queue->queue);
+ efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
+ return;
+ }
+
+ /* Need to check the flag before dequeueing. */
+ if (buffer->flags & EFX_TX_BUF_SKB)
+ finished = true;
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+
+ ++tx_queue->read_count;
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+ }
+
+ tx_queue->pkts_compl += pkts_compl;
+ tx_queue->bytes_compl += bytes_compl;
+
+ EFX_WARN_ON_PARANOID(pkts_compl != 1);
+
+ efx_xmit_done_check_empty(tx_queue);
+}
+
+void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+
+ /* Must be inverse of queue lookup in efx_hard_start_xmit() */
+ tx_queue->core_txq =
+ netdev_get_tx_queue(efx->net_dev,
+ tx_queue->channel->channel +
+ ((tx_queue->type & EFX_TXQ_TYPE_HIGHPRI) ?
+ efx->n_tx_channels : 0));
+}
+
+int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ struct tc_mqprio_qopt *mqprio = type_data;
+ unsigned tc, num_tc;
+
+ if (type != TC_SETUP_QDISC_MQPRIO)
+ return -EOPNOTSUPP;
+
+ /* Only Siena supported highpri queues */
+ if (efx_nic_rev(efx) > EFX_REV_SIENA_A0)
+ return -EOPNOTSUPP;
+
+ num_tc = mqprio->num_tc;
+
+ if (num_tc > EFX_MAX_TX_TC)
+ return -EINVAL;
+
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+ if (num_tc == net_dev->num_tc)
+ return 0;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ net_dev->tc_to_txq[tc].offset = tc * efx->n_tx_channels;
+ net_dev->tc_to_txq[tc].count = efx->n_tx_channels;
+ }
+
+ net_dev->num_tc = num_tc;
+
+ return netif_set_real_num_tx_queues(net_dev,
+ max_t(int, num_tc, 1) *
+ efx->n_tx_channels);
+}
diff --git a/drivers/net/ethernet/sfc/siena/tx.h b/drivers/net/ethernet/sfc/siena/tx.h
new file mode 100644
index 000000000000..f2c4d2f89919
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/tx.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2015 Solarflare Communications Inc.
+ */
+
+#ifndef EFX_TX_H
+#define EFX_TX_H
+
+#include <linux/types.h>
+
+/* Driver internal tx-path related declarations. */
+
+unsigned int efx_tx_limit_len(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, unsigned int len);
+
+u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer, size_t len);
+
+/* What TXQ type will satisfy the checksum offloads required for this skb? */
+static inline unsigned int efx_tx_csum_type_skb(struct sk_buff *skb)
+{
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0; /* no checksum offload */
+
+ if (skb->encapsulation &&
+ skb_checksum_start_offset(skb) == skb_inner_transport_offset(skb)) {
+ /* we only advertise features for IPv4 and IPv6 checksums on
+ * encapsulated packets, so if the checksum is for the inner
+ * packet, it must be one of them; no further checking required.
+ */
+
+ /* Do we also need to offload the outer header checksum? */
+ if (skb_shinfo(skb)->gso_segs > 1 &&
+ !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+ return EFX_TXQ_TYPE_OUTER_CSUM | EFX_TXQ_TYPE_INNER_CSUM;
+ return EFX_TXQ_TYPE_INNER_CSUM;
+ }
+
+ /* similarly, we only advertise features for IPv4 and IPv6 checksums,
+ * so it must be one of them. No need for further checks.
+ */
+ return EFX_TXQ_TYPE_OUTER_CSUM;
+}
+#endif /* EFX_TX_H */
diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c
new file mode 100644
index 000000000000..9bc8281b7f5b
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/tx_common.c
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include "efx.h"
+#include "nic_common.h"
+#include "tx_common.h"
+
+static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
+{
+ return DIV_ROUND_UP(tx_queue->ptr_mask + 1,
+ PAGE_SIZE >> EFX_TX_CB_ORDER);
+}
+
+int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ unsigned int entries;
+ int rc;
+
+ /* Create the smallest power-of-two aligned ring */
+ entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
+ EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+ tx_queue->ptr_mask = entries - 1;
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "creating TX queue %d size %#x mask %#x\n",
+ tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);
+
+ /* Allocate software ring */
+ tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer),
+ GFP_KERNEL);
+ if (!tx_queue->buffer)
+ return -ENOMEM;
+
+ tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
+ sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
+ if (!tx_queue->cb_page) {
+ rc = -ENOMEM;
+ goto fail1;
+ }
+
+ /* Allocate hardware ring, determine TXQ type */
+ rc = efx_nic_probe_tx(tx_queue);
+ if (rc)
+ goto fail2;
+
+ tx_queue->channel->tx_queue_by_type[tx_queue->type] = tx_queue;
+ return 0;
+
+fail2:
+ kfree(tx_queue->cb_page);
+ tx_queue->cb_page = NULL;
+fail1:
+ kfree(tx_queue->buffer);
+ tx_queue->buffer = NULL;
+ return rc;
+}
+
+void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+
+ netif_dbg(efx, drv, efx->net_dev,
+ "initialising TX queue %d\n", tx_queue->queue);
+
+ tx_queue->insert_count = 0;
+ tx_queue->notify_count = 0;
+ tx_queue->write_count = 0;
+ tx_queue->packet_write_count = 0;
+ tx_queue->old_write_count = 0;
+ tx_queue->read_count = 0;
+ tx_queue->old_read_count = 0;
+ tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
+ tx_queue->xmit_pending = false;
+ tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
+ tx_queue->channel == efx_ptp_channel(efx));
+ tx_queue->completed_timestamp_major = 0;
+ tx_queue->completed_timestamp_minor = 0;
+
+ tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
+ tx_queue->tso_version = 0;
+
+ /* Set up TX descriptor ring */
+ efx_nic_init_tx(tx_queue);
+
+ tx_queue->initialised = true;
+}
+
+void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ struct efx_tx_buffer *buffer;
+
+ netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+ "shutting down TX queue %d\n", tx_queue->queue);
+
+ tx_queue->initialised = false;
+
+ if (!tx_queue->buffer)
+ return;
+
+ /* Free any buffers left in the ring */
+ while (tx_queue->read_count != tx_queue->write_count) {
+ unsigned int pkts_compl = 0, bytes_compl = 0;
+
+ buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+
+ ++tx_queue->read_count;
+ }
+ tx_queue->xmit_pending = false;
+ netdev_tx_reset_queue(tx_queue->core_txq);
+}
+
+void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ int i;
+
+ if (!tx_queue->buffer)
+ return;
+
+ netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+ "destroying TX queue %d\n", tx_queue->queue);
+ efx_nic_remove_tx(tx_queue);
+
+ if (tx_queue->cb_page) {
+ for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
+ efx_nic_free_buffer(tx_queue->efx,
+ &tx_queue->cb_page[i]);
+ kfree(tx_queue->cb_page);
+ tx_queue->cb_page = NULL;
+ }
+
+ kfree(tx_queue->buffer);
+ tx_queue->buffer = NULL;
+ tx_queue->channel->tx_queue_by_type[tx_queue->type] = NULL;
+}
+
+void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl)
+{
+ if (buffer->unmap_len) {
+ struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
+ dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;
+
+ if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
+ dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
+ DMA_TO_DEVICE);
+ else
+ dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
+ DMA_TO_DEVICE);
+ buffer->unmap_len = 0;
+ }
+
+ if (buffer->flags & EFX_TX_BUF_SKB) {
+ struct sk_buff *skb = (struct sk_buff *)buffer->skb;
+
+ EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
+ (*pkts_compl)++;
+ (*bytes_compl) += skb->len;
+ if (tx_queue->timestamping &&
+ (tx_queue->completed_timestamp_major ||
+ tx_queue->completed_timestamp_minor)) {
+ struct skb_shared_hwtstamps hwtstamp;
+
+ hwtstamp.hwtstamp =
+ efx_ptp_nic_to_kernel_time(tx_queue);
+ skb_tstamp_tx(skb, &hwtstamp);
+
+ tx_queue->completed_timestamp_major = 0;
+ tx_queue->completed_timestamp_minor = 0;
+ }
+ dev_consume_skb_any((struct sk_buff *)buffer->skb);
+ netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
+ "TX queue %d transmission id %x complete\n",
+ tx_queue->queue, tx_queue->read_count);
+ } else if (buffer->flags & EFX_TX_BUF_XDP) {
+ xdp_return_frame_rx_napi(buffer->xdpf);
+ }
+
+ buffer->len = 0;
+ buffer->flags = 0;
+}
+
+/* Remove packets from the TX queue
+ *
+ * This removes packets from the TX queue, up to and including the
+ * specified index.
+ */
+static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
+ unsigned int index,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ unsigned int stop_index, read_ptr;
+
+ stop_index = (index + 1) & tx_queue->ptr_mask;
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+
+ while (read_ptr != stop_index) {
+ struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
+
+ if (!efx_tx_buffer_in_use(buffer)) {
+ netif_err(efx, tx_err, efx->net_dev,
+ "TX queue %d spurious TX completion id %d\n",
+ tx_queue->queue, read_ptr);
+ efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
+ return;
+ }
+
+ efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
+
+ ++tx_queue->read_count;
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+ }
+}
+
+void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue)
+{
+ if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
+ tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
+ if (tx_queue->read_count == tx_queue->old_write_count) {
+ /* Ensure that read_count is flushed. */
+ smp_mb();
+ tx_queue->empty_read_count =
+ tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
+ }
+ }
+}
+
+void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
+{
+ unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
+ struct efx_nic *efx = tx_queue->efx;
+
+ EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
+
+ efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
+ tx_queue->pkts_compl += pkts_compl;
+ tx_queue->bytes_compl += bytes_compl;
+
+ if (pkts_compl > 1)
+ ++tx_queue->merge_events;
+
+ /* See if we need to restart the netif queue. This memory
+ * barrier ensures that we write read_count (inside
+ * efx_dequeue_buffers()) before reading the queue status.
+ */
+ smp_mb();
+ if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
+ likely(efx->port_enabled) &&
+ likely(netif_device_present(efx->net_dev))) {
+ fill_level = efx_channel_tx_fill_level(tx_queue->channel);
+ if (fill_level <= efx->txq_wake_thresh)
+ netif_tx_wake_queue(tx_queue->core_txq);
+ }
+
+ efx_xmit_done_check_empty(tx_queue);
+}
+
+/* Remove buffers put into a tx_queue for the current packet.
+ * None of the buffers must have an skb attached.
+ */
+void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
+ unsigned int insert_count)
+{
+ struct efx_tx_buffer *buffer;
+ unsigned int bytes_compl = 0;
+ unsigned int pkts_compl = 0;
+
+ /* Work backwards until we hit the original insert pointer value */
+ while (tx_queue->insert_count != insert_count) {
+ --tx_queue->insert_count;
+ buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+ }
+}
+
+struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, size_t len)
+{
+ const struct efx_nic_type *nic_type = tx_queue->efx->type;
+ struct efx_tx_buffer *buffer;
+ unsigned int dma_len;
+
+ /* Map the fragment taking account of NIC-dependent DMA limits. */
+ do {
+ buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+
+ if (nic_type->tx_limit_len)
+ dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);
+ else
+ dma_len = len;
+
+ buffer->len = dma_len;
+ buffer->dma_addr = dma_addr;
+ buffer->flags = EFX_TX_BUF_CONT;
+ len -= dma_len;
+ dma_addr += dma_len;
+ ++tx_queue->insert_count;
+ } while (len);
+
+ return buffer;
+}
+
+int efx_tx_tso_header_length(struct sk_buff *skb)
+{
+ size_t header_len;
+
+ if (skb->encapsulation)
+ header_len = skb_inner_transport_header(skb) -
+ skb->data +
+ (inner_tcp_hdr(skb)->doff << 2u);
+ else
+ header_len = skb_transport_header(skb) - skb->data +
+ (tcp_hdr(skb)->doff << 2u);
+ return header_len;
+}
+
+/* Map all data from an SKB for DMA and create descriptors on the queue. */
+int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ unsigned int segment_count)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ struct device *dma_dev = &efx->pci_dev->dev;
+ unsigned int frag_index, nr_frags;
+ dma_addr_t dma_addr, unmap_addr;
+ unsigned short dma_flags;
+ size_t len, unmap_len;
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ frag_index = 0;
+
+ /* Map header data. */
+ len = skb_headlen(skb);
+ dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
+ dma_flags = EFX_TX_BUF_MAP_SINGLE;
+ unmap_len = len;
+ unmap_addr = dma_addr;
+
+ if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+ return -EIO;
+
+ if (segment_count) {
+ /* For TSO we need to put the header in to a separate
+ * descriptor. Map this separately if necessary.
+ */
+ size_t header_len = efx_tx_tso_header_length(skb);
+
+ if (header_len != len) {
+ tx_queue->tso_long_headers++;
+ efx_tx_map_chunk(tx_queue, dma_addr, header_len);
+ len -= header_len;
+ dma_addr += header_len;
+ }
+ }
+
+ /* Add descriptors for each fragment. */
+ do {
+ struct efx_tx_buffer *buffer;
+ skb_frag_t *fragment;
+
+ buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
+
+ /* The final descriptor for a fragment is responsible for
+ * unmapping the whole fragment.
+ */
+ buffer->flags = EFX_TX_BUF_CONT | dma_flags;
+ buffer->unmap_len = unmap_len;
+ buffer->dma_offset = buffer->dma_addr - unmap_addr;
+
+ if (frag_index >= nr_frags) {
+ /* Store SKB details with the final buffer for
+ * the completion.
+ */
+ buffer->skb = skb;
+ buffer->flags = EFX_TX_BUF_SKB | dma_flags;
+ return 0;
+ }
+
+ /* Move on to the next fragment. */
+ fragment = &skb_shinfo(skb)->frags[frag_index++];
+ len = skb_frag_size(fragment);
+ dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
+ DMA_TO_DEVICE);
+ dma_flags = 0;
+ unmap_len = len;
+ unmap_addr = dma_addr;
+
+ if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+ return -EIO;
+ } while (1);
+}
+
+unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
+{
+ /* Header and payload descriptor for each output segment, plus
+ * one for every input fragment boundary within a segment
+ */
+ unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
+
+ /* Possibly one more per segment for option descriptors */
+ if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
+ max_descs += EFX_TSO_MAX_SEGS;
+
+ /* Possibly more for PCIe page boundaries within input fragments */
+ if (PAGE_SIZE > EFX_PAGE_SIZE)
+ max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
+ DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));
+
+ return max_descs;
+}
+
+/*
+ * Fallback to software TSO.
+ *
+ * This is used if we are unable to send a GSO packet through hardware TSO.
+ * This should only ever happen due to per-queue restrictions - unsupported
+ * packets should first be filtered by the feature flags.
+ *
+ * Returns 0 on success, error code otherwise.
+ */
+int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+{
+ struct sk_buff *segments, *next;
+
+ segments = skb_gso_segment(skb, 0);
+ if (IS_ERR(segments))
+ return PTR_ERR(segments);
+
+ dev_consume_skb_any(skb);
+
+ skb_list_walk_safe(segments, skb, next) {
+ skb_mark_not_on_list(skb);
+ efx_enqueue_skb(tx_queue, skb);
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/sfc/siena/tx_common.h b/drivers/net/ethernet/sfc/siena/tx_common.h
new file mode 100644
index 000000000000..bbab7f248250
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/tx_common.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_TX_COMMON_H
+#define EFX_TX_COMMON_H
+
+int efx_probe_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_init_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_fini_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_remove_tx_queue(struct efx_tx_queue *tx_queue);
+
+void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl);
+
+static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer)
+{
+ return buffer->len || (buffer->flags & EFX_TX_BUF_OPTION);
+}
+
+void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue);
+void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
+
+void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
+ unsigned int insert_count);
+
+struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, size_t len);
+int efx_tx_tso_header_length(struct sk_buff *skb);
+int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ unsigned int segment_count);
+
+unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
+int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
+
+extern bool efx_separate_tx_channels;
+#endif
diff --git a/drivers/net/ethernet/sfc/siena/vfdi.h b/drivers/net/ethernet/sfc/siena/vfdi.h
new file mode 100644
index 000000000000..480b872eb4d1
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/vfdi.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2010-2012 Solarflare Communications Inc.
+ */
+#ifndef _VFDI_H
+#define _VFDI_H
+
+/**
+ * DOC: Virtual Function Driver Interface
+ *
+ * This file contains software structures used to form a two way
+ * communication channel between the VF driver and the PF driver,
+ * named Virtual Function Driver Interface (VFDI).
+ *
+ * For the purposes of VFDI, a page is a memory region with size and
+ * alignment of 4K. All addresses are DMA addresses to be used within
+ * the domain of the relevant VF.
+ *
+ * The only hardware-defined channels for a VF driver to communicate
+ * with the PF driver are the event mailboxes (%FR_CZ_USR_EV
+ * registers). Writing to these registers generates an event with
+ * EV_CODE = EV_CODE_USR_EV, USER_QID set to the index of the mailbox
+ * and USER_EV_REG_VALUE set to the value written. The PF driver may
+ * direct or disable delivery of these events by setting
+ * %FR_CZ_USR_EV_CFG.
+ *
+ * The PF driver can send arbitrary events to arbitrary event queues.
+ * However, for consistency, VFDI events from the PF are defined to
+ * follow the same form and be sent to the first event queue assigned
+ * to the VF while that queue is enabled by the VF driver.
+ *
+ * The general form of the variable bits of VFDI events is:
+ *
+ * 0 16 24 31
+ * | DATA | TYPE | SEQ |
+ *
+ * SEQ is a sequence number which should be incremented by 1 (modulo
+ * 256) for each event. The sequence numbers used in each direction
+ * are independent.
+ *
+ * The VF submits requests of type &struct vfdi_req by sending the
+ * address of the request (ADDR) in a series of 4 events:
+ *
+ * 0 16 24 31
+ * | ADDR[0:15] | VFDI_EV_TYPE_REQ_WORD0 | SEQ |
+ * | ADDR[16:31] | VFDI_EV_TYPE_REQ_WORD1 | SEQ+1 |
+ * | ADDR[32:47] | VFDI_EV_TYPE_REQ_WORD2 | SEQ+2 |
+ * | ADDR[48:63] | VFDI_EV_TYPE_REQ_WORD3 | SEQ+3 |
+ *
+ * The address must be page-aligned. After receiving such a valid
+ * series of events, the PF driver will attempt to read the request
+ * and write a response to the same address. In case of an invalid
+ * sequence of events or a DMA error, there will be no response.
+ *
+ * The VF driver may request that the PF driver writes status
+ * information into its domain asynchronously. After writing the
+ * status, the PF driver will send an event of the form:
+ *
+ * 0 16 24 31
+ * | reserved | VFDI_EV_TYPE_STATUS | SEQ |
+ *
+ * In case the VF must be reset for any reason, the PF driver will
+ * send an event of the form:
+ *
+ * 0 16 24 31
+ * | reserved | VFDI_EV_TYPE_RESET | SEQ |
+ *
+ * It is then the responsibility of the VF driver to request
+ * reinitialisation of its queues.
+ */
+#define VFDI_EV_SEQ_LBN 24
+#define VFDI_EV_SEQ_WIDTH 8
+#define VFDI_EV_TYPE_LBN 16
+#define VFDI_EV_TYPE_WIDTH 8
+#define VFDI_EV_TYPE_REQ_WORD0 0
+#define VFDI_EV_TYPE_REQ_WORD1 1
+#define VFDI_EV_TYPE_REQ_WORD2 2
+#define VFDI_EV_TYPE_REQ_WORD3 3
+#define VFDI_EV_TYPE_STATUS 4
+#define VFDI_EV_TYPE_RESET 5
+#define VFDI_EV_DATA_LBN 0
+#define VFDI_EV_DATA_WIDTH 16
+
+struct vfdi_endpoint {
+ u8 mac_addr[ETH_ALEN];
+ __be16 tci;
+};
+
+/**
+ * enum vfdi_op - VFDI operation enumeration
+ * @VFDI_OP_RESPONSE: Indicates a response to the request.
+ * @VFDI_OP_INIT_EVQ: Initialize SRAM entries and initialize an EVQ.
+ * @VFDI_OP_INIT_RXQ: Initialize SRAM entries and initialize an RXQ.
+ * @VFDI_OP_INIT_TXQ: Initialize SRAM entries and initialize a TXQ.
+ * @VFDI_OP_FINI_ALL_QUEUES: Flush all queues, finalize all queues, then
+ * finalize the SRAM entries.
+ * @VFDI_OP_INSERT_FILTER: Insert a MAC filter targeting the given RXQ.
+ * @VFDI_OP_REMOVE_ALL_FILTERS: Remove all filters.
+ * @VFDI_OP_SET_STATUS_PAGE: Set the DMA page(s) used for status updates
+ * from PF and write the initial status.
+ * @VFDI_OP_CLEAR_STATUS_PAGE: Clear the DMA page(s) used for status
+ * updates from PF.
+ */
+enum vfdi_op {
+ VFDI_OP_RESPONSE = 0,
+ VFDI_OP_INIT_EVQ = 1,
+ VFDI_OP_INIT_RXQ = 2,
+ VFDI_OP_INIT_TXQ = 3,
+ VFDI_OP_FINI_ALL_QUEUES = 4,
+ VFDI_OP_INSERT_FILTER = 5,
+ VFDI_OP_REMOVE_ALL_FILTERS = 6,
+ VFDI_OP_SET_STATUS_PAGE = 7,
+ VFDI_OP_CLEAR_STATUS_PAGE = 8,
+ VFDI_OP_LIMIT,
+};
+
+/* Response codes for VFDI operations. Other values may be used in future. */
+#define VFDI_RC_SUCCESS 0
+#define VFDI_RC_ENOMEM (-12)
+#define VFDI_RC_EINVAL (-22)
+#define VFDI_RC_EOPNOTSUPP (-95)
+#define VFDI_RC_ETIMEDOUT (-110)
+
+/**
+ * struct vfdi_req - Request from VF driver to PF driver
+ * @op: Operation code or response indicator, taken from &enum vfdi_op.
+ * @rc: Response code. Set to 0 on success or a negative error code on failure.
+ * @u.init_evq.index: Index of event queue to create.
+ * @u.init_evq.buf_count: Number of 4k buffers backing event queue.
+ * @u.init_evq.addr: Array of length %u.init_evq.buf_count containing DMA
+ * address of each page backing the event queue.
+ * @u.init_rxq.index: Index of receive queue to create.
+ * @u.init_rxq.buf_count: Number of 4k buffers backing receive queue.
+ * @u.init_rxq.evq: Instance of event queue to target receive events at.
+ * @u.init_rxq.label: Label used in receive events.
+ * @u.init_rxq.flags: Unused.
+ * @u.init_rxq.addr: Array of length %u.init_rxq.buf_count containing DMA
+ * address of each page backing the receive queue.
+ * @u.init_txq.index: Index of transmit queue to create.
+ * @u.init_txq.buf_count: Number of 4k buffers backing transmit queue.
+ * @u.init_txq.evq: Instance of event queue to target transmit completion
+ * events at.
+ * @u.init_txq.label: Label used in transmit completion events.
+ * @u.init_txq.flags: Checksum offload flags.
+ * @u.init_txq.addr: Array of length %u.init_txq.buf_count containing DMA
+ * address of each page backing the transmit queue.
+ * @u.mac_filter.rxq: Insert MAC filter at VF local address/VLAN targeting
+ * all traffic at this receive queue.
+ * @u.mac_filter.flags: MAC filter flags.
+ * @u.set_status_page.dma_addr: Base address for the &struct vfdi_status.
+ * This address must be page-aligned and the PF may write up to a
+ * whole page (allowing for extension of the structure).
+ * @u.set_status_page.peer_page_count: Number of additional pages the VF
+ * has provided into which peer addresses may be DMAd.
+ * @u.set_status_page.peer_page_addr: Array of DMA addresses of pages.
+ * If the number of peers exceeds 256, then the VF must provide
+ * additional pages in this array. The PF will then DMA up to
+ * 512 vfdi_endpoint structures into each page. These addresses
+ * must be page-aligned.
+ */
+struct vfdi_req {
+ u32 op;
+ u32 reserved1;
+ s32 rc;
+ u32 reserved2;
+ union {
+ struct {
+ u32 index;
+ u32 buf_count;
+ u64 addr[];
+ } init_evq;
+ struct {
+ u32 index;
+ u32 buf_count;
+ u32 evq;
+ u32 label;
+ u32 flags;
+#define VFDI_RXQ_FLAG_SCATTER_EN 1
+ u32 reserved;
+ u64 addr[];
+ } init_rxq;
+ struct {
+ u32 index;
+ u32 buf_count;
+ u32 evq;
+ u32 label;
+ u32 flags;
+#define VFDI_TXQ_FLAG_IP_CSUM_DIS 1
+#define VFDI_TXQ_FLAG_TCPUDP_CSUM_DIS 2
+ u32 reserved;
+ u64 addr[];
+ } init_txq;
+ struct {
+ u32 rxq;
+ u32 flags;
+#define VFDI_MAC_FILTER_FLAG_RSS 1
+#define VFDI_MAC_FILTER_FLAG_SCATTER 2
+ } mac_filter;
+ struct {
+ u64 dma_addr;
+ u64 peer_page_count;
+ u64 peer_page_addr[];
+ } set_status_page;
+ } u;
+};
+
+/**
+ * struct vfdi_status - Status provided by PF driver to VF driver
+ * @generation_start: A generation count DMA'd to VF *before* the
+ * rest of the structure.
+ * @generation_end: A generation count DMA'd to VF *after* the
+ * rest of the structure.
+ * @version: Version of this structure; currently set to 1. Later
+ * versions must either be layout-compatible or only be sent to VFs
+ * that specifically request them.
+ * @length: Total length of this structure including embedded tables
+ * @vi_scale: log2 the number of VIs available on this VF. This quantity
+ * is used by the hardware for register decoding.
+ * @max_tx_channels: The maximum number of transmit queues the VF can use.
+ * @rss_rxq_count: The number of receive queues present in the shared RSS
+ * indirection table.
+ * @peer_count: Total number of peers in the complete peer list. If larger
+ * than ARRAY_SIZE(%peers), then the VF must provide sufficient
+ * additional pages each of which is filled with vfdi_endpoint structures.
+ * @local: The MAC address and outer VLAN tag of *this* VF
+ * @peers: Table of peer addresses. The @tci fields in these structures
+ * are currently unused and must be ignored. Additional peers are
+ * written into any additional pages provided by the VF.
+ * @timer_quantum_ns: Timer quantum (nominal period between timer ticks)
+ * for interrupt moderation timers, in nanoseconds. This member is only
+ * present if @length is sufficiently large.
+ */
+struct vfdi_status {
+ u32 generation_start;
+ u32 generation_end;
+ u32 version;
+ u32 length;
+ u8 vi_scale;
+ u8 max_tx_channels;
+ u8 rss_rxq_count;
+ u8 reserved1;
+ u16 peer_count;
+ u16 reserved2;
+ struct vfdi_endpoint local;
+ struct vfdi_endpoint peers[256];
+
+ /* Members below here extend version 1 of this structure */
+ u32 timer_quantum_ns;
+};
+
+#endif
diff --git a/drivers/net/ethernet/sfc/siena/workarounds.h b/drivers/net/ethernet/sfc/siena/workarounds.h
new file mode 100644
index 000000000000..815be2d20c4b
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/workarounds.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ */
+
+#ifndef EFX_WORKAROUNDS_H
+#define EFX_WORKAROUNDS_H
+
+/*
+ * Hardware workarounds.
+ * Bug numbers are from Solarflare's Bugzilla.
+ */
+
+#define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0)
+#define EFX_WORKAROUND_EF10(efx) (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
+#define EFX_WORKAROUND_10G(efx) 1
+
+/* Bit-bashed I2C reads cause performance drop */
+#define EFX_WORKAROUND_7884 EFX_WORKAROUND_10G
+/* Legacy interrupt storm when interrupt fifo fills */
+#define EFX_WORKAROUND_17213 EFX_WORKAROUND_SIENA
+
+/* Lockup when writing event block registers at gen2/gen3 */
+#define EFX_EF10_WORKAROUND_35388(efx) \
+ (((struct efx_ef10_nic_data *)efx->nic_data)->workaround_35388)
+#define EFX_WORKAROUND_35388(efx) \
+ (efx_nic_rev(efx) == EFX_REV_HUNT_A0 && EFX_EF10_WORKAROUND_35388(efx))
+
+/* Moderation timer access must go through MCDI */
+#define EFX_EF10_WORKAROUND_61265(efx) \
+ (((struct efx_ef10_nic_data *)efx->nic_data)->workaround_61265)
+
+#endif /* EFX_WORKAROUNDS_H */