diff options
author | Martin Habets <martinh@xilinx.com> | 2022-05-09 16:31:43 +0100 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-05-10 15:38:14 -0700 |
commit | d48523cb88e0703055c1b33e61eb644a7976f92b (patch) | |
tree | 2c7a176c21ecf2d6305bd1ef8581475c0ea7c228 /drivers/net/ethernet/sfc/siena | |
parent | 6e173d3b4af9e8804ebdbdb7a4afd7ed8f96220b (diff) |
sfc: Copy shared files needed for Siena (part 2)
These are the files starting with m through w.
No changes are done, those will be done with subsequent commits.
Signed-off-by: Martin Habets <habetsm.xilinx@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/net/ethernet/sfc/siena')
27 files changed, 14153 insertions, 0 deletions
diff --git a/drivers/net/ethernet/sfc/siena/mcdi.c b/drivers/net/ethernet/sfc/siena/mcdi.c new file mode 100644 index 000000000000..50baf62b2cbc --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/mcdi.c @@ -0,0 +1,2375 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2008-2013 Solarflare Communications Inc. + */ + +#include <linux/delay.h> +#include <linux/moduleparam.h> +#include <linux/atomic.h> +#include "net_driver.h" +#include "nic.h" +#include "io.h" +#include "farch_regs.h" +#include "mcdi_pcol.h" + +/************************************************************************** + * + * Management-Controller-to-Driver Interface + * + ************************************************************************** + */ + +#define MCDI_RPC_TIMEOUT (10 * HZ) + +/* A reboot/assertion causes the MCDI status word to be set after the + * command word is set or a REBOOT event is sent. If we notice a reboot + * via these mechanisms then wait 250ms for the status word to be set. + */ +#define MCDI_STATUS_DELAY_US 100 +#define MCDI_STATUS_DELAY_COUNT 2500 +#define MCDI_STATUS_SLEEP_MS \ + (MCDI_STATUS_DELAY_US * MCDI_STATUS_DELAY_COUNT / 1000) + +#define SEQ_MASK \ + EFX_MASK32(EFX_WIDTH(MCDI_HEADER_SEQ)) + +struct efx_mcdi_async_param { + struct list_head list; + unsigned int cmd; + size_t inlen; + size_t outlen; + bool quiet; + efx_mcdi_async_completer *complete; + unsigned long cookie; + /* followed by request/response buffer */ +}; + +static void efx_mcdi_timeout_async(struct timer_list *t); +static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, + bool *was_attached_out); +static bool efx_mcdi_poll_once(struct efx_nic *efx); +static void efx_mcdi_abandon(struct efx_nic *efx); + +#ifdef CONFIG_SFC_MCDI_LOGGING +static bool mcdi_logging_default; +module_param(mcdi_logging_default, bool, 0644); +MODULE_PARM_DESC(mcdi_logging_default, + "Enable MCDI logging on newly-probed functions"); +#endif + +int efx_mcdi_init(struct efx_nic *efx) +{ + struct efx_mcdi_iface *mcdi; + bool already_attached; + int rc = -ENOMEM; + + efx->mcdi = kzalloc(sizeof(*efx->mcdi), GFP_KERNEL); + if (!efx->mcdi) + goto fail; + + mcdi = efx_mcdi(efx); + mcdi->efx = efx; +#ifdef CONFIG_SFC_MCDI_LOGGING + /* consuming code assumes buffer is page-sized */ + mcdi->logging_buffer = (char *)__get_free_page(GFP_KERNEL); + if (!mcdi->logging_buffer) + goto fail1; + mcdi->logging_enabled = mcdi_logging_default; +#endif + init_waitqueue_head(&mcdi->wq); + init_waitqueue_head(&mcdi->proxy_rx_wq); + spin_lock_init(&mcdi->iface_lock); + mcdi->state = MCDI_STATE_QUIESCENT; + mcdi->mode = MCDI_MODE_POLL; + spin_lock_init(&mcdi->async_lock); + INIT_LIST_HEAD(&mcdi->async_list); + timer_setup(&mcdi->async_timer, efx_mcdi_timeout_async, 0); + + (void) efx_mcdi_poll_reboot(efx); + mcdi->new_epoch = true; + + /* Recover from a failed assertion before probing */ + rc = efx_mcdi_handle_assertion(efx); + if (rc) + goto fail2; + + /* Let the MC (and BMC, if this is a LOM) know that the driver + * is loaded. We should do this before we reset the NIC. + */ + rc = efx_mcdi_drv_attach(efx, true, &already_attached); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "Unable to register driver with MCPU\n"); + goto fail2; + } + if (already_attached) + /* Not a fatal error */ + netif_err(efx, probe, efx->net_dev, + "Host already registered with MCPU\n"); + + if (efx->mcdi->fn_flags & + (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_PRIMARY)) + efx->primary = efx; + + return 0; +fail2: +#ifdef CONFIG_SFC_MCDI_LOGGING + free_page((unsigned long)mcdi->logging_buffer); +fail1: +#endif + kfree(efx->mcdi); + efx->mcdi = NULL; +fail: + return rc; +} + +void efx_mcdi_detach(struct efx_nic *efx) +{ + if (!efx->mcdi) + return; + + BUG_ON(efx->mcdi->iface.state != MCDI_STATE_QUIESCENT); + + /* Relinquish the device (back to the BMC, if this is a LOM) */ + efx_mcdi_drv_attach(efx, false, NULL); +} + +void efx_mcdi_fini(struct efx_nic *efx) +{ + if (!efx->mcdi) + return; + +#ifdef CONFIG_SFC_MCDI_LOGGING + free_page((unsigned long)efx->mcdi->iface.logging_buffer); +#endif + + kfree(efx->mcdi); +} + +static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd, + const efx_dword_t *inbuf, size_t inlen) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); +#ifdef CONFIG_SFC_MCDI_LOGGING + char *buf = mcdi->logging_buffer; /* page-sized */ +#endif + efx_dword_t hdr[2]; + size_t hdr_len; + u32 xflags, seqno; + + BUG_ON(mcdi->state == MCDI_STATE_QUIESCENT); + + /* Serialise with efx_mcdi_ev_cpl() and efx_mcdi_ev_death() */ + spin_lock_bh(&mcdi->iface_lock); + ++mcdi->seqno; + seqno = mcdi->seqno & SEQ_MASK; + spin_unlock_bh(&mcdi->iface_lock); + + xflags = 0; + if (mcdi->mode == MCDI_MODE_EVENTS) + xflags |= MCDI_HEADER_XFLAGS_EVREQ; + + if (efx->type->mcdi_max_ver == 1) { + /* MCDI v1 */ + EFX_POPULATE_DWORD_7(hdr[0], + MCDI_HEADER_RESPONSE, 0, + MCDI_HEADER_RESYNC, 1, + MCDI_HEADER_CODE, cmd, + MCDI_HEADER_DATALEN, inlen, + MCDI_HEADER_SEQ, seqno, + MCDI_HEADER_XFLAGS, xflags, + MCDI_HEADER_NOT_EPOCH, !mcdi->new_epoch); + hdr_len = 4; + } else { + /* MCDI v2 */ + BUG_ON(inlen > MCDI_CTL_SDU_LEN_MAX_V2); + EFX_POPULATE_DWORD_7(hdr[0], + MCDI_HEADER_RESPONSE, 0, + MCDI_HEADER_RESYNC, 1, + MCDI_HEADER_CODE, MC_CMD_V2_EXTN, + MCDI_HEADER_DATALEN, 0, + MCDI_HEADER_SEQ, seqno, + MCDI_HEADER_XFLAGS, xflags, + MCDI_HEADER_NOT_EPOCH, !mcdi->new_epoch); + EFX_POPULATE_DWORD_2(hdr[1], + MC_CMD_V2_EXTN_IN_EXTENDED_CMD, cmd, + MC_CMD_V2_EXTN_IN_ACTUAL_LEN, inlen); + hdr_len = 8; + } + +#ifdef CONFIG_SFC_MCDI_LOGGING + if (mcdi->logging_enabled && !WARN_ON_ONCE(!buf)) { + int bytes = 0; + int i; + /* Lengths should always be a whole number of dwords, so scream + * if they're not. + */ + WARN_ON_ONCE(hdr_len % 4); + WARN_ON_ONCE(inlen % 4); + + /* We own the logging buffer, as only one MCDI can be in + * progress on a NIC at any one time. So no need for locking. + */ + for (i = 0; i < hdr_len / 4 && bytes < PAGE_SIZE; i++) + bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, + " %08x", + le32_to_cpu(hdr[i].u32[0])); + + for (i = 0; i < inlen / 4 && bytes < PAGE_SIZE; i++) + bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, + " %08x", + le32_to_cpu(inbuf[i].u32[0])); + + netif_info(efx, hw, efx->net_dev, "MCDI RPC REQ:%s\n", buf); + } +#endif + + efx->type->mcdi_request(efx, hdr, hdr_len, inbuf, inlen); + + mcdi->new_epoch = false; +} + +static int efx_mcdi_errno(unsigned int mcdi_err) +{ + switch (mcdi_err) { + case 0: + return 0; +#define TRANSLATE_ERROR(name) \ + case MC_CMD_ERR_ ## name: \ + return -name; + TRANSLATE_ERROR(EPERM); + TRANSLATE_ERROR(ENOENT); + TRANSLATE_ERROR(EINTR); + TRANSLATE_ERROR(EAGAIN); + TRANSLATE_ERROR(EACCES); + TRANSLATE_ERROR(EBUSY); + TRANSLATE_ERROR(EINVAL); + TRANSLATE_ERROR(EDEADLK); + TRANSLATE_ERROR(ENOSYS); + TRANSLATE_ERROR(ETIME); + TRANSLATE_ERROR(EALREADY); + TRANSLATE_ERROR(ENOSPC); +#undef TRANSLATE_ERROR + case MC_CMD_ERR_ENOTSUP: + return -EOPNOTSUPP; + case MC_CMD_ERR_ALLOC_FAIL: + return -ENOBUFS; + case MC_CMD_ERR_MAC_EXIST: + return -EADDRINUSE; + default: + return -EPROTO; + } +} + +static void efx_mcdi_read_response_header(struct efx_nic *efx) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + unsigned int respseq, respcmd, error; +#ifdef CONFIG_SFC_MCDI_LOGGING + char *buf = mcdi->logging_buffer; /* page-sized */ +#endif + efx_dword_t hdr; + + efx->type->mcdi_read_response(efx, &hdr, 0, 4); + respseq = EFX_DWORD_FIELD(hdr, MCDI_HEADER_SEQ); + respcmd = EFX_DWORD_FIELD(hdr, MCDI_HEADER_CODE); + error = EFX_DWORD_FIELD(hdr, MCDI_HEADER_ERROR); + + if (respcmd != MC_CMD_V2_EXTN) { + mcdi->resp_hdr_len = 4; + mcdi->resp_data_len = EFX_DWORD_FIELD(hdr, MCDI_HEADER_DATALEN); + } else { + efx->type->mcdi_read_response(efx, &hdr, 4, 4); + mcdi->resp_hdr_len = 8; + mcdi->resp_data_len = + EFX_DWORD_FIELD(hdr, MC_CMD_V2_EXTN_IN_ACTUAL_LEN); + } + +#ifdef CONFIG_SFC_MCDI_LOGGING + if (mcdi->logging_enabled && !WARN_ON_ONCE(!buf)) { + size_t hdr_len, data_len; + int bytes = 0; + int i; + + WARN_ON_ONCE(mcdi->resp_hdr_len % 4); + hdr_len = mcdi->resp_hdr_len / 4; + /* MCDI_DECLARE_BUF ensures that underlying buffer is padded + * to dword size, and the MCDI buffer is always dword size + */ + data_len = DIV_ROUND_UP(mcdi->resp_data_len, 4); + + /* We own the logging buffer, as only one MCDI can be in + * progress on a NIC at any one time. So no need for locking. + */ + for (i = 0; i < hdr_len && bytes < PAGE_SIZE; i++) { + efx->type->mcdi_read_response(efx, &hdr, (i * 4), 4); + bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, + " %08x", le32_to_cpu(hdr.u32[0])); + } + + for (i = 0; i < data_len && bytes < PAGE_SIZE; i++) { + efx->type->mcdi_read_response(efx, &hdr, + mcdi->resp_hdr_len + (i * 4), 4); + bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, + " %08x", le32_to_cpu(hdr.u32[0])); + } + + netif_info(efx, hw, efx->net_dev, "MCDI RPC RESP:%s\n", buf); + } +#endif + + mcdi->resprc_raw = 0; + if (error && mcdi->resp_data_len == 0) { + netif_err(efx, hw, efx->net_dev, "MC rebooted\n"); + mcdi->resprc = -EIO; + } else if ((respseq ^ mcdi->seqno) & SEQ_MASK) { + netif_err(efx, hw, efx->net_dev, + "MC response mismatch tx seq 0x%x rx seq 0x%x\n", + respseq, mcdi->seqno); + mcdi->resprc = -EIO; + } else if (error) { + efx->type->mcdi_read_response(efx, &hdr, mcdi->resp_hdr_len, 4); + mcdi->resprc_raw = EFX_DWORD_FIELD(hdr, EFX_DWORD_0); + mcdi->resprc = efx_mcdi_errno(mcdi->resprc_raw); + } else { + mcdi->resprc = 0; + } +} + +static bool efx_mcdi_poll_once(struct efx_nic *efx) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + rmb(); + if (!efx->type->mcdi_poll_response(efx)) + return false; + + spin_lock_bh(&mcdi->iface_lock); + efx_mcdi_read_response_header(efx); + spin_unlock_bh(&mcdi->iface_lock); + + return true; +} + +static int efx_mcdi_poll(struct efx_nic *efx) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + unsigned long time, finish; + unsigned int spins; + int rc; + + /* Check for a reboot atomically with respect to efx_mcdi_copyout() */ + rc = efx_mcdi_poll_reboot(efx); + if (rc) { + spin_lock_bh(&mcdi->iface_lock); + mcdi->resprc = rc; + mcdi->resp_hdr_len = 0; + mcdi->resp_data_len = 0; + spin_unlock_bh(&mcdi->iface_lock); + return 0; + } + + /* Poll for completion. Poll quickly (once a us) for the 1st jiffy, + * because generally mcdi responses are fast. After that, back off + * and poll once a jiffy (approximately) + */ + spins = USER_TICK_USEC; + finish = jiffies + MCDI_RPC_TIMEOUT; + + while (1) { + if (spins != 0) { + --spins; + udelay(1); + } else { + schedule_timeout_uninterruptible(1); + } + + time = jiffies; + + if (efx_mcdi_poll_once(efx)) + break; + + if (time_after(time, finish)) + return -ETIMEDOUT; + } + + /* Return rc=0 like wait_event_timeout() */ + return 0; +} + +/* Test and clear MC-rebooted flag for this port/function; reset + * software state as necessary. + */ +int efx_mcdi_poll_reboot(struct efx_nic *efx) +{ + if (!efx->mcdi) + return 0; + + return efx->type->mcdi_poll_reboot(efx); +} + +static bool efx_mcdi_acquire_async(struct efx_mcdi_iface *mcdi) +{ + return cmpxchg(&mcdi->state, + MCDI_STATE_QUIESCENT, MCDI_STATE_RUNNING_ASYNC) == + MCDI_STATE_QUIESCENT; +} + +static void efx_mcdi_acquire_sync(struct efx_mcdi_iface *mcdi) +{ + /* Wait until the interface becomes QUIESCENT and we win the race + * to mark it RUNNING_SYNC. + */ + wait_event(mcdi->wq, + cmpxchg(&mcdi->state, + MCDI_STATE_QUIESCENT, MCDI_STATE_RUNNING_SYNC) == + MCDI_STATE_QUIESCENT); +} + +static int efx_mcdi_await_completion(struct efx_nic *efx) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + if (wait_event_timeout(mcdi->wq, mcdi->state == MCDI_STATE_COMPLETED, + MCDI_RPC_TIMEOUT) == 0) + return -ETIMEDOUT; + + /* Check if efx_mcdi_set_mode() switched us back to polled completions. + * In which case, poll for completions directly. If efx_mcdi_ev_cpl() + * completed the request first, then we'll just end up completing the + * request again, which is safe. + * + * We need an smp_rmb() to synchronise with efx_mcdi_mode_poll(), which + * wait_event_timeout() implicitly provides. + */ + if (mcdi->mode == MCDI_MODE_POLL) + return efx_mcdi_poll(efx); + + return 0; +} + +/* If the interface is RUNNING_SYNC, switch to COMPLETED and wake the + * requester. Return whether this was done. Does not take any locks. + */ +static bool efx_mcdi_complete_sync(struct efx_mcdi_iface *mcdi) +{ + if (cmpxchg(&mcdi->state, + MCDI_STATE_RUNNING_SYNC, MCDI_STATE_COMPLETED) == + MCDI_STATE_RUNNING_SYNC) { + wake_up(&mcdi->wq); + return true; + } + + return false; +} + +static void efx_mcdi_release(struct efx_mcdi_iface *mcdi) +{ + if (mcdi->mode == MCDI_MODE_EVENTS) { + struct efx_mcdi_async_param *async; + struct efx_nic *efx = mcdi->efx; + + /* Process the asynchronous request queue */ + spin_lock_bh(&mcdi->async_lock); + async = list_first_entry_or_null( + &mcdi->async_list, struct efx_mcdi_async_param, list); + if (async) { + mcdi->state = MCDI_STATE_RUNNING_ASYNC; + efx_mcdi_send_request(efx, async->cmd, + (const efx_dword_t *)(async + 1), + async->inlen); + mod_timer(&mcdi->async_timer, + jiffies + MCDI_RPC_TIMEOUT); + } + spin_unlock_bh(&mcdi->async_lock); + + if (async) + return; + } + + mcdi->state = MCDI_STATE_QUIESCENT; + wake_up(&mcdi->wq); +} + +/* If the interface is RUNNING_ASYNC, switch to COMPLETED, call the + * asynchronous completion function, and release the interface. + * Return whether this was done. Must be called in bh-disabled + * context. Will take iface_lock and async_lock. + */ +static bool efx_mcdi_complete_async(struct efx_mcdi_iface *mcdi, bool timeout) +{ + struct efx_nic *efx = mcdi->efx; + struct efx_mcdi_async_param *async; + size_t hdr_len, data_len, err_len; + efx_dword_t *outbuf; + MCDI_DECLARE_BUF_ERR(errbuf); + int rc; + + if (cmpxchg(&mcdi->state, + MCDI_STATE_RUNNING_ASYNC, MCDI_STATE_COMPLETED) != + MCDI_STATE_RUNNING_ASYNC) + return false; + + spin_lock(&mcdi->iface_lock); + if (timeout) { + /* Ensure that if the completion event arrives later, + * the seqno check in efx_mcdi_ev_cpl() will fail + */ + ++mcdi->seqno; + ++mcdi->credits; + rc = -ETIMEDOUT; + hdr_len = 0; + data_len = 0; + } else { + rc = mcdi->resprc; + hdr_len = mcdi->resp_hdr_len; + data_len = mcdi->resp_data_len; + } + spin_unlock(&mcdi->iface_lock); + + /* Stop the timer. In case the timer function is running, we + * must wait for it to return so that there is no possibility + * of it aborting the next request. + */ + if (!timeout) + del_timer_sync(&mcdi->async_timer); + + spin_lock(&mcdi->async_lock); + async = list_first_entry(&mcdi->async_list, + struct efx_mcdi_async_param, list); + list_del(&async->list); + spin_unlock(&mcdi->async_lock); + + outbuf = (efx_dword_t *)(async + 1); + efx->type->mcdi_read_response(efx, outbuf, hdr_len, + min(async->outlen, data_len)); + if (!timeout && rc && !async->quiet) { + err_len = min(sizeof(errbuf), data_len); + efx->type->mcdi_read_response(efx, errbuf, hdr_len, + sizeof(errbuf)); + efx_mcdi_display_error(efx, async->cmd, async->inlen, errbuf, + err_len, rc); + } + + if (async->complete) + async->complete(efx, async->cookie, rc, outbuf, + min(async->outlen, data_len)); + kfree(async); + + efx_mcdi_release(mcdi); + + return true; +} + +static void efx_mcdi_ev_cpl(struct efx_nic *efx, unsigned int seqno, + unsigned int datalen, unsigned int mcdi_err) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + bool wake = false; + + spin_lock(&mcdi->iface_lock); + + if ((seqno ^ mcdi->seqno) & SEQ_MASK) { + if (mcdi->credits) + /* The request has been cancelled */ + --mcdi->credits; + else + netif_err(efx, hw, efx->net_dev, + "MC response mismatch tx seq 0x%x rx " + "seq 0x%x\n", seqno, mcdi->seqno); + } else { + if (efx->type->mcdi_max_ver >= 2) { + /* MCDI v2 responses don't fit in an event */ + efx_mcdi_read_response_header(efx); + } else { + mcdi->resprc = efx_mcdi_errno(mcdi_err); + mcdi->resp_hdr_len = 4; + mcdi->resp_data_len = datalen; + } + + wake = true; + } + + spin_unlock(&mcdi->iface_lock); + + if (wake) { + if (!efx_mcdi_complete_async(mcdi, false)) + (void) efx_mcdi_complete_sync(mcdi); + + /* If the interface isn't RUNNING_ASYNC or + * RUNNING_SYNC then we've received a duplicate + * completion after we've already transitioned back to + * QUIESCENT. [A subsequent invocation would increment + * seqno, so would have failed the seqno check]. + */ + } +} + +static void efx_mcdi_timeout_async(struct timer_list *t) +{ + struct efx_mcdi_iface *mcdi = from_timer(mcdi, t, async_timer); + + efx_mcdi_complete_async(mcdi, true); +} + +static int +efx_mcdi_check_supported(struct efx_nic *efx, unsigned int cmd, size_t inlen) +{ + if (efx->type->mcdi_max_ver < 0 || + (efx->type->mcdi_max_ver < 2 && + cmd > MC_CMD_CMD_SPACE_ESCAPE_7)) + return -EINVAL; + + if (inlen > MCDI_CTL_SDU_LEN_MAX_V2 || + (efx->type->mcdi_max_ver < 2 && + inlen > MCDI_CTL_SDU_LEN_MAX_V1)) + return -EMSGSIZE; + + return 0; +} + +static bool efx_mcdi_get_proxy_handle(struct efx_nic *efx, + size_t hdr_len, size_t data_len, + u32 *proxy_handle) +{ + MCDI_DECLARE_BUF_ERR(testbuf); + const size_t buflen = sizeof(testbuf); + + if (!proxy_handle || data_len < buflen) + return false; + + efx->type->mcdi_read_response(efx, testbuf, hdr_len, buflen); + if (MCDI_DWORD(testbuf, ERR_CODE) == MC_CMD_ERR_PROXY_PENDING) { + *proxy_handle = MCDI_DWORD(testbuf, ERR_PROXY_PENDING_HANDLE); + return true; + } + + return false; +} + +static int _efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned int cmd, + size_t inlen, + efx_dword_t *outbuf, size_t outlen, + size_t *outlen_actual, bool quiet, + u32 *proxy_handle, int *raw_rc) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + MCDI_DECLARE_BUF_ERR(errbuf); + int rc; + + if (mcdi->mode == MCDI_MODE_POLL) + rc = efx_mcdi_poll(efx); + else + rc = efx_mcdi_await_completion(efx); + + if (rc != 0) { + netif_err(efx, hw, efx->net_dev, + "MC command 0x%x inlen %d mode %d timed out\n", + cmd, (int)inlen, mcdi->mode); + + if (mcdi->mode == MCDI_MODE_EVENTS && efx_mcdi_poll_once(efx)) { + netif_err(efx, hw, efx->net_dev, + "MCDI request was completed without an event\n"); + rc = 0; + } + + efx_mcdi_abandon(efx); + + /* Close the race with efx_mcdi_ev_cpl() executing just too late + * and completing a request we've just cancelled, by ensuring + * that the seqno check therein fails. + */ + spin_lock_bh(&mcdi->iface_lock); + ++mcdi->seqno; + ++mcdi->credits; + spin_unlock_bh(&mcdi->iface_lock); + } + + if (proxy_handle) + *proxy_handle = 0; + + if (rc != 0) { + if (outlen_actual) + *outlen_actual = 0; + } else { + size_t hdr_len, data_len, err_len; + + /* At the very least we need a memory barrier here to ensure + * we pick up changes from efx_mcdi_ev_cpl(). Protect against + * a spurious efx_mcdi_ev_cpl() running concurrently by + * acquiring the iface_lock. */ + spin_lock_bh(&mcdi->iface_lock); + rc = mcdi->resprc; + if (raw_rc) + *raw_rc = mcdi->resprc_raw; + hdr_len = mcdi->resp_hdr_len; + data_len = mcdi->resp_data_len; + err_len = min(sizeof(errbuf), data_len); + spin_unlock_bh(&mcdi->iface_lock); + + BUG_ON(rc > 0); + + efx->type->mcdi_read_response(efx, outbuf, hdr_len, + min(outlen, data_len)); + if (outlen_actual) + *outlen_actual = data_len; + + efx->type->mcdi_read_response(efx, errbuf, hdr_len, err_len); + + if (cmd == MC_CMD_REBOOT && rc == -EIO) { + /* Don't reset if MC_CMD_REBOOT returns EIO */ + } else if (rc == -EIO || rc == -EINTR) { + netif_err(efx, hw, efx->net_dev, "MC reboot detected\n"); + netif_dbg(efx, hw, efx->net_dev, "MC rebooted during command %d rc %d\n", + cmd, -rc); + if (efx->type->mcdi_reboot_detected) + efx->type->mcdi_reboot_detected(efx); + efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE); + } else if (proxy_handle && (rc == -EPROTO) && + efx_mcdi_get_proxy_handle(efx, hdr_len, data_len, + proxy_handle)) { + mcdi->proxy_rx_status = 0; + mcdi->proxy_rx_handle = 0; + mcdi->state = MCDI_STATE_PROXY_WAIT; + } else if (rc && !quiet) { + efx_mcdi_display_error(efx, cmd, inlen, errbuf, err_len, + rc); + } + + if (rc == -EIO || rc == -EINTR) { + msleep(MCDI_STATUS_SLEEP_MS); + efx_mcdi_poll_reboot(efx); + mcdi->new_epoch = true; + } + } + + if (!proxy_handle || !*proxy_handle) + efx_mcdi_release(mcdi); + return rc; +} + +static void efx_mcdi_proxy_abort(struct efx_mcdi_iface *mcdi) +{ + if (mcdi->state == MCDI_STATE_PROXY_WAIT) { + /* Interrupt the proxy wait. */ + mcdi->proxy_rx_status = -EINTR; + wake_up(&mcdi->proxy_rx_wq); + } +} + +static void efx_mcdi_ev_proxy_response(struct efx_nic *efx, + u32 handle, int status) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + WARN_ON(mcdi->state != MCDI_STATE_PROXY_WAIT); + + mcdi->proxy_rx_status = efx_mcdi_errno(status); + /* Ensure the status is written before we update the handle, since the + * latter is used to check if we've finished. + */ + wmb(); + mcdi->proxy_rx_handle = handle; + wake_up(&mcdi->proxy_rx_wq); +} + +static int efx_mcdi_proxy_wait(struct efx_nic *efx, u32 handle, bool quiet) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + int rc; + + /* Wait for a proxy event, or timeout. */ + rc = wait_event_timeout(mcdi->proxy_rx_wq, + mcdi->proxy_rx_handle != 0 || + mcdi->proxy_rx_status == -EINTR, + MCDI_RPC_TIMEOUT); + + if (rc <= 0) { + netif_dbg(efx, hw, efx->net_dev, + "MCDI proxy timeout %d\n", handle); + return -ETIMEDOUT; + } else if (mcdi->proxy_rx_handle != handle) { + netif_warn(efx, hw, efx->net_dev, + "MCDI proxy unexpected handle %d (expected %d)\n", + mcdi->proxy_rx_handle, handle); + return -EINVAL; + } + + return mcdi->proxy_rx_status; +} + +static int _efx_mcdi_rpc(struct efx_nic *efx, unsigned int cmd, + const efx_dword_t *inbuf, size_t inlen, + efx_dword_t *outbuf, size_t outlen, + size_t *outlen_actual, bool quiet, int *raw_rc) +{ + u32 proxy_handle = 0; /* Zero is an invalid proxy handle. */ + int rc; + + if (inbuf && inlen && (inbuf == outbuf)) { + /* The input buffer can't be aliased with the output. */ + WARN_ON(1); + return -EINVAL; + } + + rc = efx_mcdi_rpc_start(efx, cmd, inbuf, inlen); + if (rc) + return rc; + + rc = _efx_mcdi_rpc_finish(efx, cmd, inlen, outbuf, outlen, + outlen_actual, quiet, &proxy_handle, raw_rc); + + if (proxy_handle) { + /* Handle proxy authorisation. This allows approval of MCDI + * operations to be delegated to the admin function, allowing + * fine control over (eg) multicast subscriptions. + */ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + netif_dbg(efx, hw, efx->net_dev, + "MCDI waiting for proxy auth %d\n", + proxy_handle); + rc = efx_mcdi_proxy_wait(efx, proxy_handle, quiet); + + if (rc == 0) { + netif_dbg(efx, hw, efx->net_dev, + "MCDI proxy retry %d\n", proxy_handle); + + /* We now retry the original request. */ + mcdi->state = MCDI_STATE_RUNNING_SYNC; + efx_mcdi_send_request(efx, cmd, inbuf, inlen); + + rc = _efx_mcdi_rpc_finish(efx, cmd, inlen, + outbuf, outlen, outlen_actual, + quiet, NULL, raw_rc); + } else { + netif_cond_dbg(efx, hw, efx->net_dev, rc == -EPERM, err, + "MC command 0x%x failed after proxy auth rc=%d\n", + cmd, rc); + + if (rc == -EINTR || rc == -EIO) + efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE); + efx_mcdi_release(mcdi); + } + } + + return rc; +} + +static int _efx_mcdi_rpc_evb_retry(struct efx_nic *efx, unsigned cmd, + const efx_dword_t *inbuf, size_t inlen, + efx_dword_t *outbuf, size_t outlen, + size_t *outlen_actual, bool quiet) +{ + int raw_rc = 0; + int rc; + + rc = _efx_mcdi_rpc(efx, cmd, inbuf, inlen, + outbuf, outlen, outlen_actual, true, &raw_rc); + + if ((rc == -EPROTO) && (raw_rc == MC_CMD_ERR_NO_EVB_PORT) && + efx->type->is_vf) { + /* If the EVB port isn't available within a VF this may + * mean the PF is still bringing the switch up. We should + * retry our request shortly. + */ + unsigned long abort_time = jiffies + MCDI_RPC_TIMEOUT; + unsigned int delay_us = 10000; + + netif_dbg(efx, hw, efx->net_dev, + "%s: NO_EVB_PORT; will retry request\n", + __func__); + + do { + usleep_range(delay_us, delay_us + 10000); + rc = _efx_mcdi_rpc(efx, cmd, inbuf, inlen, + outbuf, outlen, outlen_actual, + true, &raw_rc); + if (delay_us < 100000) + delay_us <<= 1; + } while ((rc == -EPROTO) && + (raw_rc == MC_CMD_ERR_NO_EVB_PORT) && + time_before(jiffies, abort_time)); + } + + if (rc && !quiet && !(cmd == MC_CMD_REBOOT && rc == -EIO)) + efx_mcdi_display_error(efx, cmd, inlen, + outbuf, outlen, rc); + + return rc; +} + +/** + * efx_mcdi_rpc - Issue an MCDI command and wait for completion + * @efx: NIC through which to issue the command + * @cmd: Command type number + * @inbuf: Command parameters + * @inlen: Length of command parameters, in bytes. Must be a multiple + * of 4 and no greater than %MCDI_CTL_SDU_LEN_MAX_V1. + * @outbuf: Response buffer. May be %NULL if @outlen is 0. + * @outlen: Length of response buffer, in bytes. If the actual + * response is longer than @outlen & ~3, it will be truncated + * to that length. + * @outlen_actual: Pointer through which to return the actual response + * length. May be %NULL if this is not needed. + * + * This function may sleep and therefore must be called in an appropriate + * context. + * + * Return: A negative error code, or zero if successful. The error + * code may come from the MCDI response or may indicate a failure + * to communicate with the MC. In the former case, the response + * will still be copied to @outbuf and *@outlen_actual will be + * set accordingly. In the latter case, *@outlen_actual will be + * set to zero. + */ +int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, + const efx_dword_t *inbuf, size_t inlen, + efx_dword_t *outbuf, size_t outlen, + size_t *outlen_actual) +{ + return _efx_mcdi_rpc_evb_retry(efx, cmd, inbuf, inlen, outbuf, outlen, + outlen_actual, false); +} + +/* Normally, on receiving an error code in the MCDI response, + * efx_mcdi_rpc will log an error message containing (among other + * things) the raw error code, by means of efx_mcdi_display_error. + * This _quiet version suppresses that; if the caller wishes to log + * the error conditionally on the return code, it should call this + * function and is then responsible for calling efx_mcdi_display_error + * as needed. + */ +int efx_mcdi_rpc_quiet(struct efx_nic *efx, unsigned cmd, + const efx_dword_t *inbuf, size_t inlen, + efx_dword_t *outbuf, size_t outlen, + size_t *outlen_actual) +{ + return _efx_mcdi_rpc_evb_retry(efx, cmd, inbuf, inlen, outbuf, outlen, + outlen_actual, true); +} + +int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd, + const efx_dword_t *inbuf, size_t inlen) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + int rc; + + rc = efx_mcdi_check_supported(efx, cmd, inlen); + if (rc) + return rc; + + if (efx->mc_bist_for_other_fn) + return -ENETDOWN; + + if (mcdi->mode == MCDI_MODE_FAIL) + return -ENETDOWN; + + efx_mcdi_acquire_sync(mcdi); + efx_mcdi_send_request(efx, cmd, inbuf, inlen); + return 0; +} + +static int _efx_mcdi_rpc_async(struct efx_nic *efx, unsigned int cmd, + const efx_dword_t *inbuf, size_t inlen, + size_t outlen, + efx_mcdi_async_completer *complete, + unsigned long cookie, bool quiet) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + struct efx_mcdi_async_param *async; + int rc; + + rc = efx_mcdi_check_supported(efx, cmd, inlen); + if (rc) + return rc; + + if (efx->mc_bist_for_other_fn) + return -ENETDOWN; + + async = kmalloc(sizeof(*async) + ALIGN(max(inlen, outlen), 4), + GFP_ATOMIC); + if (!async) + return -ENOMEM; + + async->cmd = cmd; + async->inlen = inlen; + async->outlen = outlen; + async->quiet = quiet; + async->complete = complete; + async->cookie = cookie; + memcpy(async + 1, inbuf, inlen); + + spin_lock_bh(&mcdi->async_lock); + + if (mcdi->mode == MCDI_MODE_EVENTS) { + list_add_tail(&async->list, &mcdi->async_list); + + /* If this is at the front of the queue, try to start it + * immediately + */ + if (mcdi->async_list.next == &async->list && + efx_mcdi_acquire_async(mcdi)) { + efx_mcdi_send_request(efx, cmd, inbuf, inlen); + mod_timer(&mcdi->async_timer, + jiffies + MCDI_RPC_TIMEOUT); + } + } else { + kfree(async); + rc = -ENETDOWN; + } + + spin_unlock_bh(&mcdi->async_lock); + + return rc; +} + +/** + * efx_mcdi_rpc_async - Schedule an MCDI command to run asynchronously + * @efx: NIC through which to issue the command + * @cmd: Command type number + * @inbuf: Command parameters + * @inlen: Length of command parameters, in bytes + * @outlen: Length to allocate for response buffer, in bytes + * @complete: Function to be called on completion or cancellation. + * @cookie: Arbitrary value to be passed to @complete. + * + * This function does not sleep and therefore may be called in atomic + * context. It will fail if event queues are disabled or if MCDI + * event completions have been disabled due to an error. + * + * If it succeeds, the @complete function will be called exactly once + * in atomic context, when one of the following occurs: + * (a) the completion event is received (in NAPI context) + * (b) event queues are disabled (in the process that disables them) + * (c) the request times-out (in timer context) + */ +int +efx_mcdi_rpc_async(struct efx_nic *efx, unsigned int cmd, + const efx_dword_t *inbuf, size_t inlen, size_t outlen, + efx_mcdi_async_completer *complete, unsigned long cookie) +{ + return _efx_mcdi_rpc_async(efx, cmd, inbuf, inlen, outlen, complete, + cookie, false); +} + +int efx_mcdi_rpc_async_quiet(struct efx_nic *efx, unsigned int cmd, + const efx_dword_t *inbuf, size_t inlen, + size_t outlen, efx_mcdi_async_completer *complete, + unsigned long cookie) +{ + return _efx_mcdi_rpc_async(efx, cmd, inbuf, inlen, outlen, complete, + cookie, true); +} + +int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, + efx_dword_t *outbuf, size_t outlen, + size_t *outlen_actual) +{ + return _efx_mcdi_rpc_finish(efx, cmd, inlen, outbuf, outlen, + outlen_actual, false, NULL, NULL); +} + +int efx_mcdi_rpc_finish_quiet(struct efx_nic *efx, unsigned cmd, size_t inlen, + efx_dword_t *outbuf, size_t outlen, + size_t *outlen_actual) +{ + return _efx_mcdi_rpc_finish(efx, cmd, inlen, outbuf, outlen, + outlen_actual, true, NULL, NULL); +} + +void efx_mcdi_display_error(struct efx_nic *efx, unsigned cmd, + size_t inlen, efx_dword_t *outbuf, + size_t outlen, int rc) +{ + int code = 0, err_arg = 0; + + if (outlen >= MC_CMD_ERR_CODE_OFST + 4) + code = MCDI_DWORD(outbuf, ERR_CODE); + if (outlen >= MC_CMD_ERR_ARG_OFST + 4) + err_arg = MCDI_DWORD(outbuf, ERR_ARG); + netif_cond_dbg(efx, hw, efx->net_dev, rc == -EPERM, err, + "MC command 0x%x inlen %zu failed rc=%d (raw=%d) arg=%d\n", + cmd, inlen, rc, code, err_arg); +} + +/* Switch to polled MCDI completions. This can be called in various + * error conditions with various locks held, so it must be lockless. + * Caller is responsible for flushing asynchronous requests later. + */ +void efx_mcdi_mode_poll(struct efx_nic *efx) +{ + struct efx_mcdi_iface *mcdi; + + if (!efx->mcdi) + return; + + mcdi = efx_mcdi(efx); + /* If already in polling mode, nothing to do. + * If in fail-fast state, don't switch to polled completion. + * FLR recovery will do that later. + */ + if (mcdi->mode == MCDI_MODE_POLL || mcdi->mode == MCDI_MODE_FAIL) + return; + + /* We can switch from event completion to polled completion, because + * mcdi requests are always completed in shared memory. We do this by + * switching the mode to POLL'd then completing the request. + * efx_mcdi_await_completion() will then call efx_mcdi_poll(). + * + * We need an smp_wmb() to synchronise with efx_mcdi_await_completion(), + * which efx_mcdi_complete_sync() provides for us. + */ + mcdi->mode = MCDI_MODE_POLL; + + efx_mcdi_complete_sync(mcdi); +} + +/* Flush any running or queued asynchronous requests, after event processing + * is stopped + */ +void efx_mcdi_flush_async(struct efx_nic *efx) +{ + struct efx_mcdi_async_param *async, *next; + struct efx_mcdi_iface *mcdi; + + if (!efx->mcdi) + return; + + mcdi = efx_mcdi(efx); + + /* We must be in poll or fail mode so no more requests can be queued */ + BUG_ON(mcdi->mode == MCDI_MODE_EVENTS); + + del_timer_sync(&mcdi->async_timer); + + /* If a request is still running, make sure we give the MC + * time to complete it so that the response won't overwrite our + * next request. + */ + if (mcdi->state == MCDI_STATE_RUNNING_ASYNC) { + efx_mcdi_poll(efx); + mcdi->state = MCDI_STATE_QUIESCENT; + } + + /* Nothing else will access the async list now, so it is safe + * to walk it without holding async_lock. If we hold it while + * calling a completer then lockdep may warn that we have + * acquired locks in the wrong order. + */ + list_for_each_entry_safe(async, next, &mcdi->async_list, list) { + if (async->complete) + async->complete(efx, async->cookie, -ENETDOWN, NULL, 0); + list_del(&async->list); + kfree(async); + } +} + +void efx_mcdi_mode_event(struct efx_nic *efx) +{ + struct efx_mcdi_iface *mcdi; + + if (!efx->mcdi) + return; + + mcdi = efx_mcdi(efx); + /* If already in event completion mode, nothing to do. + * If in fail-fast state, don't switch to event completion. FLR + * recovery will do that later. + */ + if (mcdi->mode == MCDI_MODE_EVENTS || mcdi->mode == MCDI_MODE_FAIL) + return; + + /* We can't switch from polled to event completion in the middle of a + * request, because the completion method is specified in the request. + * So acquire the interface to serialise the requestors. We don't need + * to acquire the iface_lock to change the mode here, but we do need a + * write memory barrier ensure that efx_mcdi_rpc() sees it, which + * efx_mcdi_acquire() provides. + */ + efx_mcdi_acquire_sync(mcdi); + mcdi->mode = MCDI_MODE_EVENTS; + efx_mcdi_release(mcdi); +} + +static void efx_mcdi_ev_death(struct efx_nic *efx, int rc) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + /* If there is an outstanding MCDI request, it has been terminated + * either by a BADASSERT or REBOOT event. If the mcdi interface is + * in polled mode, then do nothing because the MC reboot handler will + * set the header correctly. However, if the mcdi interface is waiting + * for a CMDDONE event it won't receive it [and since all MCDI events + * are sent to the same queue, we can't be racing with + * efx_mcdi_ev_cpl()] + * + * If there is an outstanding asynchronous request, we can't + * complete it now (efx_mcdi_complete() would deadlock). The + * reset process will take care of this. + * + * There's a race here with efx_mcdi_send_request(), because + * we might receive a REBOOT event *before* the request has + * been copied out. In polled mode (during startup) this is + * irrelevant, because efx_mcdi_complete_sync() is ignored. In + * event mode, this condition is just an edge-case of + * receiving a REBOOT event after posting the MCDI + * request. Did the mc reboot before or after the copyout? The + * best we can do always is just return failure. + * + * If there is an outstanding proxy response expected it is not going + * to arrive. We should thus abort it. + */ + spin_lock(&mcdi->iface_lock); + efx_mcdi_proxy_abort(mcdi); + + if (efx_mcdi_complete_sync(mcdi)) { + if (mcdi->mode == MCDI_MODE_EVENTS) { + mcdi->resprc = rc; + mcdi->resp_hdr_len = 0; + mcdi->resp_data_len = 0; + ++mcdi->credits; + } + } else { + int count; + + /* Consume the status word since efx_mcdi_rpc_finish() won't */ + for (count = 0; count < MCDI_STATUS_DELAY_COUNT; ++count) { + rc = efx_mcdi_poll_reboot(efx); + if (rc) + break; + udelay(MCDI_STATUS_DELAY_US); + } + + /* On EF10, a CODE_MC_REBOOT event can be received without the + * reboot detection in efx_mcdi_poll_reboot() being triggered. + * If zero was returned from the final call to + * efx_mcdi_poll_reboot(), the MC reboot wasn't noticed but the + * MC has definitely rebooted so prepare for the reset. + */ + if (!rc && efx->type->mcdi_reboot_detected) + efx->type->mcdi_reboot_detected(efx); + + mcdi->new_epoch = true; + + /* Nobody was waiting for an MCDI request, so trigger a reset */ + efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE); + } + + spin_unlock(&mcdi->iface_lock); +} + +/* The MC is going down in to BIST mode. set the BIST flag to block + * new MCDI, cancel any outstanding MCDI and and schedule a BIST-type reset + * (which doesn't actually execute a reset, it waits for the controlling + * function to reset it). + */ +static void efx_mcdi_ev_bist(struct efx_nic *efx) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + spin_lock(&mcdi->iface_lock); + efx->mc_bist_for_other_fn = true; + efx_mcdi_proxy_abort(mcdi); + + if (efx_mcdi_complete_sync(mcdi)) { + if (mcdi->mode == MCDI_MODE_EVENTS) { + mcdi->resprc = -EIO; + mcdi->resp_hdr_len = 0; + mcdi->resp_data_len = 0; + ++mcdi->credits; + } + } + mcdi->new_epoch = true; + efx_schedule_reset(efx, RESET_TYPE_MC_BIST); + spin_unlock(&mcdi->iface_lock); +} + +/* MCDI timeouts seen, so make all MCDI calls fail-fast and issue an FLR to try + * to recover. + */ +static void efx_mcdi_abandon(struct efx_nic *efx) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + if (xchg(&mcdi->mode, MCDI_MODE_FAIL) == MCDI_MODE_FAIL) + return; /* it had already been done */ + netif_dbg(efx, hw, efx->net_dev, "MCDI is timing out; trying to recover\n"); + efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT); +} + +static void efx_handle_drain_event(struct efx_nic *efx) +{ + if (atomic_dec_and_test(&efx->active_queues)) + wake_up(&efx->flush_wq); + + WARN_ON(atomic_read(&efx->active_queues) < 0); +} + +/* Called from efx_farch_ev_process and efx_ef10_ev_process for MCDI events */ +void efx_mcdi_process_event(struct efx_channel *channel, + efx_qword_t *event) +{ + struct efx_nic *efx = channel->efx; + int code = EFX_QWORD_FIELD(*event, MCDI_EVENT_CODE); + u32 data = EFX_QWORD_FIELD(*event, MCDI_EVENT_DATA); + + switch (code) { + case MCDI_EVENT_CODE_BADSSERT: + netif_err(efx, hw, efx->net_dev, + "MC watchdog or assertion failure at 0x%x\n", data); + efx_mcdi_ev_death(efx, -EINTR); + break; + + case MCDI_EVENT_CODE_PMNOTICE: + netif_info(efx, wol, efx->net_dev, "MCDI PM event.\n"); + break; + + case MCDI_EVENT_CODE_CMDDONE: + efx_mcdi_ev_cpl(efx, + MCDI_EVENT_FIELD(*event, CMDDONE_SEQ), + MCDI_EVENT_FIELD(*event, CMDDONE_DATALEN), + MCDI_EVENT_FIELD(*event, CMDDONE_ERRNO)); + break; + + case MCDI_EVENT_CODE_LINKCHANGE: + efx_mcdi_process_link_change(efx, event); + break; + case MCDI_EVENT_CODE_SENSOREVT: + efx_sensor_event(efx, event); + break; + case MCDI_EVENT_CODE_SCHEDERR: + netif_dbg(efx, hw, efx->net_dev, + "MC Scheduler alert (0x%x)\n", data); + break; + case MCDI_EVENT_CODE_REBOOT: + case MCDI_EVENT_CODE_MC_REBOOT: + netif_info(efx, hw, efx->net_dev, "MC Reboot\n"); + efx_mcdi_ev_death(efx, -EIO); + break; + case MCDI_EVENT_CODE_MC_BIST: + netif_info(efx, hw, efx->net_dev, "MC entered BIST mode\n"); + efx_mcdi_ev_bist(efx); + break; + case MCDI_EVENT_CODE_MAC_STATS_DMA: + /* MAC stats are gather lazily. We can ignore this. */ + break; + case MCDI_EVENT_CODE_FLR: + if (efx->type->sriov_flr) + efx->type->sriov_flr(efx, + MCDI_EVENT_FIELD(*event, FLR_VF)); + break; + case MCDI_EVENT_CODE_PTP_RX: + case MCDI_EVENT_CODE_PTP_FAULT: + case MCDI_EVENT_CODE_PTP_PPS: + efx_ptp_event(efx, event); + break; + case MCDI_EVENT_CODE_PTP_TIME: + efx_time_sync_event(channel, event); + break; + case MCDI_EVENT_CODE_TX_FLUSH: + case MCDI_EVENT_CODE_RX_FLUSH: + /* Two flush events will be sent: one to the same event + * queue as completions, and one to event queue 0. + * In the latter case the {RX,TX}_FLUSH_TO_DRIVER + * flag will be set, and we should ignore the event + * because we want to wait for all completions. + */ + BUILD_BUG_ON(MCDI_EVENT_TX_FLUSH_TO_DRIVER_LBN != + MCDI_EVENT_RX_FLUSH_TO_DRIVER_LBN); + if (!MCDI_EVENT_FIELD(*event, TX_FLUSH_TO_DRIVER)) + efx_handle_drain_event(efx); + break; + case MCDI_EVENT_CODE_TX_ERR: + case MCDI_EVENT_CODE_RX_ERR: + netif_err(efx, hw, efx->net_dev, + "%s DMA error (event: "EFX_QWORD_FMT")\n", + code == MCDI_EVENT_CODE_TX_ERR ? "TX" : "RX", + EFX_QWORD_VAL(*event)); + efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR); + break; + case MCDI_EVENT_CODE_PROXY_RESPONSE: + efx_mcdi_ev_proxy_response(efx, + MCDI_EVENT_FIELD(*event, PROXY_RESPONSE_HANDLE), + MCDI_EVENT_FIELD(*event, PROXY_RESPONSE_RC)); + break; + default: + netif_err(efx, hw, efx->net_dev, + "Unknown MCDI event " EFX_QWORD_FMT "\n", + EFX_QWORD_VAL(*event)); + } +} + +/************************************************************************** + * + * Specific request functions + * + ************************************************************************** + */ + +void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_VERSION_OUT_LEN); + size_t outlength; + const __le16 *ver_words; + size_t offset; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_VERSION_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_VERSION, NULL, 0, + outbuf, sizeof(outbuf), &outlength); + if (rc) + goto fail; + if (outlength < MC_CMD_GET_VERSION_OUT_LEN) { + rc = -EIO; + goto fail; + } + + ver_words = (__le16 *)MCDI_PTR(outbuf, GET_VERSION_OUT_VERSION); + offset = scnprintf(buf, len, "%u.%u.%u.%u", + le16_to_cpu(ver_words[0]), + le16_to_cpu(ver_words[1]), + le16_to_cpu(ver_words[2]), + le16_to_cpu(ver_words[3])); + + if (efx->type->print_additional_fwver) + offset += efx->type->print_additional_fwver(efx, buf + offset, + len - offset); + + /* It's theoretically possible for the string to exceed 31 + * characters, though in practice the first three version + * components are short enough that this doesn't happen. + */ + if (WARN_ON(offset >= len)) + buf[0] = 0; + + return; + +fail: + netif_err(efx, probe, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + buf[0] = 0; +} + +static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, + bool *was_attached) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_DRV_ATTACH_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_DRV_ATTACH_EXT_OUT_LEN); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_NEW_STATE, + driver_operating ? 1 : 0); + MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_UPDATE, 1); + MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_FIRMWARE_ID, MC_CMD_FW_LOW_LATENCY); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_DRV_ATTACH, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + /* If we're not the primary PF, trying to ATTACH with a FIRMWARE_ID + * specified will fail with EPERM, and we have to tell the MC we don't + * care what firmware we get. + */ + if (rc == -EPERM) { + netif_dbg(efx, probe, efx->net_dev, + "efx_mcdi_drv_attach with fw-variant setting failed EPERM, trying without it\n"); + MCDI_SET_DWORD(inbuf, DRV_ATTACH_IN_FIRMWARE_ID, + MC_CMD_FW_DONT_CARE); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_DRV_ATTACH, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), + &outlen); + } + if (rc) { + efx_mcdi_display_error(efx, MC_CMD_DRV_ATTACH, sizeof(inbuf), + outbuf, outlen, rc); + goto fail; + } + if (outlen < MC_CMD_DRV_ATTACH_OUT_LEN) { + rc = -EIO; + goto fail; + } + + if (driver_operating) { + if (outlen >= MC_CMD_DRV_ATTACH_EXT_OUT_LEN) { + efx->mcdi->fn_flags = + MCDI_DWORD(outbuf, + DRV_ATTACH_EXT_OUT_FUNC_FLAGS); + } else { + /* Synthesise flags for Siena */ + efx->mcdi->fn_flags = + 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL | + 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_TRUSTED | + (efx_port_num(efx) == 0) << + MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_PRIMARY; + } + } + + /* We currently assume we have control of the external link + * and are completely trusted by firmware. Abort probing + * if that's not true for this function. + */ + + if (was_attached != NULL) + *was_attached = MCDI_DWORD(outbuf, DRV_ATTACH_OUT_OLD_STATE); + return 0; + +fail: + netif_err(efx, probe, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address, + u16 *fw_subtype_list, u32 *capabilities) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_BOARD_CFG_OUT_LENMAX); + size_t outlen, i; + int port_num = efx_port_num(efx); + int rc; + + BUILD_BUG_ON(MC_CMD_GET_BOARD_CFG_IN_LEN != 0); + /* we need __aligned(2) for ether_addr_copy */ + BUILD_BUG_ON(MC_CMD_GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT0_OFST & 1); + BUILD_BUG_ON(MC_CMD_GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT1_OFST & 1); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_BOARD_CFG, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < MC_CMD_GET_BOARD_CFG_OUT_LENMIN) { + rc = -EIO; + goto fail; + } + + if (mac_address) + ether_addr_copy(mac_address, + port_num ? + MCDI_PTR(outbuf, GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT1) : + MCDI_PTR(outbuf, GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT0)); + if (fw_subtype_list) { + for (i = 0; + i < MCDI_VAR_ARRAY_LEN(outlen, + GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST); + i++) + fw_subtype_list[i] = MCDI_ARRAY_WORD( + outbuf, GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST, i); + for (; i < MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MAXNUM; i++) + fw_subtype_list[i] = 0; + } + if (capabilities) { + if (port_num) + *capabilities = MCDI_DWORD(outbuf, + GET_BOARD_CFG_OUT_CAPABILITIES_PORT1); + else + *capabilities = MCDI_DWORD(outbuf, + GET_BOARD_CFG_OUT_CAPABILITIES_PORT0); + } + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d len=%d\n", + __func__, rc, (int)outlen); + + return rc; +} + +int efx_mcdi_log_ctrl(struct efx_nic *efx, bool evq, bool uart, u32 dest_evq) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_LOG_CTRL_IN_LEN); + u32 dest = 0; + int rc; + + if (uart) + dest |= MC_CMD_LOG_CTRL_IN_LOG_DEST_UART; + if (evq) + dest |= MC_CMD_LOG_CTRL_IN_LOG_DEST_EVQ; + + MCDI_SET_DWORD(inbuf, LOG_CTRL_IN_LOG_DEST, dest); + MCDI_SET_DWORD(inbuf, LOG_CTRL_IN_LOG_DEST_EVQ, dest_evq); + + BUILD_BUG_ON(MC_CMD_LOG_CTRL_OUT_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_LOG_CTRL, inbuf, sizeof(inbuf), + NULL, 0, NULL); + return rc; +} + +int efx_mcdi_nvram_types(struct efx_nic *efx, u32 *nvram_types_out) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_TYPES_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_NVRAM_TYPES_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_TYPES, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + if (outlen < MC_CMD_NVRAM_TYPES_OUT_LEN) { + rc = -EIO; + goto fail; + } + + *nvram_types_out = MCDI_DWORD(outbuf, NVRAM_TYPES_OUT_TYPES); + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", + __func__, rc); + return rc; +} + +/* This function finds types using the new NVRAM_PARTITIONS mcdi. */ +static int efx_new_mcdi_nvram_types(struct efx_nic *efx, u32 *number, + u32 *nvram_types) +{ + efx_dword_t *outbuf = kzalloc(MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX_MCDI2, + GFP_KERNEL); + size_t outlen; + int rc; + + if (!outbuf) + return -ENOMEM; + + BUILD_BUG_ON(MC_CMD_NVRAM_PARTITIONS_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_PARTITIONS, NULL, 0, + outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX_MCDI2, &outlen); + if (rc) + goto fail; + + *number = MCDI_DWORD(outbuf, NVRAM_PARTITIONS_OUT_NUM_PARTITIONS); + + memcpy(nvram_types, MCDI_PTR(outbuf, NVRAM_PARTITIONS_OUT_TYPE_ID), + *number * sizeof(u32)); + +fail: + kfree(outbuf); + return rc; +} + +int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type, + size_t *size_out, size_t *erase_size_out, + bool *protected_out) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_INFO_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_INFO_OUT_LEN); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, NVRAM_INFO_IN_TYPE, type); + + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_INFO, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + if (outlen < MC_CMD_NVRAM_INFO_OUT_LEN) { + rc = -EIO; + goto fail; + } + + *size_out = MCDI_DWORD(outbuf, NVRAM_INFO_OUT_SIZE); + *erase_size_out = MCDI_DWORD(outbuf, NVRAM_INFO_OUT_ERASESIZE); + *protected_out = !!(MCDI_DWORD(outbuf, NVRAM_INFO_OUT_FLAGS) & + (1 << MC_CMD_NVRAM_INFO_OUT_PROTECTED_LBN)); + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +static int efx_mcdi_nvram_test(struct efx_nic *efx, unsigned int type) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_TEST_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_TEST_OUT_LEN); + int rc; + + MCDI_SET_DWORD(inbuf, NVRAM_TEST_IN_TYPE, type); + + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_TEST, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), NULL); + if (rc) + return rc; + + switch (MCDI_DWORD(outbuf, NVRAM_TEST_OUT_RESULT)) { + case MC_CMD_NVRAM_TEST_PASS: + case MC_CMD_NVRAM_TEST_NOTSUPP: + return 0; + default: + return -EIO; + } +} + +/* This function tests nvram partitions using the new mcdi partition lookup scheme */ +int efx_new_mcdi_nvram_test_all(struct efx_nic *efx) +{ + u32 *nvram_types = kzalloc(MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX_MCDI2, + GFP_KERNEL); + unsigned int number; + int rc, i; + + if (!nvram_types) + return -ENOMEM; + + rc = efx_new_mcdi_nvram_types(efx, &number, nvram_types); + if (rc) + goto fail; + + /* Require at least one check */ + rc = -EAGAIN; + + for (i = 0; i < number; i++) { + if (nvram_types[i] == NVRAM_PARTITION_TYPE_PARTITION_MAP || + nvram_types[i] == NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG) + continue; + + rc = efx_mcdi_nvram_test(efx, nvram_types[i]); + if (rc) + goto fail; + } + +fail: + kfree(nvram_types); + return rc; +} + +int efx_mcdi_nvram_test_all(struct efx_nic *efx) +{ + u32 nvram_types; + unsigned int type; + int rc; + + rc = efx_mcdi_nvram_types(efx, &nvram_types); + if (rc) + goto fail1; + + type = 0; + while (nvram_types != 0) { + if (nvram_types & 1) { + rc = efx_mcdi_nvram_test(efx, type); + if (rc) + goto fail2; + } + type++; + nvram_types >>= 1; + } + + return 0; + +fail2: + netif_err(efx, hw, efx->net_dev, "%s: failed type=%u\n", + __func__, type); +fail1: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +/* Returns 1 if an assertion was read, 0 if no assertion had fired, + * negative on error. + */ +static int efx_mcdi_read_assertion(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_ASSERTS_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_ASSERTS_OUT_LEN); + unsigned int flags, index; + const char *reason; + size_t outlen; + int retry; + int rc; + + /* Attempt to read any stored assertion state before we reboot + * the mcfw out of the assertion handler. Retry twice, once + * because a boot-time assertion might cause this command to fail + * with EINTR. And once again because GET_ASSERTS can race with + * MC_CMD_REBOOT running on the other port. */ + retry = 2; + do { + MCDI_SET_DWORD(inbuf, GET_ASSERTS_IN_CLEAR, 1); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_ASSERTS, + inbuf, MC_CMD_GET_ASSERTS_IN_LEN, + outbuf, sizeof(outbuf), &outlen); + if (rc == -EPERM) + return 0; + } while ((rc == -EINTR || rc == -EIO) && retry-- > 0); + + if (rc) { + efx_mcdi_display_error(efx, MC_CMD_GET_ASSERTS, + MC_CMD_GET_ASSERTS_IN_LEN, outbuf, + outlen, rc); + return rc; + } + if (outlen < MC_CMD_GET_ASSERTS_OUT_LEN) + return -EIO; + + /* Print out any recorded assertion state */ + flags = MCDI_DWORD(outbuf, GET_ASSERTS_OUT_GLOBAL_FLAGS); + if (flags == MC_CMD_GET_ASSERTS_FLAGS_NO_FAILS) + return 0; + + reason = (flags == MC_CMD_GET_ASSERTS_FLAGS_SYS_FAIL) + ? "system-level assertion" + : (flags == MC_CMD_GET_ASSERTS_FLAGS_THR_FAIL) + ? "thread-level assertion" + : (flags == MC_CMD_GET_ASSERTS_FLAGS_WDOG_FIRED) + ? "watchdog reset" + : "unknown assertion"; + netif_err(efx, hw, efx->net_dev, + "MCPU %s at PC = 0x%.8x in thread 0x%.8x\n", reason, + MCDI_DWORD(outbuf, GET_ASSERTS_OUT_SAVED_PC_OFFS), + MCDI_DWORD(outbuf, GET_ASSERTS_OUT_THREAD_OFFS)); + + /* Print out the registers */ + for (index = 0; + index < MC_CMD_GET_ASSERTS_OUT_GP_REGS_OFFS_NUM; + index++) + netif_err(efx, hw, efx->net_dev, "R%.2d (?): 0x%.8x\n", + 1 + index, + MCDI_ARRAY_DWORD(outbuf, GET_ASSERTS_OUT_GP_REGS_OFFS, + index)); + + return 1; +} + +static int efx_mcdi_exit_assertion(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_REBOOT_IN_LEN); + int rc; + + /* If the MC is running debug firmware, it might now be + * waiting for a debugger to attach, but we just want it to + * reboot. We set a flag that makes the command a no-op if it + * has already done so. + * The MCDI will thus return either 0 or -EIO. + */ + BUILD_BUG_ON(MC_CMD_REBOOT_OUT_LEN != 0); + MCDI_SET_DWORD(inbuf, REBOOT_IN_FLAGS, + MC_CMD_REBOOT_FLAGS_AFTER_ASSERTION); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_REBOOT, inbuf, MC_CMD_REBOOT_IN_LEN, + NULL, 0, NULL); + if (rc == -EIO) + rc = 0; + if (rc) + efx_mcdi_display_error(efx, MC_CMD_REBOOT, MC_CMD_REBOOT_IN_LEN, + NULL, 0, rc); + return rc; +} + +int efx_mcdi_handle_assertion(struct efx_nic *efx) +{ + int rc; + + rc = efx_mcdi_read_assertion(efx); + if (rc <= 0) + return rc; + + return efx_mcdi_exit_assertion(efx); +} + +int efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_ID_LED_IN_LEN); + + BUILD_BUG_ON(EFX_LED_OFF != MC_CMD_LED_OFF); + BUILD_BUG_ON(EFX_LED_ON != MC_CMD_LED_ON); + BUILD_BUG_ON(EFX_LED_DEFAULT != MC_CMD_LED_DEFAULT); + + BUILD_BUG_ON(MC_CMD_SET_ID_LED_OUT_LEN != 0); + + MCDI_SET_DWORD(inbuf, SET_ID_LED_IN_STATE, mode); + + return efx_mcdi_rpc(efx, MC_CMD_SET_ID_LED, inbuf, sizeof(inbuf), NULL, 0, NULL); +} + +static int efx_mcdi_reset_func(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_ENTITY_RESET_IN_LEN); + int rc; + + BUILD_BUG_ON(MC_CMD_ENTITY_RESET_OUT_LEN != 0); + MCDI_POPULATE_DWORD_1(inbuf, ENTITY_RESET_IN_FLAG, + ENTITY_RESET_IN_FUNCTION_RESOURCE_RESET, 1); + rc = efx_mcdi_rpc(efx, MC_CMD_ENTITY_RESET, inbuf, sizeof(inbuf), + NULL, 0, NULL); + return rc; +} + +static int efx_mcdi_reset_mc(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_REBOOT_IN_LEN); + int rc; + + BUILD_BUG_ON(MC_CMD_REBOOT_OUT_LEN != 0); + MCDI_SET_DWORD(inbuf, REBOOT_IN_FLAGS, 0); + rc = efx_mcdi_rpc(efx, MC_CMD_REBOOT, inbuf, sizeof(inbuf), + NULL, 0, NULL); + /* White is black, and up is down */ + if (rc == -EIO) + return 0; + if (rc == 0) + rc = -EIO; + return rc; +} + +enum reset_type efx_mcdi_map_reset_reason(enum reset_type reason) +{ + return RESET_TYPE_RECOVER_OR_ALL; +} + +int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method) +{ + int rc; + + /* If MCDI is down, we can't handle_assertion */ + if (method == RESET_TYPE_MCDI_TIMEOUT) { + rc = pci_reset_function(efx->pci_dev); + if (rc) + return rc; + /* Re-enable polled MCDI completion */ + if (efx->mcdi) { + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + mcdi->mode = MCDI_MODE_POLL; + } + return 0; + } + + /* Recover from a failed assertion pre-reset */ + rc = efx_mcdi_handle_assertion(efx); + if (rc) + return rc; + + if (method == RESET_TYPE_DATAPATH) + return 0; + else if (method == RESET_TYPE_WORLD) + return efx_mcdi_reset_mc(efx); + else + return efx_mcdi_reset_func(efx); +} + +static int efx_mcdi_wol_filter_set(struct efx_nic *efx, u32 type, + const u8 *mac, int *id_out) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_WOL_FILTER_SET_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_WOL_FILTER_SET_OUT_LEN); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, WOL_FILTER_SET_IN_WOL_TYPE, type); + MCDI_SET_DWORD(inbuf, WOL_FILTER_SET_IN_FILTER_MODE, + MC_CMD_FILTER_MODE_SIMPLE); + ether_addr_copy(MCDI_PTR(inbuf, WOL_FILTER_SET_IN_MAGIC_MAC), mac); + + rc = efx_mcdi_rpc(efx, MC_CMD_WOL_FILTER_SET, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < MC_CMD_WOL_FILTER_SET_OUT_LEN) { + rc = -EIO; + goto fail; + } + + *id_out = (int)MCDI_DWORD(outbuf, WOL_FILTER_SET_OUT_FILTER_ID); + + return 0; + +fail: + *id_out = -1; + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; + +} + + +int +efx_mcdi_wol_filter_set_magic(struct efx_nic *efx, const u8 *mac, int *id_out) +{ + return efx_mcdi_wol_filter_set(efx, MC_CMD_WOL_TYPE_MAGIC, mac, id_out); +} + + +int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_WOL_FILTER_GET_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_WOL_FILTER_GET, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < MC_CMD_WOL_FILTER_GET_OUT_LEN) { + rc = -EIO; + goto fail; + } + + *id_out = (int)MCDI_DWORD(outbuf, WOL_FILTER_GET_OUT_FILTER_ID); + + return 0; + +fail: + *id_out = -1; + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + + +int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_WOL_FILTER_REMOVE_IN_LEN); + int rc; + + MCDI_SET_DWORD(inbuf, WOL_FILTER_REMOVE_IN_FILTER_ID, (u32)id); + + rc = efx_mcdi_rpc(efx, MC_CMD_WOL_FILTER_REMOVE, inbuf, sizeof(inbuf), + NULL, 0, NULL); + return rc; +} + +int efx_mcdi_flush_rxqs(struct efx_nic *efx) +{ + struct efx_channel *channel; + struct efx_rx_queue *rx_queue; + MCDI_DECLARE_BUF(inbuf, + MC_CMD_FLUSH_RX_QUEUES_IN_LEN(EFX_MAX_CHANNELS)); + int rc, count; + + BUILD_BUG_ON(EFX_MAX_CHANNELS > + MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM); + + count = 0; + efx_for_each_channel(channel, efx) { + efx_for_each_channel_rx_queue(rx_queue, channel) { + if (rx_queue->flush_pending) { + rx_queue->flush_pending = false; + atomic_dec(&efx->rxq_flush_pending); + MCDI_SET_ARRAY_DWORD( + inbuf, FLUSH_RX_QUEUES_IN_QID_OFST, + count, efx_rx_queue_index(rx_queue)); + count++; + } + } + } + + rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, inbuf, + MC_CMD_FLUSH_RX_QUEUES_IN_LEN(count), NULL, 0, NULL); + WARN_ON(rc < 0); + + return rc; +} + +int efx_mcdi_wol_filter_reset(struct efx_nic *efx) +{ + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_WOL_FILTER_RESET, NULL, 0, NULL, 0, NULL); + return rc; +} + +int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled, + unsigned int *flags) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_WORKAROUND_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_WORKAROUND_EXT_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_WORKAROUND_OUT_LEN != 0); + MCDI_SET_DWORD(inbuf, WORKAROUND_IN_TYPE, type); + MCDI_SET_DWORD(inbuf, WORKAROUND_IN_ENABLED, enabled); + rc = efx_mcdi_rpc(efx, MC_CMD_WORKAROUND, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + + if (!flags) + return 0; + + if (outlen >= MC_CMD_WORKAROUND_EXT_OUT_LEN) + *flags = MCDI_DWORD(outbuf, WORKAROUND_EXT_OUT_FLAGS); + else + *flags = 0; + + return 0; +} + +int efx_mcdi_get_workarounds(struct efx_nic *efx, unsigned int *impl_out, + unsigned int *enabled_out) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_WORKAROUNDS_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_WORKAROUNDS, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < MC_CMD_GET_WORKAROUNDS_OUT_LEN) { + rc = -EIO; + goto fail; + } + + if (impl_out) + *impl_out = MCDI_DWORD(outbuf, GET_WORKAROUNDS_OUT_IMPLEMENTED); + + if (enabled_out) + *enabled_out = MCDI_DWORD(outbuf, GET_WORKAROUNDS_OUT_ENABLED); + + return 0; + +fail: + /* Older firmware lacks GET_WORKAROUNDS and this isn't especially + * terrifying. The call site will have to deal with it though. + */ + netif_cond_dbg(efx, hw, efx->net_dev, rc == -ENOSYS, err, + "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +#ifdef CONFIG_SFC_MTD + +#define EFX_MCDI_NVRAM_LEN_MAX 128 + +static int efx_mcdi_nvram_update_start(struct efx_nic *efx, unsigned int type) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN); + int rc; + + MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_START_IN_TYPE, type); + MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_START_V2_IN_FLAGS, + NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT, + 1); + + BUILD_BUG_ON(MC_CMD_NVRAM_UPDATE_START_OUT_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_START, inbuf, sizeof(inbuf), + NULL, 0, NULL); + + return rc; +} + +static int efx_mcdi_nvram_read(struct efx_nic *efx, unsigned int type, + loff_t offset, u8 *buffer, size_t length) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_READ_IN_V2_LEN); + MCDI_DECLARE_BUF(outbuf, + MC_CMD_NVRAM_READ_OUT_LEN(EFX_MCDI_NVRAM_LEN_MAX)); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_TYPE, type); + MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_OFFSET, offset); + MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_LENGTH, length); + MCDI_SET_DWORD(inbuf, NVRAM_READ_IN_V2_MODE, + MC_CMD_NVRAM_READ_IN_V2_DEFAULT); + + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_READ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + + memcpy(buffer, MCDI_PTR(outbuf, NVRAM_READ_OUT_READ_BUFFER), length); + return 0; +} + +static int efx_mcdi_nvram_write(struct efx_nic *efx, unsigned int type, + loff_t offset, const u8 *buffer, size_t length) +{ + MCDI_DECLARE_BUF(inbuf, + MC_CMD_NVRAM_WRITE_IN_LEN(EFX_MCDI_NVRAM_LEN_MAX)); + int rc; + + MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_TYPE, type); + MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_OFFSET, offset); + MCDI_SET_DWORD(inbuf, NVRAM_WRITE_IN_LENGTH, length); + memcpy(MCDI_PTR(inbuf, NVRAM_WRITE_IN_WRITE_BUFFER), buffer, length); + + BUILD_BUG_ON(MC_CMD_NVRAM_WRITE_OUT_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_WRITE, inbuf, + ALIGN(MC_CMD_NVRAM_WRITE_IN_LEN(length), 4), + NULL, 0, NULL); + return rc; +} + +static int efx_mcdi_nvram_erase(struct efx_nic *efx, unsigned int type, + loff_t offset, size_t length) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_ERASE_IN_LEN); + int rc; + + MCDI_SET_DWORD(inbuf, NVRAM_ERASE_IN_TYPE, type); + MCDI_SET_DWORD(inbuf, NVRAM_ERASE_IN_OFFSET, offset); + MCDI_SET_DWORD(inbuf, NVRAM_ERASE_IN_LENGTH, length); + + BUILD_BUG_ON(MC_CMD_NVRAM_ERASE_OUT_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_ERASE, inbuf, sizeof(inbuf), + NULL, 0, NULL); + return rc; +} + +static int efx_mcdi_nvram_update_finish(struct efx_nic *efx, unsigned int type) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN); + size_t outlen; + int rc, rc2; + + MCDI_SET_DWORD(inbuf, NVRAM_UPDATE_FINISH_IN_TYPE, type); + /* Always set this flag. Old firmware ignores it */ + MCDI_POPULATE_DWORD_1(inbuf, NVRAM_UPDATE_FINISH_V2_IN_FLAGS, + NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT, + 1); + + rc = efx_mcdi_rpc(efx, MC_CMD_NVRAM_UPDATE_FINISH, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (!rc && outlen >= MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN) { + rc2 = MCDI_DWORD(outbuf, NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE); + if (rc2 != MC_CMD_NVRAM_VERIFY_RC_SUCCESS) + netif_err(efx, drv, efx->net_dev, + "NVRAM update failed verification with code 0x%x\n", + rc2); + switch (rc2) { + case MC_CMD_NVRAM_VERIFY_RC_SUCCESS: + break; + case MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED: + case MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED: + case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED: + case MC_CMD_NVRAM_VERIFY_RC_TRUSTED_APPROVERS_CHECK_FAILED: + case MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHAIN_CHECK_FAILED: + rc = -EIO; + break; + case MC_CMD_NVRAM_VERIFY_RC_INVALID_CMS_FORMAT: + case MC_CMD_NVRAM_VERIFY_RC_BAD_MESSAGE_DIGEST: + rc = -EINVAL; + break; + case MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES: + case MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS: + case MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH: + rc = -EPERM; + break; + default: + netif_err(efx, drv, efx->net_dev, + "Unknown response to NVRAM_UPDATE_FINISH\n"); + rc = -EIO; + } + } + + return rc; +} + +int efx_mcdi_mtd_read(struct mtd_info *mtd, loff_t start, + size_t len, size_t *retlen, u8 *buffer) +{ + struct efx_mcdi_mtd_partition *part = to_efx_mcdi_mtd_partition(mtd); + struct efx_nic *efx = mtd->priv; + loff_t offset = start; + loff_t end = min_t(loff_t, start + len, mtd->size); + size_t chunk; + int rc = 0; + + while (offset < end) { + chunk = min_t(size_t, end - offset, EFX_MCDI_NVRAM_LEN_MAX); + rc = efx_mcdi_nvram_read(efx, part->nvram_type, offset, + buffer, chunk); + if (rc) + goto out; + offset += chunk; + buffer += chunk; + } +out: + *retlen = offset - start; + return rc; +} + +int efx_mcdi_mtd_erase(struct mtd_info *mtd, loff_t start, size_t len) +{ + struct efx_mcdi_mtd_partition *part = to_efx_mcdi_mtd_partition(mtd); + struct efx_nic *efx = mtd->priv; + loff_t offset = start & ~((loff_t)(mtd->erasesize - 1)); + loff_t end = min_t(loff_t, start + len, mtd->size); + size_t chunk = part->common.mtd.erasesize; + int rc = 0; + + if (!part->updating) { + rc = efx_mcdi_nvram_update_start(efx, part->nvram_type); + if (rc) + goto out; + part->updating = true; + } + + /* The MCDI interface can in fact do multiple erase blocks at once; + * but erasing may be slow, so we make multiple calls here to avoid + * tripping the MCDI RPC timeout. */ + while (offset < end) { + rc = efx_mcdi_nvram_erase(efx, part->nvram_type, offset, + chunk); + if (rc) + goto out; + offset += chunk; + } +out: + return rc; +} + +int efx_mcdi_mtd_write(struct mtd_info *mtd, loff_t start, + size_t len, size_t *retlen, const u8 *buffer) +{ + struct efx_mcdi_mtd_partition *part = to_efx_mcdi_mtd_partition(mtd); + struct efx_nic *efx = mtd->priv; + loff_t offset = start; + loff_t end = min_t(loff_t, start + len, mtd->size); + size_t chunk; + int rc = 0; + + if (!part->updating) { + rc = efx_mcdi_nvram_update_start(efx, part->nvram_type); + if (rc) + goto out; + part->updating = true; + } + + while (offset < end) { + chunk = min_t(size_t, end - offset, EFX_MCDI_NVRAM_LEN_MAX); + rc = efx_mcdi_nvram_write(efx, part->nvram_type, offset, + buffer, chunk); + if (rc) + goto out; + offset += chunk; + buffer += chunk; + } +out: + *retlen = offset - start; + return rc; +} + +int efx_mcdi_mtd_sync(struct mtd_info *mtd) +{ + struct efx_mcdi_mtd_partition *part = to_efx_mcdi_mtd_partition(mtd); + struct efx_nic *efx = mtd->priv; + int rc = 0; + + if (part->updating) { + part->updating = false; + rc = efx_mcdi_nvram_update_finish(efx, part->nvram_type); + } + + return rc; +} + +void efx_mcdi_mtd_rename(struct efx_mtd_partition *part) +{ + struct efx_mcdi_mtd_partition *mcdi_part = + container_of(part, struct efx_mcdi_mtd_partition, common); + struct efx_nic *efx = part->mtd.priv; + + snprintf(part->name, sizeof(part->name), "%s %s:%02x", + efx->name, part->type_name, mcdi_part->fw_subtype); +} + +#endif /* CONFIG_SFC_MTD */ diff --git a/drivers/net/ethernet/sfc/siena/mcdi.h b/drivers/net/ethernet/sfc/siena/mcdi.h new file mode 100644 index 000000000000..69c2924a147c --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/mcdi.h @@ -0,0 +1,388 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2008-2013 Solarflare Communications Inc. + */ + +#ifndef EFX_MCDI_H +#define EFX_MCDI_H + +/** + * enum efx_mcdi_state - MCDI request handling state + * @MCDI_STATE_QUIESCENT: No pending MCDI requests. If the caller holds the + * mcdi @iface_lock then they are able to move to %MCDI_STATE_RUNNING + * @MCDI_STATE_RUNNING_SYNC: There is a synchronous MCDI request pending. + * Only the thread that moved into this state is allowed to move out of it. + * @MCDI_STATE_RUNNING_ASYNC: There is an asynchronous MCDI request pending. + * @MCDI_STATE_PROXY_WAIT: An MCDI request has completed with a response that + * indicates we must wait for a proxy try again message. + * @MCDI_STATE_COMPLETED: An MCDI request has completed, but the owning thread + * has not yet consumed the result. For all other threads, equivalent to + * %MCDI_STATE_RUNNING. + */ +enum efx_mcdi_state { + MCDI_STATE_QUIESCENT, + MCDI_STATE_RUNNING_SYNC, + MCDI_STATE_RUNNING_ASYNC, + MCDI_STATE_PROXY_WAIT, + MCDI_STATE_COMPLETED, +}; + +/** + * enum efx_mcdi_mode - MCDI transaction mode + * @MCDI_MODE_POLL: poll for MCDI completion, until timeout + * @MCDI_MODE_EVENTS: wait for an mcdi_event. On timeout, poll once + * @MCDI_MODE_FAIL: we think MCDI is dead, so fail-fast all calls + */ +enum efx_mcdi_mode { + MCDI_MODE_POLL, + MCDI_MODE_EVENTS, + MCDI_MODE_FAIL, +}; + +/** + * struct efx_mcdi_iface - MCDI protocol context + * @efx: The associated NIC. + * @state: Request handling state. Waited for by @wq. + * @mode: Poll for mcdi completion, or wait for an mcdi_event. + * @wq: Wait queue for threads waiting for @state != %MCDI_STATE_RUNNING + * @new_epoch: Indicates start of day or start of MC reboot recovery + * @iface_lock: Serialises access to @seqno, @credits and response metadata + * @seqno: The next sequence number to use for mcdi requests. + * @credits: Number of spurious MCDI completion events allowed before we + * trigger a fatal error + * @resprc: Response error/success code (Linux numbering) + * @resp_hdr_len: Response header length + * @resp_data_len: Response data (SDU or error) length + * @async_lock: Serialises access to @async_list while event processing is + * enabled + * @async_list: Queue of asynchronous requests + * @async_timer: Timer for asynchronous request timeout + * @logging_buffer: buffer that may be used to build MCDI tracing messages + * @logging_enabled: whether to trace MCDI + * @proxy_rx_handle: Most recently received proxy authorisation handle + * @proxy_rx_status: Status of most recent proxy authorisation + * @proxy_rx_wq: Wait queue for updates to proxy_rx_handle + */ +struct efx_mcdi_iface { + struct efx_nic *efx; + enum efx_mcdi_state state; + enum efx_mcdi_mode mode; + wait_queue_head_t wq; + spinlock_t iface_lock; + bool new_epoch; + unsigned int credits; + unsigned int seqno; + int resprc; + int resprc_raw; + size_t resp_hdr_len; + size_t resp_data_len; + spinlock_t async_lock; + struct list_head async_list; + struct timer_list async_timer; +#ifdef CONFIG_SFC_MCDI_LOGGING + char *logging_buffer; + bool logging_enabled; +#endif + unsigned int proxy_rx_handle; + int proxy_rx_status; + wait_queue_head_t proxy_rx_wq; +}; + +struct efx_mcdi_mon { + struct efx_buffer dma_buf; + struct mutex update_lock; + unsigned long last_update; + struct device *device; + struct efx_mcdi_mon_attribute *attrs; + struct attribute_group group; + const struct attribute_group *groups[2]; + unsigned int n_attrs; +}; + +struct efx_mcdi_mtd_partition { + struct efx_mtd_partition common; + bool updating; + u16 nvram_type; + u16 fw_subtype; +}; + +#define to_efx_mcdi_mtd_partition(mtd) \ + container_of(mtd, struct efx_mcdi_mtd_partition, common.mtd) + +/** + * struct efx_mcdi_data - extra state for NICs that implement MCDI + * @iface: Interface/protocol state + * @hwmon: Hardware monitor state + * @fn_flags: Flags for this function, as returned by %MC_CMD_DRV_ATTACH. + */ +struct efx_mcdi_data { + struct efx_mcdi_iface iface; +#ifdef CONFIG_SFC_MCDI_MON + struct efx_mcdi_mon hwmon; +#endif + u32 fn_flags; +}; + +static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx) +{ + EFX_WARN_ON_PARANOID(!efx->mcdi); + return &efx->mcdi->iface; +} + +#ifdef CONFIG_SFC_MCDI_MON +static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx) +{ + EFX_WARN_ON_PARANOID(!efx->mcdi); + return &efx->mcdi->hwmon; +} +#endif + +int efx_mcdi_init(struct efx_nic *efx); +void efx_mcdi_detach(struct efx_nic *efx); +void efx_mcdi_fini(struct efx_nic *efx); + +int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, const efx_dword_t *inbuf, + size_t inlen, efx_dword_t *outbuf, size_t outlen, + size_t *outlen_actual); +int efx_mcdi_rpc_quiet(struct efx_nic *efx, unsigned cmd, + const efx_dword_t *inbuf, size_t inlen, + efx_dword_t *outbuf, size_t outlen, + size_t *outlen_actual); + +int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd, + const efx_dword_t *inbuf, size_t inlen); +int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, + efx_dword_t *outbuf, size_t outlen, + size_t *outlen_actual); +int efx_mcdi_rpc_finish_quiet(struct efx_nic *efx, unsigned cmd, + size_t inlen, efx_dword_t *outbuf, + size_t outlen, size_t *outlen_actual); + +typedef void efx_mcdi_async_completer(struct efx_nic *efx, + unsigned long cookie, int rc, + efx_dword_t *outbuf, + size_t outlen_actual); +int efx_mcdi_rpc_async(struct efx_nic *efx, unsigned int cmd, + const efx_dword_t *inbuf, size_t inlen, size_t outlen, + efx_mcdi_async_completer *complete, + unsigned long cookie); +int efx_mcdi_rpc_async_quiet(struct efx_nic *efx, unsigned int cmd, + const efx_dword_t *inbuf, size_t inlen, + size_t outlen, + efx_mcdi_async_completer *complete, + unsigned long cookie); + +void efx_mcdi_display_error(struct efx_nic *efx, unsigned cmd, + size_t inlen, efx_dword_t *outbuf, + size_t outlen, int rc); + +int efx_mcdi_poll_reboot(struct efx_nic *efx); +void efx_mcdi_mode_poll(struct efx_nic *efx); +void efx_mcdi_mode_event(struct efx_nic *efx); +void efx_mcdi_flush_async(struct efx_nic *efx); + +void efx_mcdi_process_event(struct efx_channel *channel, efx_qword_t *event); +void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev); + +/* We expect that 16- and 32-bit fields in MCDI requests and responses + * are appropriately aligned, but 64-bit fields are only + * 32-bit-aligned. Also, on Siena we must copy to the MC shared + * memory strictly 32 bits at a time, so add any necessary padding. + */ +#define MCDI_TX_BUF_LEN(_len) DIV_ROUND_UP((_len), 4) +#define _MCDI_DECLARE_BUF(_name, _len) \ + efx_dword_t _name[DIV_ROUND_UP(_len, 4)] +#define MCDI_DECLARE_BUF(_name, _len) \ + _MCDI_DECLARE_BUF(_name, _len) = {{{0}}} +#define MCDI_DECLARE_BUF_ERR(_name) \ + MCDI_DECLARE_BUF(_name, 8) +#define _MCDI_PTR(_buf, _offset) \ + ((u8 *)(_buf) + (_offset)) +#define MCDI_PTR(_buf, _field) \ + _MCDI_PTR(_buf, MC_CMD_ ## _field ## _OFST) +#define _MCDI_CHECK_ALIGN(_ofst, _align) \ + ((_ofst) + BUILD_BUG_ON_ZERO((_ofst) & (_align - 1))) +#define _MCDI_DWORD(_buf, _field) \ + ((_buf) + (_MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _OFST, 4) >> 2)) + +#define MCDI_BYTE(_buf, _field) \ + ((void)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 1), \ + *MCDI_PTR(_buf, _field)) +#define MCDI_WORD(_buf, _field) \ + ((u16)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 2) + \ + le16_to_cpu(*(__force const __le16 *)MCDI_PTR(_buf, _field))) +#define MCDI_SET_DWORD(_buf, _field, _value) \ + EFX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), EFX_DWORD_0, _value) +#define MCDI_DWORD(_buf, _field) \ + EFX_DWORD_FIELD(*_MCDI_DWORD(_buf, _field), EFX_DWORD_0) +#define MCDI_POPULATE_DWORD_1(_buf, _field, _name1, _value1) \ + EFX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), \ + MC_CMD_ ## _name1, _value1) +#define MCDI_POPULATE_DWORD_2(_buf, _field, _name1, _value1, \ + _name2, _value2) \ + EFX_POPULATE_DWORD_2(*_MCDI_DWORD(_buf, _field), \ + MC_CMD_ ## _name1, _value1, \ + MC_CMD_ ## _name2, _value2) +#define MCDI_POPULATE_DWORD_3(_buf, _field, _name1, _value1, \ + _name2, _value2, _name3, _value3) \ + EFX_POPULATE_DWORD_3(*_MCDI_DWORD(_buf, _field), \ + MC_CMD_ ## _name1, _value1, \ + MC_CMD_ ## _name2, _value2, \ + MC_CMD_ ## _name3, _value3) +#define MCDI_POPULATE_DWORD_4(_buf, _field, _name1, _value1, \ + _name2, _value2, _name3, _value3, \ + _name4, _value4) \ + EFX_POPULATE_DWORD_4(*_MCDI_DWORD(_buf, _field), \ + MC_CMD_ ## _name1, _value1, \ + MC_CMD_ ## _name2, _value2, \ + MC_CMD_ ## _name3, _value3, \ + MC_CMD_ ## _name4, _value4) +#define MCDI_POPULATE_DWORD_5(_buf, _field, _name1, _value1, \ + _name2, _value2, _name3, _value3, \ + _name4, _value4, _name5, _value5) \ + EFX_POPULATE_DWORD_5(*_MCDI_DWORD(_buf, _field), \ + MC_CMD_ ## _name1, _value1, \ + MC_CMD_ ## _name2, _value2, \ + MC_CMD_ ## _name3, _value3, \ + MC_CMD_ ## _name4, _value4, \ + MC_CMD_ ## _name5, _value5) +#define MCDI_POPULATE_DWORD_6(_buf, _field, _name1, _value1, \ + _name2, _value2, _name3, _value3, \ + _name4, _value4, _name5, _value5, \ + _name6, _value6) \ + EFX_POPULATE_DWORD_6(*_MCDI_DWORD(_buf, _field), \ + MC_CMD_ ## _name1, _value1, \ + MC_CMD_ ## _name2, _value2, \ + MC_CMD_ ## _name3, _value3, \ + MC_CMD_ ## _name4, _value4, \ + MC_CMD_ ## _name5, _value5, \ + MC_CMD_ ## _name6, _value6) +#define MCDI_POPULATE_DWORD_7(_buf, _field, _name1, _value1, \ + _name2, _value2, _name3, _value3, \ + _name4, _value4, _name5, _value5, \ + _name6, _value6, _name7, _value7) \ + EFX_POPULATE_DWORD_7(*_MCDI_DWORD(_buf, _field), \ + MC_CMD_ ## _name1, _value1, \ + MC_CMD_ ## _name2, _value2, \ + MC_CMD_ ## _name3, _value3, \ + MC_CMD_ ## _name4, _value4, \ + MC_CMD_ ## _name5, _value5, \ + MC_CMD_ ## _name6, _value6, \ + MC_CMD_ ## _name7, _value7) +#define MCDI_SET_QWORD(_buf, _field, _value) \ + do { \ + EFX_POPULATE_DWORD_1(_MCDI_DWORD(_buf, _field)[0], \ + EFX_DWORD_0, (u32)(_value)); \ + EFX_POPULATE_DWORD_1(_MCDI_DWORD(_buf, _field)[1], \ + EFX_DWORD_0, (u64)(_value) >> 32); \ + } while (0) +#define MCDI_QWORD(_buf, _field) \ + (EFX_DWORD_FIELD(_MCDI_DWORD(_buf, _field)[0], EFX_DWORD_0) | \ + (u64)EFX_DWORD_FIELD(_MCDI_DWORD(_buf, _field)[1], EFX_DWORD_0) << 32) +#define MCDI_FIELD(_ptr, _type, _field) \ + EFX_EXTRACT_DWORD( \ + *(efx_dword_t *) \ + _MCDI_PTR(_ptr, MC_CMD_ ## _type ## _ ## _field ## _OFST & ~3),\ + MC_CMD_ ## _type ## _ ## _field ## _LBN & 0x1f, \ + (MC_CMD_ ## _type ## _ ## _field ## _LBN & 0x1f) + \ + MC_CMD_ ## _type ## _ ## _field ## _WIDTH - 1) + +#define _MCDI_ARRAY_PTR(_buf, _field, _index, _align) \ + (_MCDI_PTR(_buf, _MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _OFST, _align))\ + + (_index) * _MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _LEN, _align)) +#define MCDI_DECLARE_STRUCT_PTR(_name) \ + efx_dword_t *_name +#define MCDI_ARRAY_STRUCT_PTR(_buf, _field, _index) \ + ((efx_dword_t *)_MCDI_ARRAY_PTR(_buf, _field, _index, 4)) +#define MCDI_VAR_ARRAY_LEN(_len, _field) \ + min_t(size_t, MC_CMD_ ## _field ## _MAXNUM, \ + ((_len) - MC_CMD_ ## _field ## _OFST) / MC_CMD_ ## _field ## _LEN) +#define MCDI_ARRAY_WORD(_buf, _field, _index) \ + (BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 2) + \ + le16_to_cpu(*(__force const __le16 *) \ + _MCDI_ARRAY_PTR(_buf, _field, _index, 2))) +#define _MCDI_ARRAY_DWORD(_buf, _field, _index) \ + (BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 4) + \ + (efx_dword_t *)_MCDI_ARRAY_PTR(_buf, _field, _index, 4)) +#define MCDI_SET_ARRAY_DWORD(_buf, _field, _index, _value) \ + EFX_SET_DWORD_FIELD(*_MCDI_ARRAY_DWORD(_buf, _field, _index), \ + EFX_DWORD_0, _value) +#define MCDI_ARRAY_DWORD(_buf, _field, _index) \ + EFX_DWORD_FIELD(*_MCDI_ARRAY_DWORD(_buf, _field, _index), EFX_DWORD_0) +#define _MCDI_ARRAY_QWORD(_buf, _field, _index) \ + (BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 8) + \ + (efx_dword_t *)_MCDI_ARRAY_PTR(_buf, _field, _index, 4)) +#define MCDI_SET_ARRAY_QWORD(_buf, _field, _index, _value) \ + do { \ + EFX_SET_DWORD_FIELD(_MCDI_ARRAY_QWORD(_buf, _field, _index)[0],\ + EFX_DWORD_0, (u32)(_value)); \ + EFX_SET_DWORD_FIELD(_MCDI_ARRAY_QWORD(_buf, _field, _index)[1],\ + EFX_DWORD_0, (u64)(_value) >> 32); \ + } while (0) +#define MCDI_ARRAY_FIELD(_buf, _field1, _type, _index, _field2) \ + MCDI_FIELD(MCDI_ARRAY_STRUCT_PTR(_buf, _field1, _index), \ + _type ## _TYPEDEF, _field2) + +#define MCDI_EVENT_FIELD(_ev, _field) \ + EFX_QWORD_FIELD(_ev, MCDI_EVENT_ ## _field) + +#define MCDI_CAPABILITY(field) \ + MC_CMD_GET_CAPABILITIES_V8_OUT_ ## field ## _LBN + +#define MCDI_CAPABILITY_OFST(field) \ + MC_CMD_GET_CAPABILITIES_V8_OUT_ ## field ## _OFST + +#define efx_has_cap(efx, field) \ + efx->type->check_caps(efx, \ + MCDI_CAPABILITY(field), \ + MCDI_CAPABILITY_OFST(field)) + +void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len); +int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address, + u16 *fw_subtype_list, u32 *capabilities); +int efx_mcdi_log_ctrl(struct efx_nic *efx, bool evq, bool uart, u32 dest_evq); +int efx_mcdi_nvram_types(struct efx_nic *efx, u32 *nvram_types_out); +int efx_mcdi_nvram_info(struct efx_nic *efx, unsigned int type, + size_t *size_out, size_t *erase_size_out, + bool *protected_out); +int efx_new_mcdi_nvram_test_all(struct efx_nic *efx); +int efx_mcdi_nvram_test_all(struct efx_nic *efx); +int efx_mcdi_handle_assertion(struct efx_nic *efx); +int efx_mcdi_set_id_led(struct efx_nic *efx, enum efx_led_mode mode); +int efx_mcdi_wol_filter_set_magic(struct efx_nic *efx, const u8 *mac, + int *id_out); +int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out); +int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id); +int efx_mcdi_wol_filter_reset(struct efx_nic *efx); +int efx_mcdi_flush_rxqs(struct efx_nic *efx); +void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev); +void efx_mcdi_mac_start_stats(struct efx_nic *efx); +void efx_mcdi_mac_stop_stats(struct efx_nic *efx); +void efx_mcdi_mac_pull_stats(struct efx_nic *efx); +enum reset_type efx_mcdi_map_reset_reason(enum reset_type reason); +int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method); +int efx_mcdi_set_workaround(struct efx_nic *efx, u32 type, bool enabled, + unsigned int *flags); +int efx_mcdi_get_workarounds(struct efx_nic *efx, unsigned int *impl_out, + unsigned int *enabled_out); + +#ifdef CONFIG_SFC_MCDI_MON +int efx_mcdi_mon_probe(struct efx_nic *efx); +void efx_mcdi_mon_remove(struct efx_nic *efx); +#else +static inline int efx_mcdi_mon_probe(struct efx_nic *efx) { return 0; } +static inline void efx_mcdi_mon_remove(struct efx_nic *efx) {} +#endif + +#ifdef CONFIG_SFC_MTD +int efx_mcdi_mtd_read(struct mtd_info *mtd, loff_t start, size_t len, + size_t *retlen, u8 *buffer); +int efx_mcdi_mtd_erase(struct mtd_info *mtd, loff_t start, size_t len); +int efx_mcdi_mtd_write(struct mtd_info *mtd, loff_t start, size_t len, + size_t *retlen, const u8 *buffer); +int efx_mcdi_mtd_sync(struct mtd_info *mtd); +void efx_mcdi_mtd_rename(struct efx_mtd_partition *part); +#endif + +#endif /* EFX_MCDI_H */ diff --git a/drivers/net/ethernet/sfc/siena/mcdi_mon.c b/drivers/net/ethernet/sfc/siena/mcdi_mon.c new file mode 100644 index 000000000000..5954fcfee2b1 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/mcdi_mon.c @@ -0,0 +1,531 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2011-2013 Solarflare Communications Inc. + */ + +#include <linux/bitops.h> +#include <linux/slab.h> +#include <linux/hwmon.h> +#include <linux/stat.h> + +#include "net_driver.h" +#include "mcdi.h" +#include "mcdi_pcol.h" +#include "nic.h" + +enum efx_hwmon_type { + EFX_HWMON_UNKNOWN, + EFX_HWMON_TEMP, /* temperature */ + EFX_HWMON_COOL, /* cooling device, probably a heatsink */ + EFX_HWMON_IN, /* voltage */ + EFX_HWMON_CURR, /* current */ + EFX_HWMON_POWER, /* power */ + EFX_HWMON_TYPES_COUNT +}; + +static const char *const efx_hwmon_unit[EFX_HWMON_TYPES_COUNT] = { + [EFX_HWMON_TEMP] = " degC", + [EFX_HWMON_COOL] = " rpm", /* though nonsense for a heatsink */ + [EFX_HWMON_IN] = " mV", + [EFX_HWMON_CURR] = " mA", + [EFX_HWMON_POWER] = " W", +}; + +static const struct { + const char *label; + enum efx_hwmon_type hwmon_type; + int port; +} efx_mcdi_sensor_type[] = { +#define SENSOR(name, label, hwmon_type, port) \ + [MC_CMD_SENSOR_##name] = { label, EFX_HWMON_ ## hwmon_type, port } + SENSOR(CONTROLLER_TEMP, "Controller board temp.", TEMP, -1), + SENSOR(PHY_COMMON_TEMP, "PHY temp.", TEMP, -1), + SENSOR(CONTROLLER_COOLING, "Controller heat sink", COOL, -1), + SENSOR(PHY0_TEMP, "PHY temp.", TEMP, 0), + SENSOR(PHY0_COOLING, "PHY heat sink", COOL, 0), + SENSOR(PHY1_TEMP, "PHY temp.", TEMP, 1), + SENSOR(PHY1_COOLING, "PHY heat sink", COOL, 1), + SENSOR(IN_1V0, "1.0V supply", IN, -1), + SENSOR(IN_1V2, "1.2V supply", IN, -1), + SENSOR(IN_1V8, "1.8V supply", IN, -1), + SENSOR(IN_2V5, "2.5V supply", IN, -1), + SENSOR(IN_3V3, "3.3V supply", IN, -1), + SENSOR(IN_12V0, "12.0V supply", IN, -1), + SENSOR(IN_1V2A, "1.2V analogue supply", IN, -1), + SENSOR(IN_VREF, "Ref. voltage", IN, -1), + SENSOR(OUT_VAOE, "AOE FPGA supply", IN, -1), + SENSOR(AOE_TEMP, "AOE FPGA temp.", TEMP, -1), + SENSOR(PSU_AOE_TEMP, "AOE regulator temp.", TEMP, -1), + SENSOR(PSU_TEMP, "Controller regulator temp.", + TEMP, -1), + SENSOR(FAN_0, "Fan 0", COOL, -1), + SENSOR(FAN_1, "Fan 1", COOL, -1), + SENSOR(FAN_2, "Fan 2", COOL, -1), + SENSOR(FAN_3, "Fan 3", COOL, -1), + SENSOR(FAN_4, "Fan 4", COOL, -1), + SENSOR(IN_VAOE, "AOE input supply", IN, -1), + SENSOR(OUT_IAOE, "AOE output current", CURR, -1), + SENSOR(IN_IAOE, "AOE input current", CURR, -1), + SENSOR(NIC_POWER, "Board power use", POWER, -1), + SENSOR(IN_0V9, "0.9V supply", IN, -1), + SENSOR(IN_I0V9, "0.9V supply current", CURR, -1), + SENSOR(IN_I1V2, "1.2V supply current", CURR, -1), + SENSOR(IN_0V9_ADC, "0.9V supply (ext. ADC)", IN, -1), + SENSOR(CONTROLLER_2_TEMP, "Controller board temp. 2", TEMP, -1), + SENSOR(VREG_INTERNAL_TEMP, "Regulator die temp.", TEMP, -1), + SENSOR(VREG_0V9_TEMP, "0.9V regulator temp.", TEMP, -1), + SENSOR(VREG_1V2_TEMP, "1.2V regulator temp.", TEMP, -1), + SENSOR(CONTROLLER_VPTAT, + "Controller PTAT voltage (int. ADC)", IN, -1), + SENSOR(CONTROLLER_INTERNAL_TEMP, + "Controller die temp. (int. ADC)", TEMP, -1), + SENSOR(CONTROLLER_VPTAT_EXTADC, + "Controller PTAT voltage (ext. ADC)", IN, -1), + SENSOR(CONTROLLER_INTERNAL_TEMP_EXTADC, + "Controller die temp. (ext. ADC)", TEMP, -1), + SENSOR(AMBIENT_TEMP, "Ambient temp.", TEMP, -1), + SENSOR(AIRFLOW, "Air flow raw", IN, -1), + SENSOR(VDD08D_VSS08D_CSR, "0.9V die (int. ADC)", IN, -1), + SENSOR(VDD08D_VSS08D_CSR_EXTADC, "0.9V die (ext. ADC)", IN, -1), + SENSOR(HOTPOINT_TEMP, "Controller board temp. (hotpoint)", TEMP, -1), +#undef SENSOR +}; + +static const char *const sensor_status_names[] = { + [MC_CMD_SENSOR_STATE_OK] = "OK", + [MC_CMD_SENSOR_STATE_WARNING] = "Warning", + [MC_CMD_SENSOR_STATE_FATAL] = "Fatal", + [MC_CMD_SENSOR_STATE_BROKEN] = "Device failure", + [MC_CMD_SENSOR_STATE_NO_READING] = "No reading", +}; + +void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev) +{ + unsigned int type, state, value; + enum efx_hwmon_type hwmon_type = EFX_HWMON_UNKNOWN; + const char *name = NULL, *state_txt, *unit; + + type = EFX_QWORD_FIELD(*ev, MCDI_EVENT_SENSOREVT_MONITOR); + state = EFX_QWORD_FIELD(*ev, MCDI_EVENT_SENSOREVT_STATE); + value = EFX_QWORD_FIELD(*ev, MCDI_EVENT_SENSOREVT_VALUE); + + /* Deal gracefully with the board having more drivers than we + * know about, but do not expect new sensor states. */ + if (type < ARRAY_SIZE(efx_mcdi_sensor_type)) { + name = efx_mcdi_sensor_type[type].label; + hwmon_type = efx_mcdi_sensor_type[type].hwmon_type; + } + if (!name) + name = "No sensor name available"; + EFX_WARN_ON_PARANOID(state >= ARRAY_SIZE(sensor_status_names)); + state_txt = sensor_status_names[state]; + EFX_WARN_ON_PARANOID(hwmon_type >= EFX_HWMON_TYPES_COUNT); + unit = efx_hwmon_unit[hwmon_type]; + if (!unit) + unit = ""; + + netif_err(efx, hw, efx->net_dev, + "Sensor %d (%s) reports condition '%s' for value %d%s\n", + type, name, state_txt, value, unit); +} + +#ifdef CONFIG_SFC_MCDI_MON + +struct efx_mcdi_mon_attribute { + struct device_attribute dev_attr; + unsigned int index; + unsigned int type; + enum efx_hwmon_type hwmon_type; + unsigned int limit_value; + char name[12]; +}; + +static int efx_mcdi_mon_update(struct efx_nic *efx) +{ + struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx); + MCDI_DECLARE_BUF(inbuf, MC_CMD_READ_SENSORS_EXT_IN_LEN); + int rc; + + MCDI_SET_QWORD(inbuf, READ_SENSORS_EXT_IN_DMA_ADDR, + hwmon->dma_buf.dma_addr); + MCDI_SET_DWORD(inbuf, READ_SENSORS_EXT_IN_LENGTH, hwmon->dma_buf.len); + + rc = efx_mcdi_rpc(efx, MC_CMD_READ_SENSORS, + inbuf, sizeof(inbuf), NULL, 0, NULL); + if (rc == 0) + hwmon->last_update = jiffies; + return rc; +} + +static int efx_mcdi_mon_get_entry(struct device *dev, unsigned int index, + efx_dword_t *entry) +{ + struct efx_nic *efx = dev_get_drvdata(dev->parent); + struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx); + int rc; + + BUILD_BUG_ON(MC_CMD_READ_SENSORS_OUT_LEN != 0); + + mutex_lock(&hwmon->update_lock); + + /* Use cached value if last update was < 1 s ago */ + if (time_before(jiffies, hwmon->last_update + HZ)) + rc = 0; + else + rc = efx_mcdi_mon_update(efx); + + /* Copy out the requested entry */ + *entry = ((efx_dword_t *)hwmon->dma_buf.addr)[index]; + + mutex_unlock(&hwmon->update_lock); + + return rc; +} + +static ssize_t efx_mcdi_mon_show_value(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct efx_mcdi_mon_attribute *mon_attr = + container_of(attr, struct efx_mcdi_mon_attribute, dev_attr); + efx_dword_t entry; + unsigned int value, state; + int rc; + + rc = efx_mcdi_mon_get_entry(dev, mon_attr->index, &entry); + if (rc) + return rc; + + state = EFX_DWORD_FIELD(entry, MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE); + if (state == MC_CMD_SENSOR_STATE_NO_READING) + return -EBUSY; + + value = EFX_DWORD_FIELD(entry, MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_VALUE); + + switch (mon_attr->hwmon_type) { + case EFX_HWMON_TEMP: + /* Convert temperature from degrees to milli-degrees Celsius */ + value *= 1000; + break; + case EFX_HWMON_POWER: + /* Convert power from watts to microwatts */ + value *= 1000000; + break; + default: + /* No conversion needed */ + break; + } + + return sprintf(buf, "%u\n", value); +} + +static ssize_t efx_mcdi_mon_show_limit(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct efx_mcdi_mon_attribute *mon_attr = + container_of(attr, struct efx_mcdi_mon_attribute, dev_attr); + unsigned int value; + + value = mon_attr->limit_value; + + switch (mon_attr->hwmon_type) { + case EFX_HWMON_TEMP: + /* Convert temperature from degrees to milli-degrees Celsius */ + value *= 1000; + break; + case EFX_HWMON_POWER: + /* Convert power from watts to microwatts */ + value *= 1000000; + break; + default: + /* No conversion needed */ + break; + } + + return sprintf(buf, "%u\n", value); +} + +static ssize_t efx_mcdi_mon_show_alarm(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct efx_mcdi_mon_attribute *mon_attr = + container_of(attr, struct efx_mcdi_mon_attribute, dev_attr); + efx_dword_t entry; + int state; + int rc; + + rc = efx_mcdi_mon_get_entry(dev, mon_attr->index, &entry); + if (rc) + return rc; + + state = EFX_DWORD_FIELD(entry, MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE); + return sprintf(buf, "%d\n", state != MC_CMD_SENSOR_STATE_OK); +} + +static ssize_t efx_mcdi_mon_show_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct efx_mcdi_mon_attribute *mon_attr = + container_of(attr, struct efx_mcdi_mon_attribute, dev_attr); + return sprintf(buf, "%s\n", + efx_mcdi_sensor_type[mon_attr->type].label); +} + +static void +efx_mcdi_mon_add_attr(struct efx_nic *efx, const char *name, + ssize_t (*reader)(struct device *, + struct device_attribute *, char *), + unsigned int index, unsigned int type, + unsigned int limit_value) +{ + struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx); + struct efx_mcdi_mon_attribute *attr = &hwmon->attrs[hwmon->n_attrs]; + + strlcpy(attr->name, name, sizeof(attr->name)); + attr->index = index; + attr->type = type; + if (type < ARRAY_SIZE(efx_mcdi_sensor_type)) + attr->hwmon_type = efx_mcdi_sensor_type[type].hwmon_type; + else + attr->hwmon_type = EFX_HWMON_UNKNOWN; + attr->limit_value = limit_value; + sysfs_attr_init(&attr->dev_attr.attr); + attr->dev_attr.attr.name = attr->name; + attr->dev_attr.attr.mode = 0444; + attr->dev_attr.show = reader; + hwmon->group.attrs[hwmon->n_attrs++] = &attr->dev_attr.attr; +} + +int efx_mcdi_mon_probe(struct efx_nic *efx) +{ + unsigned int n_temp = 0, n_cool = 0, n_in = 0, n_curr = 0, n_power = 0; + struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx); + MCDI_DECLARE_BUF(inbuf, MC_CMD_SENSOR_INFO_EXT_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_SENSOR_INFO_OUT_LENMAX); + unsigned int n_pages, n_sensors, n_attrs, page; + size_t outlen; + char name[12]; + u32 mask; + int rc, i, j, type; + + /* Find out how many sensors are present */ + n_sensors = 0; + page = 0; + do { + MCDI_SET_DWORD(inbuf, SENSOR_INFO_EXT_IN_PAGE, page); + + rc = efx_mcdi_rpc(efx, MC_CMD_SENSOR_INFO, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_SENSOR_INFO_OUT_LENMIN) + return -EIO; + + mask = MCDI_DWORD(outbuf, SENSOR_INFO_OUT_MASK); + n_sensors += hweight32(mask & ~(1 << MC_CMD_SENSOR_PAGE0_NEXT)); + ++page; + } while (mask & (1 << MC_CMD_SENSOR_PAGE0_NEXT)); + n_pages = page; + + /* Don't create a device if there are none */ + if (n_sensors == 0) + return 0; + + rc = efx_nic_alloc_buffer( + efx, &hwmon->dma_buf, + n_sensors * MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_LEN, + GFP_KERNEL); + if (rc) + return rc; + + mutex_init(&hwmon->update_lock); + efx_mcdi_mon_update(efx); + + /* Allocate space for the maximum possible number of + * attributes for this set of sensors: + * value, min, max, crit, alarm and label for each sensor. + */ + n_attrs = 6 * n_sensors; + hwmon->attrs = kcalloc(n_attrs, sizeof(*hwmon->attrs), GFP_KERNEL); + if (!hwmon->attrs) { + rc = -ENOMEM; + goto fail; + } + hwmon->group.attrs = kcalloc(n_attrs + 1, sizeof(struct attribute *), + GFP_KERNEL); + if (!hwmon->group.attrs) { + rc = -ENOMEM; + goto fail; + } + + for (i = 0, j = -1, type = -1; ; i++) { + enum efx_hwmon_type hwmon_type; + const char *hwmon_prefix; + unsigned hwmon_index; + u16 min1, max1, min2, max2; + + /* Find next sensor type or exit if there is none */ + do { + type++; + + if ((type % 32) == 0) { + page = type / 32; + j = -1; + if (page == n_pages) + goto hwmon_register; + + MCDI_SET_DWORD(inbuf, SENSOR_INFO_EXT_IN_PAGE, + page); + rc = efx_mcdi_rpc(efx, MC_CMD_SENSOR_INFO, + inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), + &outlen); + if (rc) + goto fail; + if (outlen < MC_CMD_SENSOR_INFO_OUT_LENMIN) { + rc = -EIO; + goto fail; + } + + mask = (MCDI_DWORD(outbuf, + SENSOR_INFO_OUT_MASK) & + ~(1 << MC_CMD_SENSOR_PAGE0_NEXT)); + + /* Check again for short response */ + if (outlen < + MC_CMD_SENSOR_INFO_OUT_LEN(hweight32(mask))) { + rc = -EIO; + goto fail; + } + } + } while (!(mask & (1 << type % 32))); + j++; + + if (type < ARRAY_SIZE(efx_mcdi_sensor_type)) { + hwmon_type = efx_mcdi_sensor_type[type].hwmon_type; + + /* Skip sensors specific to a different port */ + if (hwmon_type != EFX_HWMON_UNKNOWN && + efx_mcdi_sensor_type[type].port >= 0 && + efx_mcdi_sensor_type[type].port != + efx_port_num(efx)) + continue; + } else { + hwmon_type = EFX_HWMON_UNKNOWN; + } + + switch (hwmon_type) { + case EFX_HWMON_TEMP: + hwmon_prefix = "temp"; + hwmon_index = ++n_temp; /* 1-based */ + break; + case EFX_HWMON_COOL: + /* This is likely to be a heatsink, but there + * is no convention for representing cooling + * devices other than fans. + */ + hwmon_prefix = "fan"; + hwmon_index = ++n_cool; /* 1-based */ + break; + default: + hwmon_prefix = "in"; + hwmon_index = n_in++; /* 0-based */ + break; + case EFX_HWMON_CURR: + hwmon_prefix = "curr"; + hwmon_index = ++n_curr; /* 1-based */ + break; + case EFX_HWMON_POWER: + hwmon_prefix = "power"; + hwmon_index = ++n_power; /* 1-based */ + break; + } + + min1 = MCDI_ARRAY_FIELD(outbuf, SENSOR_ENTRY, + SENSOR_INFO_ENTRY, j, MIN1); + max1 = MCDI_ARRAY_FIELD(outbuf, SENSOR_ENTRY, + SENSOR_INFO_ENTRY, j, MAX1); + min2 = MCDI_ARRAY_FIELD(outbuf, SENSOR_ENTRY, + SENSOR_INFO_ENTRY, j, MIN2); + max2 = MCDI_ARRAY_FIELD(outbuf, SENSOR_ENTRY, + SENSOR_INFO_ENTRY, j, MAX2); + + if (min1 != max1) { + snprintf(name, sizeof(name), "%s%u_input", + hwmon_prefix, hwmon_index); + efx_mcdi_mon_add_attr( + efx, name, efx_mcdi_mon_show_value, i, type, 0); + + if (hwmon_type != EFX_HWMON_POWER) { + snprintf(name, sizeof(name), "%s%u_min", + hwmon_prefix, hwmon_index); + efx_mcdi_mon_add_attr( + efx, name, efx_mcdi_mon_show_limit, + i, type, min1); + } + + snprintf(name, sizeof(name), "%s%u_max", + hwmon_prefix, hwmon_index); + efx_mcdi_mon_add_attr( + efx, name, efx_mcdi_mon_show_limit, + i, type, max1); + + if (min2 != max2) { + /* Assume max2 is critical value. + * But we have no good way to expose min2. + */ + snprintf(name, sizeof(name), "%s%u_crit", + hwmon_prefix, hwmon_index); + efx_mcdi_mon_add_attr( + efx, name, efx_mcdi_mon_show_limit, + i, type, max2); + } + } + + snprintf(name, sizeof(name), "%s%u_alarm", + hwmon_prefix, hwmon_index); + efx_mcdi_mon_add_attr( + efx, name, efx_mcdi_mon_show_alarm, i, type, 0); + + if (type < ARRAY_SIZE(efx_mcdi_sensor_type) && + efx_mcdi_sensor_type[type].label) { + snprintf(name, sizeof(name), "%s%u_label", + hwmon_prefix, hwmon_index); + efx_mcdi_mon_add_attr( + efx, name, efx_mcdi_mon_show_label, i, type, 0); + } + } + +hwmon_register: + hwmon->groups[0] = &hwmon->group; + hwmon->device = hwmon_device_register_with_groups(&efx->pci_dev->dev, + KBUILD_MODNAME, NULL, + hwmon->groups); + if (IS_ERR(hwmon->device)) { + rc = PTR_ERR(hwmon->device); + goto fail; + } + + return 0; + +fail: + efx_mcdi_mon_remove(efx); + return rc; +} + +void efx_mcdi_mon_remove(struct efx_nic *efx) +{ + struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx); + + if (hwmon->device) + hwmon_device_unregister(hwmon->device); + kfree(hwmon->attrs); + kfree(hwmon->group.attrs); + efx_nic_free_buffer(efx, &hwmon->dma_buf); +} + +#endif /* CONFIG_SFC_MCDI_MON */ diff --git a/drivers/net/ethernet/sfc/siena/mcdi_port.c b/drivers/net/ethernet/sfc/siena/mcdi_port.c new file mode 100644 index 000000000000..94c6a345c0b1 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/mcdi_port.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2009-2013 Solarflare Communications Inc. + */ + +/* + * Driver for PHY related operations via MCDI. + */ + +#include <linux/slab.h> +#include "efx.h" +#include "mcdi_port.h" +#include "mcdi.h" +#include "mcdi_pcol.h" +#include "nic.h" +#include "selftest.h" +#include "mcdi_port_common.h" + +static int efx_mcdi_mdio_read(struct net_device *net_dev, + int prtad, int devad, u16 addr) +{ + struct efx_nic *efx = netdev_priv(net_dev); + MCDI_DECLARE_BUF(inbuf, MC_CMD_MDIO_READ_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_MDIO_READ_OUT_LEN); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, MDIO_READ_IN_BUS, efx->mdio_bus); + MCDI_SET_DWORD(inbuf, MDIO_READ_IN_PRTAD, prtad); + MCDI_SET_DWORD(inbuf, MDIO_READ_IN_DEVAD, devad); + MCDI_SET_DWORD(inbuf, MDIO_READ_IN_ADDR, addr); + + rc = efx_mcdi_rpc(efx, MC_CMD_MDIO_READ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + + if (MCDI_DWORD(outbuf, MDIO_READ_OUT_STATUS) != + MC_CMD_MDIO_STATUS_GOOD) + return -EIO; + + return (u16)MCDI_DWORD(outbuf, MDIO_READ_OUT_VALUE); +} + +static int efx_mcdi_mdio_write(struct net_device *net_dev, + int prtad, int devad, u16 addr, u16 value) +{ + struct efx_nic *efx = netdev_priv(net_dev); + MCDI_DECLARE_BUF(inbuf, MC_CMD_MDIO_WRITE_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_MDIO_WRITE_OUT_LEN); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, MDIO_WRITE_IN_BUS, efx->mdio_bus); + MCDI_SET_DWORD(inbuf, MDIO_WRITE_IN_PRTAD, prtad); + MCDI_SET_DWORD(inbuf, MDIO_WRITE_IN_DEVAD, devad); + MCDI_SET_DWORD(inbuf, MDIO_WRITE_IN_ADDR, addr); + MCDI_SET_DWORD(inbuf, MDIO_WRITE_IN_VALUE, value); + + rc = efx_mcdi_rpc(efx, MC_CMD_MDIO_WRITE, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + + if (MCDI_DWORD(outbuf, MDIO_WRITE_OUT_STATUS) != + MC_CMD_MDIO_STATUS_GOOD) + return -EIO; + + return 0; +} + +u32 efx_mcdi_phy_get_caps(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_data = efx->phy_data; + + return phy_data->supported_cap; +} + +bool efx_mcdi_mac_check_fault(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); + size_t outlength; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), &outlength); + if (rc) + return true; + + return MCDI_DWORD(outbuf, GET_LINK_OUT_MAC_FAULT) != 0; +} + +int efx_mcdi_port_probe(struct efx_nic *efx) +{ + int rc; + + /* Set up MDIO structure for PHY */ + efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; + efx->mdio.mdio_read = efx_mcdi_mdio_read; + efx->mdio.mdio_write = efx_mcdi_mdio_write; + + /* Fill out MDIO structure, loopback modes, and initial link state */ + rc = efx_mcdi_phy_probe(efx); + if (rc != 0) + return rc; + + return efx_mcdi_mac_init_stats(efx); +} + +void efx_mcdi_port_remove(struct efx_nic *efx) +{ + efx_mcdi_phy_remove(efx); + efx_mcdi_mac_fini_stats(efx); +} diff --git a/drivers/net/ethernet/sfc/siena/mcdi_port.h b/drivers/net/ethernet/sfc/siena/mcdi_port.h new file mode 100644 index 000000000000..07863ddbe740 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/mcdi_port.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2008-2013 Solarflare Communications Inc. + * Copyright 2019-2020 Xilinx Inc. + */ + +#ifndef EFX_MCDI_PORT_H +#define EFX_MCDI_PORT_H + +#include "net_driver.h" + +u32 efx_mcdi_phy_get_caps(struct efx_nic *efx); +bool efx_mcdi_mac_check_fault(struct efx_nic *efx); +int efx_mcdi_port_probe(struct efx_nic *efx); +void efx_mcdi_port_remove(struct efx_nic *efx); + +#endif /* EFX_MCDI_PORT_H */ diff --git a/drivers/net/ethernet/sfc/siena/mcdi_port_common.c b/drivers/net/ethernet/sfc/siena/mcdi_port_common.c new file mode 100644 index 000000000000..899cc1671004 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/mcdi_port_common.c @@ -0,0 +1,1301 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "mcdi_port_common.h" +#include "efx_common.h" +#include "nic.h" + +int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0); + BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name)); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) { + rc = -EIO; + goto fail; + } + + cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS); + cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE); + cfg->supported_cap = + MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP); + cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL); + cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT); + cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK); + memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME), + sizeof(cfg->name)); + cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE); + cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK); + memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION), + sizeof(cfg->revision)); + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +void efx_link_set_advertising(struct efx_nic *efx, + const unsigned long *advertising) +{ + memcpy(efx->link_advertising, advertising, + sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); + + efx->link_advertising[0] |= ADVERTISED_Autoneg; + if (advertising[0] & ADVERTISED_Pause) + efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX); + else + efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); + if (advertising[0] & ADVERTISED_Asym_Pause) + efx->wanted_fc ^= EFX_FC_TX; +} + +int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, + u32 flags, u32 loopback_mode, u32 loopback_speed) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN); + + BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0); + + MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed); + + return efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LOOPBACK_MODES_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST + + MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) { + rc = -EIO; + goto fail; + } + + *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED); + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset) +{ + #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ + linkset) + + bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS); + switch (media) { + case MC_CMD_MEDIA_KX4: + SET_BIT(Backplane); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseKX_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseKX4_Full); + if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + SET_BIT(40000baseKR4_Full); + break; + + case MC_CMD_MEDIA_XFP: + case MC_CMD_MEDIA_SFP_PLUS: + case MC_CMD_MEDIA_QSFP_PLUS: + SET_BIT(FIBRE); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) { + SET_BIT(1000baseT_Full); + SET_BIT(1000baseX_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) { + SET_BIT(10000baseCR_Full); + SET_BIT(10000baseLR_Full); + SET_BIT(10000baseSR_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) { + SET_BIT(40000baseCR4_Full); + SET_BIT(40000baseSR4_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) { + SET_BIT(100000baseCR4_Full); + SET_BIT(100000baseSR4_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) { + SET_BIT(25000baseCR_Full); + SET_BIT(25000baseSR_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN)) + SET_BIT(50000baseCR2_Full); + break; + + case MC_CMD_MEDIA_BASE_T: + SET_BIT(TP); + if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN)) + SET_BIT(10baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN)) + SET_BIT(10baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN)) + SET_BIT(100baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN)) + SET_BIT(100baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN)) + SET_BIT(1000baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseT_Full); + break; + } + + if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) + SET_BIT(Pause); + if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) + SET_BIT(Asym_Pause); + if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) + SET_BIT(Autoneg); + + #undef SET_BIT +} + +u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset) +{ + u32 result = 0; + + #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ + linkset) + + if (TEST_BIT(10baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN); + if (TEST_BIT(10baseT_Full)) + result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN); + if (TEST_BIT(100baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN); + if (TEST_BIT(100baseT_Full)) + result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN); + if (TEST_BIT(1000baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN); + if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full) || + TEST_BIT(1000baseX_Full)) + result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN); + if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full) || + TEST_BIT(10000baseCR_Full) || TEST_BIT(10000baseLR_Full) || + TEST_BIT(10000baseSR_Full)) + result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN); + if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full) || + TEST_BIT(40000baseSR4_Full)) + result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN); + if (TEST_BIT(100000baseCR4_Full) || TEST_BIT(100000baseSR4_Full)) + result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN); + if (TEST_BIT(25000baseCR_Full) || TEST_BIT(25000baseSR_Full)) + result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN); + if (TEST_BIT(50000baseCR2_Full)) + result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN); + if (TEST_BIT(Pause)) + result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN); + if (TEST_BIT(Asym_Pause)) + result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN); + if (TEST_BIT(Autoneg)) + result |= (1 << MC_CMD_PHY_CAP_AN_LBN); + + #undef TEST_BIT + + return result; +} + +u32 efx_get_mcdi_phy_flags(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + enum efx_phy_mode mode, supported; + u32 flags; + + /* TODO: Advertise the capabilities supported by this PHY */ + supported = 0; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN)) + supported |= PHY_MODE_TX_DISABLED; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN)) + supported |= PHY_MODE_LOW_POWER; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN)) + supported |= PHY_MODE_OFF; + + mode = efx->phy_mode & supported; + + flags = 0; + if (mode & PHY_MODE_TX_DISABLED) + flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN); + if (mode & PHY_MODE_LOW_POWER) + flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN); + if (mode & PHY_MODE_OFF) + flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN); + + return flags; +} + +u8 mcdi_to_ethtool_media(u32 media) +{ + switch (media) { + case MC_CMD_MEDIA_XAUI: + case MC_CMD_MEDIA_CX4: + case MC_CMD_MEDIA_KX4: + return PORT_OTHER; + + case MC_CMD_MEDIA_XFP: + case MC_CMD_MEDIA_SFP_PLUS: + case MC_CMD_MEDIA_QSFP_PLUS: + return PORT_FIBRE; + + case MC_CMD_MEDIA_BASE_T: + return PORT_TP; + + default: + return PORT_OTHER; + } +} + +void efx_mcdi_phy_decode_link(struct efx_nic *efx, + struct efx_link_state *link_state, + u32 speed, u32 flags, u32 fcntl) +{ + switch (fcntl) { + case MC_CMD_FCNTL_AUTO: + WARN_ON(1); /* This is not a link mode */ + link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX; + break; + case MC_CMD_FCNTL_BIDIR: + link_state->fc = EFX_FC_TX | EFX_FC_RX; + break; + case MC_CMD_FCNTL_RESPOND: + link_state->fc = EFX_FC_RX; + break; + default: + WARN_ON(1); + fallthrough; + case MC_CMD_FCNTL_OFF: + link_state->fc = 0; + break; + } + + link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN)); + link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN)); + link_state->speed = speed; +} + +/* The semantics of the ethtool FEC mode bitmask are not well defined, + * particularly the meaning of combinations of bits. Which means we get to + * define our own semantics, as follows: + * OFF overrides any other bits, and means "disable all FEC" (with the + * exception of 25G KR4/CR4, where it is not possible to reject it if AN + * partner requests it). + * AUTO on its own means use cable requirements and link partner autoneg with + * fw-default preferences for the cable type. + * AUTO and either RS or BASER means use the specified FEC type if cable and + * link partner support it, otherwise autoneg/fw-default. + * RS or BASER alone means use the specified FEC type if cable and link partner + * support it and either requests it, otherwise no FEC. + * Both RS and BASER (whether AUTO or not) means use FEC if cable and link + * partner support it, preferring RS to BASER. + */ +u32 ethtool_fec_caps_to_mcdi(u32 supported_cap, u32 ethtool_cap) +{ + u32 ret = 0; + + if (ethtool_cap & ETHTOOL_FEC_OFF) + return 0; + + if (ethtool_cap & ETHTOOL_FEC_AUTO) + ret |= ((1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_RS_FEC_LBN)) & supported_cap; + if (ethtool_cap & ETHTOOL_FEC_RS && + supported_cap & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN)) + ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN); + if (ethtool_cap & ETHTOOL_FEC_BASER) { + if (supported_cap & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN)) + ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN); + if (supported_cap & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)) + ret |= (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN); + } + return ret; +} + +/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function + * can never produce, (baser xor rs) and neither req; the implementation below + * maps both of those to AUTO. This should never matter, and it's not clear + * what a better mapping would be anyway. + */ +u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g) +{ + bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN), + rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN), + baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) + : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN), + baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN) + : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN); + + if (!baser && !rs) + return ETHTOOL_FEC_OFF; + return (rs_req ? ETHTOOL_FEC_RS : 0) | + (baser_req ? ETHTOOL_FEC_BASER : 0) | + (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO); +} + +/* Verify that the forced flow control settings (!EFX_FC_AUTO) are + * supported by the link partner. Warn the user if this isn't the case + */ +void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 rmtadv; + + /* The link partner capabilities are only relevant if the + * link supports flow control autonegotiation + */ + if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) + return; + + /* If flow control autoneg is supported and enabled, then fine */ + if (efx->wanted_fc & EFX_FC_AUTO) + return; + + rmtadv = 0; + if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) + rmtadv |= ADVERTISED_Pause; + if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) + rmtadv |= ADVERTISED_Asym_Pause; + + if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause) + netif_err(efx, link, efx->net_dev, + "warning: link partner doesn't support pause frames"); +} + +bool efx_mcdi_phy_poll(struct efx_nic *efx) +{ + struct efx_link_state old_state = efx->link_state; + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); + int rc; + + WARN_ON(!mutex_is_locked(&efx->mac_lock)); + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + efx->link_state.up = false; + else + efx_mcdi_phy_decode_link( + efx, &efx->link_state, + MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED), + MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS), + MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL)); + + return !efx_link_state_equal(&efx->link_state, &old_state); +} + +int efx_mcdi_phy_probe(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_data; + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); + u32 caps; + int rc; + + /* Initialise and populate phy_data */ + phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL); + if (phy_data == NULL) + return -ENOMEM; + + rc = efx_mcdi_get_phy_cfg(efx, phy_data); + if (rc != 0) + goto fail; + + /* Read initial link advertisement */ + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + goto fail; + + /* Fill out nic state */ + efx->phy_data = phy_data; + efx->phy_type = phy_data->type; + + efx->mdio_bus = phy_data->channel; + efx->mdio.prtad = phy_data->port; + efx->mdio.mmds = phy_data->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22); + efx->mdio.mode_support = 0; + if (phy_data->mmd_mask & (1 << MC_CMD_MMD_CLAUSE22)) + efx->mdio.mode_support |= MDIO_SUPPORTS_C22; + if (phy_data->mmd_mask & ~(1 << MC_CMD_MMD_CLAUSE22)) + efx->mdio.mode_support |= MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22; + + caps = MCDI_DWORD(outbuf, GET_LINK_OUT_CAP); + if (caps & (1 << MC_CMD_PHY_CAP_AN_LBN)) + mcdi_to_ethtool_linkset(phy_data->media, caps, + efx->link_advertising); + else + phy_data->forced_cap = caps; + + /* Assert that we can map efx -> mcdi loopback modes */ + BUILD_BUG_ON(LOOPBACK_NONE != MC_CMD_LOOPBACK_NONE); + BUILD_BUG_ON(LOOPBACK_DATA != MC_CMD_LOOPBACK_DATA); + BUILD_BUG_ON(LOOPBACK_GMAC != MC_CMD_LOOPBACK_GMAC); + BUILD_BUG_ON(LOOPBACK_XGMII != MC_CMD_LOOPBACK_XGMII); + BUILD_BUG_ON(LOOPBACK_XGXS != MC_CMD_LOOPBACK_XGXS); + BUILD_BUG_ON(LOOPBACK_XAUI != MC_CMD_LOOPBACK_XAUI); + BUILD_BUG_ON(LOOPBACK_GMII != MC_CMD_LOOPBACK_GMII); + BUILD_BUG_ON(LOOPBACK_SGMII != MC_CMD_LOOPBACK_SGMII); + BUILD_BUG_ON(LOOPBACK_XGBR != MC_CMD_LOOPBACK_XGBR); + BUILD_BUG_ON(LOOPBACK_XFI != MC_CMD_LOOPBACK_XFI); + BUILD_BUG_ON(LOOPBACK_XAUI_FAR != MC_CMD_LOOPBACK_XAUI_FAR); + BUILD_BUG_ON(LOOPBACK_GMII_FAR != MC_CMD_LOOPBACK_GMII_FAR); + BUILD_BUG_ON(LOOPBACK_SGMII_FAR != MC_CMD_LOOPBACK_SGMII_FAR); + BUILD_BUG_ON(LOOPBACK_XFI_FAR != MC_CMD_LOOPBACK_XFI_FAR); + BUILD_BUG_ON(LOOPBACK_GPHY != MC_CMD_LOOPBACK_GPHY); + BUILD_BUG_ON(LOOPBACK_PHYXS != MC_CMD_LOOPBACK_PHYXS); + BUILD_BUG_ON(LOOPBACK_PCS != MC_CMD_LOOPBACK_PCS); + BUILD_BUG_ON(LOOPBACK_PMAPMD != MC_CMD_LOOPBACK_PMAPMD); + BUILD_BUG_ON(LOOPBACK_XPORT != MC_CMD_LOOPBACK_XPORT); + BUILD_BUG_ON(LOOPBACK_XGMII_WS != MC_CMD_LOOPBACK_XGMII_WS); + BUILD_BUG_ON(LOOPBACK_XAUI_WS != MC_CMD_LOOPBACK_XAUI_WS); + BUILD_BUG_ON(LOOPBACK_XAUI_WS_FAR != MC_CMD_LOOPBACK_XAUI_WS_FAR); + BUILD_BUG_ON(LOOPBACK_XAUI_WS_NEAR != MC_CMD_LOOPBACK_XAUI_WS_NEAR); + BUILD_BUG_ON(LOOPBACK_GMII_WS != MC_CMD_LOOPBACK_GMII_WS); + BUILD_BUG_ON(LOOPBACK_XFI_WS != MC_CMD_LOOPBACK_XFI_WS); + BUILD_BUG_ON(LOOPBACK_XFI_WS_FAR != MC_CMD_LOOPBACK_XFI_WS_FAR); + BUILD_BUG_ON(LOOPBACK_PHYXS_WS != MC_CMD_LOOPBACK_PHYXS_WS); + + rc = efx_mcdi_loopback_modes(efx, &efx->loopback_modes); + if (rc != 0) + goto fail; + /* The MC indicates that LOOPBACK_NONE is a valid loopback mode, + * but by convention we don't + */ + efx->loopback_modes &= ~(1 << LOOPBACK_NONE); + + /* Set the initial link mode */ + efx_mcdi_phy_decode_link(efx, &efx->link_state, + MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED), + MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS), + MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL)); + + /* Record the initial FEC configuration (or nearest approximation + * representable in the ethtool configuration space) + */ + efx->fec_config = mcdi_fec_caps_to_ethtool(caps, + efx->link_state.speed == 25000 || + efx->link_state.speed == 50000); + + /* Default to Autonegotiated flow control if the PHY supports it */ + efx->wanted_fc = EFX_FC_RX | EFX_FC_TX; + if (phy_data->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) + efx->wanted_fc |= EFX_FC_AUTO; + efx_link_set_wanted_fc(efx, efx->wanted_fc); + + return 0; + +fail: + kfree(phy_data); + return rc; +} + +void efx_mcdi_phy_remove(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_data = efx->phy_data; + + efx->phy_data = NULL; + kfree(phy_data); +} + +void efx_mcdi_phy_get_link_ksettings(struct efx_nic *efx, struct ethtool_link_ksettings *cmd) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); + int rc; + + cmd->base.speed = efx->link_state.speed; + cmd->base.duplex = efx->link_state.fd; + cmd->base.port = mcdi_to_ethtool_media(phy_cfg->media); + cmd->base.phy_address = phy_cfg->port; + cmd->base.autoneg = !!(efx->link_advertising[0] & ADVERTISED_Autoneg); + cmd->base.mdio_support = (efx->mdio.mode_support & + (MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22)); + + mcdi_to_ethtool_linkset(phy_cfg->media, phy_cfg->supported_cap, + cmd->link_modes.supported); + memcpy(cmd->link_modes.advertising, efx->link_advertising, + sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + return; + mcdi_to_ethtool_linkset(phy_cfg->media, + MCDI_DWORD(outbuf, GET_LINK_OUT_LP_CAP), + cmd->link_modes.lp_advertising); +} + +int efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx, const struct ethtool_link_ksettings *cmd) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 caps; + int rc; + + if (cmd->base.autoneg) { + caps = (ethtool_linkset_to_mcdi_cap(cmd->link_modes.advertising) | + 1 << MC_CMD_PHY_CAP_AN_LBN); + } else if (cmd->base.duplex) { + switch (cmd->base.speed) { + case 10: caps = 1 << MC_CMD_PHY_CAP_10FDX_LBN; break; + case 100: caps = 1 << MC_CMD_PHY_CAP_100FDX_LBN; break; + case 1000: caps = 1 << MC_CMD_PHY_CAP_1000FDX_LBN; break; + case 10000: caps = 1 << MC_CMD_PHY_CAP_10000FDX_LBN; break; + case 40000: caps = 1 << MC_CMD_PHY_CAP_40000FDX_LBN; break; + case 100000: caps = 1 << MC_CMD_PHY_CAP_100000FDX_LBN; break; + case 25000: caps = 1 << MC_CMD_PHY_CAP_25000FDX_LBN; break; + case 50000: caps = 1 << MC_CMD_PHY_CAP_50000FDX_LBN; break; + default: return -EINVAL; + } + } else { + switch (cmd->base.speed) { + case 10: caps = 1 << MC_CMD_PHY_CAP_10HDX_LBN; break; + case 100: caps = 1 << MC_CMD_PHY_CAP_100HDX_LBN; break; + case 1000: caps = 1 << MC_CMD_PHY_CAP_1000HDX_LBN; break; + default: return -EINVAL; + } + } + + caps |= ethtool_fec_caps_to_mcdi(phy_cfg->supported_cap, efx->fec_config); + + rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx), + efx->loopback_mode, 0); + if (rc) + return rc; + + if (cmd->base.autoneg) { + efx_link_set_advertising(efx, cmd->link_modes.advertising); + phy_cfg->forced_cap = 0; + } else { + efx_link_clear_advertising(efx); + phy_cfg->forced_cap = caps; + } + return 0; +} + +int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN); + u32 caps, active, speed; /* MCDI format */ + bool is_25g = false; + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN) + return -EOPNOTSUPP; + + /* behaviour for 25G/50G links depends on 25G BASER bit */ + speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED); + is_25g = speed == 25000 || speed == 50000; + + caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP); + fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g); + /* BASER is never supported on 100G */ + if (speed == 100000) + fec->fec &= ~ETHTOOL_FEC_BASER; + + active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE); + switch (active) { + case MC_CMD_FEC_NONE: + fec->active_fec = ETHTOOL_FEC_OFF; + break; + case MC_CMD_FEC_BASER: + fec->active_fec = ETHTOOL_FEC_BASER; + break; + case MC_CMD_FEC_RS: + fec->active_fec = ETHTOOL_FEC_RS; + break; + default: + netif_warn(efx, hw, efx->net_dev, + "Firmware reports unrecognised FEC_TYPE %u\n", + active); + /* We don't know what firmware has picked. AUTO is as good a + * "can't happen" value as any other. + */ + fec->active_fec = ETHTOOL_FEC_AUTO; + break; + } + + return 0; +} + +/* Basic validation to ensure that the caps we are going to attempt to set are + * in fact supported by the adapter. Note that 'no FEC' is always supported. + */ +static int ethtool_fec_supported(u32 supported_cap, u32 ethtool_cap) +{ + if (ethtool_cap & ETHTOOL_FEC_OFF) + return 0; + + if (ethtool_cap && + !ethtool_fec_caps_to_mcdi(supported_cap, ethtool_cap)) + return -EINVAL; + return 0; +} + +int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, const struct ethtool_fecparam *fec) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 caps; + int rc; + + rc = ethtool_fec_supported(phy_cfg->supported_cap, fec->fec); + if (rc) + return rc; + + /* Work out what efx_mcdi_phy_set_link_ksettings() would produce from + * saved advertising bits + */ + if (test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, efx->link_advertising)) + caps = (ethtool_linkset_to_mcdi_cap(efx->link_advertising) | + 1 << MC_CMD_PHY_CAP_AN_LBN); + else + caps = phy_cfg->forced_cap; + + caps |= ethtool_fec_caps_to_mcdi(phy_cfg->supported_cap, fec->fec); + rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx), + efx->loopback_mode, 0); + if (rc) + return rc; + + /* Record the new FEC setting for subsequent set_link calls */ + efx->fec_config = fec->fec; + return 0; +} + +int efx_mcdi_phy_test_alive(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + + if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN) + return -EIO; + if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK) + return -EINVAL; + + return 0; +} + +int efx_mcdi_port_reconfigure(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 caps = (efx->link_advertising[0] ? + ethtool_linkset_to_mcdi_cap(efx->link_advertising) : + phy_cfg->forced_cap); + + caps |= ethtool_fec_caps_to_mcdi(phy_cfg->supported_cap, efx->fec_config); + + return efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx), + efx->loopback_mode, 0); +} + +static const char *const mcdi_sft9001_cable_diag_names[] = { + "cable.pairA.length", + "cable.pairB.length", + "cable.pairC.length", + "cable.pairD.length", + "cable.pairA.status", + "cable.pairB.status", + "cable.pairC.status", + "cable.pairD.status", +}; + +static int efx_mcdi_bist(struct efx_nic *efx, unsigned int bist_mode, + int *results) +{ + unsigned int retry, i, count = 0; + size_t outlen; + u32 status; + MCDI_DECLARE_BUF(inbuf, MC_CMD_START_BIST_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_POLL_BIST_OUT_SFT9001_LEN); + u8 *ptr; + int rc; + + BUILD_BUG_ON(MC_CMD_START_BIST_OUT_LEN != 0); + MCDI_SET_DWORD(inbuf, START_BIST_IN_TYPE, bist_mode); + rc = efx_mcdi_rpc(efx, MC_CMD_START_BIST, + inbuf, MC_CMD_START_BIST_IN_LEN, NULL, 0, NULL); + if (rc) + goto out; + + /* Wait up to 10s for BIST to finish */ + for (retry = 0; retry < 100; ++retry) { + BUILD_BUG_ON(MC_CMD_POLL_BIST_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_POLL_BIST, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto out; + + status = MCDI_DWORD(outbuf, POLL_BIST_OUT_RESULT); + if (status != MC_CMD_POLL_BIST_RUNNING) + goto finished; + + msleep(100); + } + + rc = -ETIMEDOUT; + goto out; + +finished: + results[count++] = (status == MC_CMD_POLL_BIST_PASSED) ? 1 : -1; + + /* SFT9001 specific cable diagnostics output */ + if (efx->phy_type == PHY_TYPE_SFT9001B && + (bist_mode == MC_CMD_PHY_BIST_CABLE_SHORT || + bist_mode == MC_CMD_PHY_BIST_CABLE_LONG)) { + ptr = MCDI_PTR(outbuf, POLL_BIST_OUT_SFT9001_CABLE_LENGTH_A); + if (status == MC_CMD_POLL_BIST_PASSED && + outlen >= MC_CMD_POLL_BIST_OUT_SFT9001_LEN) { + for (i = 0; i < 8; i++) { + results[count + i] = + EFX_DWORD_FIELD(((efx_dword_t *)ptr)[i], + EFX_DWORD_0); + } + } + count += 8; + } + rc = count; + +out: + return rc; +} + +int efx_mcdi_phy_run_tests(struct efx_nic *efx, int *results, unsigned int flags) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 mode; + int rc; + + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_LBN)) { + rc = efx_mcdi_bist(efx, MC_CMD_PHY_BIST, results); + if (rc < 0) + return rc; + + results += rc; + } + + /* If we support both LONG and SHORT, then run each in response to + * break or not. Otherwise, run the one we support + */ + mode = 0; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_SHORT_LBN)) { + if ((flags & ETH_TEST_FL_OFFLINE) && + (phy_cfg->flags & + (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN))) + mode = MC_CMD_PHY_BIST_CABLE_LONG; + else + mode = MC_CMD_PHY_BIST_CABLE_SHORT; + } else if (phy_cfg->flags & + (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN)) + mode = MC_CMD_PHY_BIST_CABLE_LONG; + + if (mode != 0) { + rc = efx_mcdi_bist(efx, mode, results); + if (rc < 0) + return rc; + results += rc; + } + + return 0; +} + +const char *efx_mcdi_phy_test_name(struct efx_nic *efx, unsigned int index) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_LBN)) { + if (index == 0) + return "bist"; + --index; + } + + if (phy_cfg->flags & ((1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_SHORT_LBN) | + (1 << MC_CMD_GET_PHY_CFG_OUT_BIST_CABLE_LONG_LBN))) { + if (index == 0) + return "cable"; + --index; + + if (efx->phy_type == PHY_TYPE_SFT9001B) { + if (index < ARRAY_SIZE(mcdi_sft9001_cable_diag_names)) + return mcdi_sft9001_cable_diag_names[index]; + index -= ARRAY_SIZE(mcdi_sft9001_cable_diag_names); + } + } + + return NULL; +} + +#define SFP_PAGE_SIZE 128 +#define SFF_DIAG_TYPE_OFFSET 92 +#define SFF_DIAG_ADDR_CHANGE BIT(2) +#define SFF_8079_NUM_PAGES 2 +#define SFF_8472_NUM_PAGES 4 +#define SFF_8436_NUM_PAGES 5 +#define SFF_DMT_LEVEL_OFFSET 94 + +/** efx_mcdi_phy_get_module_eeprom_page() - Get a single page of module eeprom + * @efx: NIC context + * @page: EEPROM page number + * @data: Destination data pointer + * @offset: Offset in page to copy from in to data + * @space: Space available in data + * + * Return: + * >=0 - amount of data copied + * <0 - error + */ +static int efx_mcdi_phy_get_module_eeprom_page(struct efx_nic *efx, + unsigned int page, + u8 *data, ssize_t offset, + ssize_t space) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX); + MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN); + unsigned int payload_len; + unsigned int to_copy; + size_t outlen; + int rc; + + if (offset > SFP_PAGE_SIZE) + return -EINVAL; + + to_copy = min(space, SFP_PAGE_SIZE - offset); + + MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_PHY_MEDIA_INFO, + inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), + &outlen); + + if (rc) + return rc; + + if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST + + SFP_PAGE_SIZE)) + return -EIO; + + payload_len = MCDI_DWORD(outbuf, GET_PHY_MEDIA_INFO_OUT_DATALEN); + if (payload_len != SFP_PAGE_SIZE) + return -EIO; + + memcpy(data, MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + offset, + to_copy); + + return to_copy; +} + +static int efx_mcdi_phy_get_module_eeprom_byte(struct efx_nic *efx, + unsigned int page, + u8 byte) +{ + u8 data; + int rc; + + rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, &data, byte, 1); + if (rc == 1) + return data; + + return rc; +} + +static int efx_mcdi_phy_diag_type(struct efx_nic *efx) +{ + /* Page zero of the EEPROM includes the diagnostic type at byte 92. */ + return efx_mcdi_phy_get_module_eeprom_byte(efx, 0, + SFF_DIAG_TYPE_OFFSET); +} + +static int efx_mcdi_phy_sff_8472_level(struct efx_nic *efx) +{ + /* Page zero of the EEPROM includes the DMT level at byte 94. */ + return efx_mcdi_phy_get_module_eeprom_byte(efx, 0, + SFF_DMT_LEVEL_OFFSET); +} + +static u32 efx_mcdi_phy_module_type(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_data = efx->phy_data; + + if (phy_data->media != MC_CMD_MEDIA_QSFP_PLUS) + return phy_data->media; + + /* A QSFP+ NIC may actually have an SFP+ module attached. + * The ID is page 0, byte 0. + */ + switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) { + case 0x3: + return MC_CMD_MEDIA_SFP_PLUS; + case 0xc: + case 0xd: + return MC_CMD_MEDIA_QSFP_PLUS; + default: + return 0; + } +} + +int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, struct ethtool_eeprom *ee, u8 *data) +{ + int rc; + ssize_t space_remaining = ee->len; + unsigned int page_off; + bool ignore_missing; + int num_pages; + int page; + + switch (efx_mcdi_phy_module_type(efx)) { + case MC_CMD_MEDIA_SFP_PLUS: + num_pages = efx_mcdi_phy_sff_8472_level(efx) > 0 ? + SFF_8472_NUM_PAGES : SFF_8079_NUM_PAGES; + page = 0; + ignore_missing = false; + break; + case MC_CMD_MEDIA_QSFP_PLUS: + num_pages = SFF_8436_NUM_PAGES; + page = -1; /* We obtain the lower page by asking for -1. */ + ignore_missing = true; /* Ignore missing pages after page 0. */ + break; + default: + return -EOPNOTSUPP; + } + + page_off = ee->offset % SFP_PAGE_SIZE; + page += ee->offset / SFP_PAGE_SIZE; + + while (space_remaining && (page < num_pages)) { + rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, + data, page_off, + space_remaining); + + if (rc > 0) { + space_remaining -= rc; + data += rc; + page_off = 0; + page++; + } else if (rc == 0) { + space_remaining = 0; + } else if (ignore_missing && (page > 0)) { + int intended_size = SFP_PAGE_SIZE - page_off; + + space_remaining -= intended_size; + if (space_remaining < 0) { + space_remaining = 0; + } else { + memset(data, 0, intended_size); + data += intended_size; + page_off = 0; + page++; + rc = 0; + } + } else { + return rc; + } + } + + return 0; +} + +int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *modinfo) +{ + int sff_8472_level; + int diag_type; + + switch (efx_mcdi_phy_module_type(efx)) { + case MC_CMD_MEDIA_SFP_PLUS: + sff_8472_level = efx_mcdi_phy_sff_8472_level(efx); + + /* If we can't read the diagnostics level we have none. */ + if (sff_8472_level < 0) + return -EOPNOTSUPP; + + /* Check if this module requires the (unsupported) address + * change operation. + */ + diag_type = efx_mcdi_phy_diag_type(efx); + + if (sff_8472_level == 0 || + (diag_type & SFF_DIAG_ADDR_CHANGE)) { + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } + break; + + case MC_CMD_MEDIA_QSFP_PLUS: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static unsigned int efx_calc_mac_mtu(struct efx_nic *efx) +{ + return EFX_MAX_FRAME_LEN(efx->net_dev->mtu); +} + +int efx_mcdi_set_mac(struct efx_nic *efx) +{ + u32 fcntl; + MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN); + + BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0); + + /* This has no effect on EF10 */ + ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR), + efx->net_dev->dev_addr); + + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU, efx_calc_mac_mtu(efx)); + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0); + + /* Set simple MAC filter for Siena */ + MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT, + SET_MAC_IN_REJECT_UNCST, efx->unicast_filter); + + MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS, + SET_MAC_IN_FLAG_INCLUDE_FCS, + !!(efx->net_dev->features & NETIF_F_RXFCS)); + + switch (efx->wanted_fc) { + case EFX_FC_RX | EFX_FC_TX: + fcntl = MC_CMD_FCNTL_BIDIR; + break; + case EFX_FC_RX: + fcntl = MC_CMD_FCNTL_RESPOND; + break; + default: + fcntl = MC_CMD_FCNTL_OFF; + break; + } + if (efx->wanted_fc & EFX_FC_AUTO) + fcntl = MC_CMD_FCNTL_AUTO; + if (efx->fc_disable) + fcntl = MC_CMD_FCNTL_OFF; + + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl); + + return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes), + NULL, 0, NULL); +} + +int efx_mcdi_set_mtu(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_MAC_EXT_IN_LEN); + + BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0); + + MCDI_SET_DWORD(inbuf, SET_MAC_EXT_IN_MTU, efx_calc_mac_mtu(efx)); + + MCDI_POPULATE_DWORD_1(inbuf, SET_MAC_EXT_IN_CONTROL, + SET_MAC_EXT_IN_CFG_MTU, 1); + + return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +enum efx_stats_action { + EFX_STATS_ENABLE, + EFX_STATS_DISABLE, + EFX_STATS_PULL, +}; + +static int efx_mcdi_mac_stats(struct efx_nic *efx, + enum efx_stats_action action, int clear) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAC_STATS_IN_LEN); + int rc; + int change = action == EFX_STATS_PULL ? 0 : 1; + int enable = action == EFX_STATS_ENABLE ? 1 : 0; + int period = action == EFX_STATS_ENABLE ? 1000 : 0; + dma_addr_t dma_addr = efx->stats_buffer.dma_addr; + u32 dma_len = action != EFX_STATS_DISABLE ? + efx->num_mac_stats * sizeof(u64) : 0; + + BUILD_BUG_ON(MC_CMD_MAC_STATS_OUT_DMA_LEN != 0); + + MCDI_SET_QWORD(inbuf, MAC_STATS_IN_DMA_ADDR, dma_addr); + MCDI_POPULATE_DWORD_7(inbuf, MAC_STATS_IN_CMD, + MAC_STATS_IN_DMA, !!enable, + MAC_STATS_IN_CLEAR, clear, + MAC_STATS_IN_PERIODIC_CHANGE, change, + MAC_STATS_IN_PERIODIC_ENABLE, enable, + MAC_STATS_IN_PERIODIC_CLEAR, 0, + MAC_STATS_IN_PERIODIC_NOEVENT, 1, + MAC_STATS_IN_PERIOD_MS, period); + MCDI_SET_DWORD(inbuf, MAC_STATS_IN_DMA_LEN, dma_len); + + if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) + MCDI_SET_DWORD(inbuf, MAC_STATS_IN_PORT_ID, efx->vport_id); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_MAC_STATS, inbuf, sizeof(inbuf), + NULL, 0, NULL); + /* Expect ENOENT if DMA queues have not been set up */ + if (rc && (rc != -ENOENT || atomic_read(&efx->active_queues))) + efx_mcdi_display_error(efx, MC_CMD_MAC_STATS, sizeof(inbuf), + NULL, 0, rc); + return rc; +} + +void efx_mcdi_mac_start_stats(struct efx_nic *efx) +{ + __le64 *dma_stats = efx->stats_buffer.addr; + + dma_stats[efx->num_mac_stats - 1] = EFX_MC_STATS_GENERATION_INVALID; + + efx_mcdi_mac_stats(efx, EFX_STATS_ENABLE, 0); +} + +void efx_mcdi_mac_stop_stats(struct efx_nic *efx) +{ + efx_mcdi_mac_stats(efx, EFX_STATS_DISABLE, 0); +} + +#define EFX_MAC_STATS_WAIT_US 100 +#define EFX_MAC_STATS_WAIT_ATTEMPTS 10 + +void efx_mcdi_mac_pull_stats(struct efx_nic *efx) +{ + __le64 *dma_stats = efx->stats_buffer.addr; + int attempts = EFX_MAC_STATS_WAIT_ATTEMPTS; + + dma_stats[efx->num_mac_stats - 1] = EFX_MC_STATS_GENERATION_INVALID; + efx_mcdi_mac_stats(efx, EFX_STATS_PULL, 0); + + while (dma_stats[efx->num_mac_stats - 1] == + EFX_MC_STATS_GENERATION_INVALID && + attempts-- != 0) + udelay(EFX_MAC_STATS_WAIT_US); +} + +int efx_mcdi_mac_init_stats(struct efx_nic *efx) +{ + int rc; + + if (!efx->num_mac_stats) + return 0; + + /* Allocate buffer for stats */ + rc = efx_nic_alloc_buffer(efx, &efx->stats_buffer, + efx->num_mac_stats * sizeof(u64), GFP_KERNEL); + if (rc) { + netif_warn(efx, probe, efx->net_dev, + "failed to allocate DMA buffer: %d\n", rc); + return rc; + } + + netif_dbg(efx, probe, efx->net_dev, + "stats buffer at %llx (virt %p phys %llx)\n", + (u64) efx->stats_buffer.dma_addr, + efx->stats_buffer.addr, + (u64) virt_to_phys(efx->stats_buffer.addr)); + + return 0; +} + +void efx_mcdi_mac_fini_stats(struct efx_nic *efx) +{ + efx_nic_free_buffer(efx, &efx->stats_buffer); +} + +/* Get physical port number (EF10 only; on Siena it is same as PF number) */ +int efx_mcdi_port_get_number(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN); + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + return rc; + + return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT); +} + +static unsigned int efx_mcdi_event_link_speed[] = { + [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100, + [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000, + [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000, + [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000, + [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000, + [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000, + [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000, +}; + +void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev) +{ + u32 flags, fcntl, speed, lpa; + + speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED); + EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed)); + speed = efx_mcdi_event_link_speed[speed]; + + flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS); + fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL); + lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP); + + /* efx->link_state is only modified by efx_mcdi_phy_get_link(), + * which is only run after flushing the event queues. Therefore, it + * is safe to modify the link state outside of the mac_lock here. + */ + efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl); + + efx_mcdi_phy_check_fcntl(efx, lpa); + + efx_link_status_changed(efx); +} diff --git a/drivers/net/ethernet/sfc/siena/mcdi_port_common.h b/drivers/net/ethernet/sfc/siena/mcdi_port_common.h new file mode 100644 index 000000000000..ed31690e591c --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/mcdi_port_common.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#ifndef EFX_MCDI_PORT_COMMON_H +#define EFX_MCDI_PORT_COMMON_H + +#include "net_driver.h" +#include "mcdi.h" +#include "mcdi_pcol.h" + +struct efx_mcdi_phy_data { + u32 flags; + u32 type; + u32 supported_cap; + u32 channel; + u32 port; + u32 stats_mask; + u8 name[20]; + u32 media; + u32 mmd_mask; + u8 revision[20]; + u32 forced_cap; +}; + +int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg); +void efx_link_set_advertising(struct efx_nic *efx, + const unsigned long *advertising); +int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, + u32 flags, u32 loopback_mode, u32 loopback_speed); +int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes); +void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset); +u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset); +u32 efx_get_mcdi_phy_flags(struct efx_nic *efx); +u8 mcdi_to_ethtool_media(u32 media); +void efx_mcdi_phy_decode_link(struct efx_nic *efx, + struct efx_link_state *link_state, + u32 speed, u32 flags, u32 fcntl); +u32 ethtool_fec_caps_to_mcdi(u32 supported_cap, u32 ethtool_cap); +u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g); +void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa); +bool efx_mcdi_phy_poll(struct efx_nic *efx); +int efx_mcdi_phy_probe(struct efx_nic *efx); +void efx_mcdi_phy_remove(struct efx_nic *efx); +void efx_mcdi_phy_get_link_ksettings(struct efx_nic *efx, struct ethtool_link_ksettings *cmd); +int efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx, const struct ethtool_link_ksettings *cmd); +int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec); +int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, const struct ethtool_fecparam *fec); +int efx_mcdi_phy_test_alive(struct efx_nic *efx); +int efx_mcdi_port_reconfigure(struct efx_nic *efx); +int efx_mcdi_phy_run_tests(struct efx_nic *efx, int *results, unsigned int flags); +const char *efx_mcdi_phy_test_name(struct efx_nic *efx, unsigned int index); +int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx, struct ethtool_eeprom *ee, u8 *data); +int efx_mcdi_phy_get_module_info(struct efx_nic *efx, struct ethtool_modinfo *modinfo); +int efx_mcdi_set_mac(struct efx_nic *efx); +int efx_mcdi_set_mtu(struct efx_nic *efx); +int efx_mcdi_mac_init_stats(struct efx_nic *efx); +void efx_mcdi_mac_fini_stats(struct efx_nic *efx); +int efx_mcdi_port_get_number(struct efx_nic *efx); +void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev); + +#endif diff --git a/drivers/net/ethernet/sfc/siena/mtd.c b/drivers/net/ethernet/sfc/siena/mtd.c new file mode 100644 index 000000000000..273c08e5455f --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/mtd.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2013 Solarflare Communications Inc. + */ + +#include <linux/module.h> +#include <linux/mtd/mtd.h> +#include <linux/slab.h> +#include <linux/rtnetlink.h> + +#include "net_driver.h" +#include "efx.h" + +#define to_efx_mtd_partition(mtd) \ + container_of(mtd, struct efx_mtd_partition, mtd) + +/* MTD interface */ + +static int efx_mtd_erase(struct mtd_info *mtd, struct erase_info *erase) +{ + struct efx_nic *efx = mtd->priv; + + return efx->type->mtd_erase(mtd, erase->addr, erase->len); +} + +static void efx_mtd_sync(struct mtd_info *mtd) +{ + struct efx_mtd_partition *part = to_efx_mtd_partition(mtd); + struct efx_nic *efx = mtd->priv; + int rc; + + rc = efx->type->mtd_sync(mtd); + if (rc) + pr_err("%s: %s sync failed (%d)\n", + part->name, part->dev_type_name, rc); +} + +static void efx_mtd_remove_partition(struct efx_mtd_partition *part) +{ + int rc; + + for (;;) { + rc = mtd_device_unregister(&part->mtd); + if (rc != -EBUSY) + break; + ssleep(1); + } + WARN_ON(rc); + list_del(&part->node); +} + +int efx_mtd_add(struct efx_nic *efx, struct efx_mtd_partition *parts, + size_t n_parts, size_t sizeof_part) +{ + struct efx_mtd_partition *part; + size_t i; + + for (i = 0; i < n_parts; i++) { + part = (struct efx_mtd_partition *)((char *)parts + + i * sizeof_part); + + part->mtd.writesize = 1; + + if (!(part->mtd.flags & MTD_NO_ERASE)) + part->mtd.flags |= MTD_WRITEABLE; + + part->mtd.owner = THIS_MODULE; + part->mtd.priv = efx; + part->mtd.name = part->name; + part->mtd._erase = efx_mtd_erase; + part->mtd._read = efx->type->mtd_read; + part->mtd._write = efx->type->mtd_write; + part->mtd._sync = efx_mtd_sync; + + efx->type->mtd_rename(part); + + if (mtd_device_register(&part->mtd, NULL, 0)) + goto fail; + + /* Add to list in order - efx_mtd_remove() depends on this */ + list_add_tail(&part->node, &efx->mtd_list); + } + + return 0; + +fail: + while (i--) { + part = (struct efx_mtd_partition *)((char *)parts + + i * sizeof_part); + efx_mtd_remove_partition(part); + } + /* Failure is unlikely here, but probably means we're out of memory */ + return -ENOMEM; +} + +void efx_mtd_remove(struct efx_nic *efx) +{ + struct efx_mtd_partition *parts, *part, *next; + + WARN_ON(efx_dev_registered(efx)); + + if (list_empty(&efx->mtd_list)) + return; + + parts = list_first_entry(&efx->mtd_list, struct efx_mtd_partition, + node); + + list_for_each_entry_safe(part, next, &efx->mtd_list, node) + efx_mtd_remove_partition(part); + + kfree(parts); +} + +void efx_mtd_rename(struct efx_nic *efx) +{ + struct efx_mtd_partition *part; + + ASSERT_RTNL(); + + list_for_each_entry(part, &efx->mtd_list, node) + efx->type->mtd_rename(part); +} diff --git a/drivers/net/ethernet/sfc/siena/net_driver.h b/drivers/net/ethernet/sfc/siena/net_driver.h new file mode 100644 index 000000000000..318db906a154 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/net_driver.h @@ -0,0 +1,1716 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2005-2013 Solarflare Communications Inc. + */ + +/* Common definitions for all Efx net driver code */ + +#ifndef EFX_NET_DRIVER_H +#define EFX_NET_DRIVER_H + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/if_vlan.h> +#include <linux/timer.h> +#include <linux/mdio.h> +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/device.h> +#include <linux/highmem.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> +#include <linux/vmalloc.h> +#include <linux/mtd/mtd.h> +#include <net/busy_poll.h> +#include <net/xdp.h> + +#include "enum.h" +#include "bitfield.h" +#include "filter.h" + +/************************************************************************** + * + * Build definitions + * + **************************************************************************/ + +#ifdef DEBUG +#define EFX_WARN_ON_ONCE_PARANOID(x) WARN_ON_ONCE(x) +#define EFX_WARN_ON_PARANOID(x) WARN_ON(x) +#else +#define EFX_WARN_ON_ONCE_PARANOID(x) do {} while (0) +#define EFX_WARN_ON_PARANOID(x) do {} while (0) +#endif + +/************************************************************************** + * + * Efx data structures + * + **************************************************************************/ + +#define EFX_MAX_CHANNELS 32U +#define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS +#define EFX_EXTRA_CHANNEL_IOV 0 +#define EFX_EXTRA_CHANNEL_PTP 1 +#define EFX_MAX_EXTRA_CHANNELS 2U + +/* Checksum generation is a per-queue option in hardware, so each + * queue visible to the networking core is backed by two hardware TX + * queues. */ +#define EFX_MAX_TX_TC 2 +#define EFX_MAX_CORE_TX_QUEUES (EFX_MAX_TX_TC * EFX_MAX_CHANNELS) +#define EFX_TXQ_TYPE_OUTER_CSUM 1 /* Outer checksum offload */ +#define EFX_TXQ_TYPE_INNER_CSUM 2 /* Inner checksum offload */ +#define EFX_TXQ_TYPE_HIGHPRI 4 /* High-priority (for TC) */ +#define EFX_TXQ_TYPES 8 +/* HIGHPRI is Siena-only, and INNER_CSUM is EF10, so no need for both */ +#define EFX_MAX_TXQ_PER_CHANNEL 4 +#define EFX_MAX_TX_QUEUES (EFX_MAX_TXQ_PER_CHANNEL * EFX_MAX_CHANNELS) + +/* Maximum possible MTU the driver supports */ +#define EFX_MAX_MTU (9 * 1024) + +/* Minimum MTU, from RFC791 (IP) */ +#define EFX_MIN_MTU 68 + +/* Maximum total header length for TSOv2 */ +#define EFX_TSO2_MAX_HDRLEN 208 + +/* Size of an RX scatter buffer. Small enough to pack 2 into a 4K page, + * and should be a multiple of the cache line size. + */ +#define EFX_RX_USR_BUF_SIZE (2048 - 256) + +/* If possible, we should ensure cache line alignment at start and end + * of every buffer. Otherwise, we just need to ensure 4-byte + * alignment of the network header. + */ +#if NET_IP_ALIGN == 0 +#define EFX_RX_BUF_ALIGNMENT L1_CACHE_BYTES +#else +#define EFX_RX_BUF_ALIGNMENT 4 +#endif + +/* Non-standard XDP_PACKET_HEADROOM and tailroom to satisfy XDP_REDIRECT and + * still fit two standard MTU size packets into a single 4K page. + */ +#define EFX_XDP_HEADROOM 128 +#define EFX_XDP_TAILROOM SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + +/* Forward declare Precision Time Protocol (PTP) support structure. */ +struct efx_ptp_data; +struct hwtstamp_config; + +struct efx_self_tests; + +/** + * struct efx_buffer - A general-purpose DMA buffer + * @addr: host base address of the buffer + * @dma_addr: DMA base address of the buffer + * @len: Buffer length, in bytes + * + * The NIC uses these buffers for its interrupt status registers and + * MAC stats dumps. + */ +struct efx_buffer { + void *addr; + dma_addr_t dma_addr; + unsigned int len; +}; + +/** + * struct efx_special_buffer - DMA buffer entered into buffer table + * @buf: Standard &struct efx_buffer + * @index: Buffer index within controller;s buffer table + * @entries: Number of buffer table entries + * + * The NIC has a buffer table that maps buffers of size %EFX_BUF_SIZE. + * Event and descriptor rings are addressed via one or more buffer + * table entries (and so can be physically non-contiguous, although we + * currently do not take advantage of that). On Falcon and Siena we + * have to take care of allocating and initialising the entries + * ourselves. On later hardware this is managed by the firmware and + * @index and @entries are left as 0. + */ +struct efx_special_buffer { + struct efx_buffer buf; + unsigned int index; + unsigned int entries; +}; + +/** + * struct efx_tx_buffer - buffer state for a TX descriptor + * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be + * freed when descriptor completes + * @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data + * member is the associated buffer to drop a page reference on. + * @option: When @flags & %EFX_TX_BUF_OPTION, an EF10-specific option + * descriptor. + * @dma_addr: DMA address of the fragment. + * @flags: Flags for allocation and DMA mapping type + * @len: Length of this fragment. + * This field is zero when the queue slot is empty. + * @unmap_len: Length of this fragment to unmap + * @dma_offset: Offset of @dma_addr from the address of the backing DMA mapping. + * Only valid if @unmap_len != 0. + */ +struct efx_tx_buffer { + union { + const struct sk_buff *skb; + struct xdp_frame *xdpf; + }; + union { + efx_qword_t option; /* EF10 */ + dma_addr_t dma_addr; + }; + unsigned short flags; + unsigned short len; + unsigned short unmap_len; + unsigned short dma_offset; +}; +#define EFX_TX_BUF_CONT 1 /* not last descriptor of packet */ +#define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */ +#define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */ +#define EFX_TX_BUF_OPTION 0x10 /* empty buffer for option descriptor */ +#define EFX_TX_BUF_XDP 0x20 /* buffer was sent with XDP */ +#define EFX_TX_BUF_TSO_V3 0x40 /* empty buffer for a TSO_V3 descriptor */ + +/** + * struct efx_tx_queue - An Efx TX queue + * + * This is a ring buffer of TX fragments. + * Since the TX completion path always executes on the same + * CPU and the xmit path can operate on different CPUs, + * performance is increased by ensuring that the completion + * path and the xmit path operate on different cache lines. + * This is particularly important if the xmit path is always + * executing on one CPU which is different from the completion + * path. There is also a cache line for members which are + * read but not written on the fast path. + * + * @efx: The associated Efx NIC + * @queue: DMA queue number + * @label: Label for TX completion events. + * Is our index within @channel->tx_queue array. + * @type: configuration type of this TX queue. A bitmask of %EFX_TXQ_TYPE_* flags. + * @tso_version: Version of TSO in use for this queue. + * @tso_encap: Is encapsulated TSO supported? Supported in TSOv2 on 8000 series. + * @channel: The associated channel + * @core_txq: The networking core TX queue structure + * @buffer: The software buffer ring + * @cb_page: Array of pages of copy buffers. Carved up according to + * %EFX_TX_CB_ORDER into %EFX_TX_CB_SIZE-sized chunks. + * @txd: The hardware descriptor ring + * @ptr_mask: The size of the ring minus 1. + * @piobuf: PIO buffer region for this TX queue (shared with its partner). + * Size of the region is efx_piobuf_size. + * @piobuf_offset: Buffer offset to be specified in PIO descriptors + * @initialised: Has hardware queue been initialised? + * @timestamping: Is timestamping enabled for this channel? + * @xdp_tx: Is this an XDP tx queue? + * @read_count: Current read pointer. + * This is the number of buffers that have been removed from both rings. + * @old_write_count: The value of @write_count when last checked. + * This is here for performance reasons. The xmit path will + * only get the up-to-date value of @write_count if this + * variable indicates that the queue is empty. This is to + * avoid cache-line ping-pong between the xmit path and the + * completion path. + * @merge_events: Number of TX merged completion events + * @completed_timestamp_major: Top part of the most recent tx timestamp. + * @completed_timestamp_minor: Low part of the most recent tx timestamp. + * @insert_count: Current insert pointer + * This is the number of buffers that have been added to the + * software ring. + * @write_count: Current write pointer + * This is the number of buffers that have been added to the + * hardware ring. + * @packet_write_count: Completable write pointer + * This is the write pointer of the last packet written. + * Normally this will equal @write_count, but as option descriptors + * don't produce completion events, they won't update this. + * Filled in iff @efx->type->option_descriptors; only used for PIO. + * Thus, this is written and used on EF10, and neither on farch. + * @old_read_count: The value of read_count when last checked. + * This is here for performance reasons. The xmit path will + * only get the up-to-date value of read_count if this + * variable indicates that the queue is full. This is to + * avoid cache-line ping-pong between the xmit path and the + * completion path. + * @tso_bursts: Number of times TSO xmit invoked by kernel + * @tso_long_headers: Number of packets with headers too long for standard + * blocks + * @tso_packets: Number of packets via the TSO xmit path + * @tso_fallbacks: Number of times TSO fallback used + * @pushes: Number of times the TX push feature has been used + * @pio_packets: Number of times the TX PIO feature has been used + * @xmit_pending: Are any packets waiting to be pushed to the NIC + * @cb_packets: Number of times the TX copybreak feature has been used + * @notify_count: Count of notified descriptors to the NIC + * @empty_read_count: If the completion path has seen the queue as empty + * and the transmission path has not yet checked this, the value of + * @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0. + */ +struct efx_tx_queue { + /* Members which don't change on the fast path */ + struct efx_nic *efx ____cacheline_aligned_in_smp; + unsigned int queue; + unsigned int label; + unsigned int type; + unsigned int tso_version; + bool tso_encap; + struct efx_channel *channel; + struct netdev_queue *core_txq; + struct efx_tx_buffer *buffer; + struct efx_buffer *cb_page; + struct efx_special_buffer txd; + unsigned int ptr_mask; + void __iomem *piobuf; + unsigned int piobuf_offset; + bool initialised; + bool timestamping; + bool xdp_tx; + + /* Members used mainly on the completion path */ + unsigned int read_count ____cacheline_aligned_in_smp; + unsigned int old_write_count; + unsigned int merge_events; + unsigned int bytes_compl; + unsigned int pkts_compl; + u32 completed_timestamp_major; + u32 completed_timestamp_minor; + + /* Members used only on the xmit path */ + unsigned int insert_count ____cacheline_aligned_in_smp; + unsigned int write_count; + unsigned int packet_write_count; + unsigned int old_read_count; + unsigned int tso_bursts; + unsigned int tso_long_headers; + unsigned int tso_packets; + unsigned int tso_fallbacks; + unsigned int pushes; + unsigned int pio_packets; + bool xmit_pending; + unsigned int cb_packets; + unsigned int notify_count; + /* Statistics to supplement MAC stats */ + unsigned long tx_packets; + + /* Members shared between paths and sometimes updated */ + unsigned int empty_read_count ____cacheline_aligned_in_smp; +#define EFX_EMPTY_COUNT_VALID 0x80000000 + atomic_t flush_outstanding; +}; + +#define EFX_TX_CB_ORDER 7 +#define EFX_TX_CB_SIZE (1 << EFX_TX_CB_ORDER) - NET_IP_ALIGN + +/** + * struct efx_rx_buffer - An Efx RX data buffer + * @dma_addr: DMA base address of the buffer + * @page: The associated page buffer. + * Will be %NULL if the buffer slot is currently free. + * @page_offset: If pending: offset in @page of DMA base address. + * If completed: offset in @page of Ethernet header. + * @len: If pending: length for DMA descriptor. + * If completed: received length, excluding hash prefix. + * @flags: Flags for buffer and packet state. These are only set on the + * first buffer of a scattered packet. + */ +struct efx_rx_buffer { + dma_addr_t dma_addr; + struct page *page; + u16 page_offset; + u16 len; + u16 flags; +}; +#define EFX_RX_BUF_LAST_IN_PAGE 0x0001 +#define EFX_RX_PKT_CSUMMED 0x0002 +#define EFX_RX_PKT_DISCARD 0x0004 +#define EFX_RX_PKT_TCP 0x0040 +#define EFX_RX_PKT_PREFIX_LEN 0x0080 /* length is in prefix only */ +#define EFX_RX_PKT_CSUM_LEVEL 0x0200 + +/** + * struct efx_rx_page_state - Page-based rx buffer state + * + * Inserted at the start of every page allocated for receive buffers. + * Used to facilitate sharing dma mappings between recycled rx buffers + * and those passed up to the kernel. + * + * @dma_addr: The dma address of this page. + */ +struct efx_rx_page_state { + dma_addr_t dma_addr; + + unsigned int __pad[] ____cacheline_aligned; +}; + +/** + * struct efx_rx_queue - An Efx RX queue + * @efx: The associated Efx NIC + * @core_index: Index of network core RX queue. Will be >= 0 iff this + * is associated with a real RX queue. + * @buffer: The software buffer ring + * @rxd: The hardware descriptor ring + * @ptr_mask: The size of the ring minus 1. + * @refill_enabled: Enable refill whenever fill level is low + * @flush_pending: Set when a RX flush is pending. Has the same lifetime as + * @rxq_flush_pending. + * @added_count: Number of buffers added to the receive queue. + * @notified_count: Number of buffers given to NIC (<= @added_count). + * @removed_count: Number of buffers removed from the receive queue. + * @scatter_n: Used by NIC specific receive code. + * @scatter_len: Used by NIC specific receive code. + * @page_ring: The ring to store DMA mapped pages for reuse. + * @page_add: Counter to calculate the write pointer for the recycle ring. + * @page_remove: Counter to calculate the read pointer for the recycle ring. + * @page_recycle_count: The number of pages that have been recycled. + * @page_recycle_failed: The number of pages that couldn't be recycled because + * the kernel still held a reference to them. + * @page_recycle_full: The number of pages that were released because the + * recycle ring was full. + * @page_ptr_mask: The number of pages in the RX recycle ring minus 1. + * @max_fill: RX descriptor maximum fill level (<= ring size) + * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill + * (<= @max_fill) + * @min_fill: RX descriptor minimum non-zero fill level. + * This records the minimum fill level observed when a ring + * refill was triggered. + * @recycle_count: RX buffer recycle counter. + * @slow_fill: Timer used to defer efx_nic_generate_fill_event(). + * @xdp_rxq_info: XDP specific RX queue information. + * @xdp_rxq_info_valid: Is xdp_rxq_info valid data?. + */ +struct efx_rx_queue { + struct efx_nic *efx; + int core_index; + struct efx_rx_buffer *buffer; + struct efx_special_buffer rxd; + unsigned int ptr_mask; + bool refill_enabled; + bool flush_pending; + + unsigned int added_count; + unsigned int notified_count; + unsigned int removed_count; + unsigned int scatter_n; + unsigned int scatter_len; + struct page **page_ring; + unsigned int page_add; + unsigned int page_remove; + unsigned int page_recycle_count; + unsigned int page_recycle_failed; + unsigned int page_recycle_full; + unsigned int page_ptr_mask; + unsigned int max_fill; + unsigned int fast_fill_trigger; + unsigned int min_fill; + unsigned int min_overfill; + unsigned int recycle_count; + struct timer_list slow_fill; + unsigned int slow_fill_count; + /* Statistics to supplement MAC stats */ + unsigned long rx_packets; + struct xdp_rxq_info xdp_rxq_info; + bool xdp_rxq_info_valid; +}; + +enum efx_sync_events_state { + SYNC_EVENTS_DISABLED = 0, + SYNC_EVENTS_QUIESCENT, + SYNC_EVENTS_REQUESTED, + SYNC_EVENTS_VALID, +}; + +/** + * struct efx_channel - An Efx channel + * + * A channel comprises an event queue, at least one TX queue, at least + * one RX queue, and an associated tasklet for processing the event + * queue. + * + * @efx: Associated Efx NIC + * @channel: Channel instance number + * @type: Channel type definition + * @eventq_init: Event queue initialised flag + * @enabled: Channel enabled indicator + * @irq: IRQ number (MSI and MSI-X only) + * @irq_moderation_us: IRQ moderation value (in microseconds) + * @napi_dev: Net device used with NAPI + * @napi_str: NAPI control structure + * @state: state for NAPI vs busy polling + * @state_lock: lock protecting @state + * @eventq: Event queue buffer + * @eventq_mask: Event queue pointer mask + * @eventq_read_ptr: Event queue read pointer + * @event_test_cpu: Last CPU to handle interrupt or test event for this channel + * @irq_count: Number of IRQs since last adaptive moderation decision + * @irq_mod_score: IRQ moderation score + * @rfs_filter_count: number of accelerated RFS filters currently in place; + * equals the count of @rps_flow_id slots filled + * @rfs_last_expiry: value of jiffies last time some accelerated RFS filters + * were checked for expiry + * @rfs_expire_index: next accelerated RFS filter ID to check for expiry + * @n_rfs_succeeded: number of successful accelerated RFS filter insertions + * @n_rfs_failed: number of failed accelerated RFS filter insertions + * @filter_work: Work item for efx_filter_rfs_expire() + * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, + * indexed by filter ID + * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors + * @n_rx_ip_hdr_chksum_err: Count of RX IP header checksum errors + * @n_rx_tcp_udp_chksum_err: Count of RX TCP and UDP checksum errors + * @n_rx_mcast_mismatch: Count of unmatched multicast frames + * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors + * @n_rx_overlength: Count of RX_OVERLENGTH errors + * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun + * @n_rx_nodesc_trunc: Number of RX packets truncated and then dropped due to + * lack of descriptors + * @n_rx_merge_events: Number of RX merged completion events + * @n_rx_merge_packets: Number of RX packets completed by merged events + * @n_rx_xdp_drops: Count of RX packets intentionally dropped due to XDP + * @n_rx_xdp_bad_drops: Count of RX packets dropped due to XDP errors + * @n_rx_xdp_tx: Count of RX packets retransmitted due to XDP + * @n_rx_xdp_redirect: Count of RX packets redirected to a different NIC by XDP + * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by + * __efx_rx_packet(), or zero if there is none + * @rx_pkt_index: Ring index of first buffer for next packet to be delivered + * by __efx_rx_packet(), if @rx_pkt_n_frags != 0 + * @rx_list: list of SKBs from current RX, awaiting processing + * @rx_queue: RX queue for this channel + * @tx_queue: TX queues for this channel + * @tx_queue_by_type: pointers into @tx_queue, or %NULL, indexed by txq type + * @sync_events_state: Current state of sync events on this channel + * @sync_timestamp_major: Major part of the last ptp sync event + * @sync_timestamp_minor: Minor part of the last ptp sync event + */ +struct efx_channel { + struct efx_nic *efx; + int channel; + const struct efx_channel_type *type; + bool eventq_init; + bool enabled; + int irq; + unsigned int irq_moderation_us; + struct net_device *napi_dev; + struct napi_struct napi_str; +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned long busy_poll_state; +#endif + struct efx_special_buffer eventq; + unsigned int eventq_mask; + unsigned int eventq_read_ptr; + int event_test_cpu; + + unsigned int irq_count; + unsigned int irq_mod_score; +#ifdef CONFIG_RFS_ACCEL + unsigned int rfs_filter_count; + unsigned int rfs_last_expiry; + unsigned int rfs_expire_index; + unsigned int n_rfs_succeeded; + unsigned int n_rfs_failed; + struct delayed_work filter_work; +#define RPS_FLOW_ID_INVALID 0xFFFFFFFF + u32 *rps_flow_id; +#endif + + unsigned int n_rx_tobe_disc; + unsigned int n_rx_ip_hdr_chksum_err; + unsigned int n_rx_tcp_udp_chksum_err; + unsigned int n_rx_outer_ip_hdr_chksum_err; + unsigned int n_rx_outer_tcp_udp_chksum_err; + unsigned int n_rx_inner_ip_hdr_chksum_err; + unsigned int n_rx_inner_tcp_udp_chksum_err; + unsigned int n_rx_eth_crc_err; + unsigned int n_rx_mcast_mismatch; + unsigned int n_rx_frm_trunc; + unsigned int n_rx_overlength; + unsigned int n_skbuff_leaks; + unsigned int n_rx_nodesc_trunc; + unsigned int n_rx_merge_events; + unsigned int n_rx_merge_packets; + unsigned int n_rx_xdp_drops; + unsigned int n_rx_xdp_bad_drops; + unsigned int n_rx_xdp_tx; + unsigned int n_rx_xdp_redirect; + + unsigned int rx_pkt_n_frags; + unsigned int rx_pkt_index; + + struct list_head *rx_list; + + struct efx_rx_queue rx_queue; + struct efx_tx_queue tx_queue[EFX_MAX_TXQ_PER_CHANNEL]; + struct efx_tx_queue *tx_queue_by_type[EFX_TXQ_TYPES]; + + enum efx_sync_events_state sync_events_state; + u32 sync_timestamp_major; + u32 sync_timestamp_minor; +}; + +/** + * struct efx_msi_context - Context for each MSI + * @efx: The associated NIC + * @index: Index of the channel/IRQ + * @name: Name of the channel/IRQ + * + * Unlike &struct efx_channel, this is never reallocated and is always + * safe for the IRQ handler to access. + */ +struct efx_msi_context { + struct efx_nic *efx; + unsigned int index; + char name[IFNAMSIZ + 6]; +}; + +/** + * struct efx_channel_type - distinguishes traffic and extra channels + * @handle_no_channel: Handle failure to allocate an extra channel + * @pre_probe: Set up extra state prior to initialisation + * @post_remove: Tear down extra state after finalisation, if allocated. + * May be called on channels that have not been probed. + * @get_name: Generate the channel's name (used for its IRQ handler) + * @copy: Copy the channel state prior to reallocation. May be %NULL if + * reallocation is not supported. + * @receive_skb: Handle an skb ready to be passed to netif_receive_skb() + * @want_txqs: Determine whether this channel should have TX queues + * created. If %NULL, TX queues are not created. + * @keep_eventq: Flag for whether event queue should be kept initialised + * while the device is stopped + * @want_pio: Flag for whether PIO buffers should be linked to this + * channel's TX queues. + */ +struct efx_channel_type { + void (*handle_no_channel)(struct efx_nic *); + int (*pre_probe)(struct efx_channel *); + void (*post_remove)(struct efx_channel *); + void (*get_name)(struct efx_channel *, char *buf, size_t len); + struct efx_channel *(*copy)(const struct efx_channel *); + bool (*receive_skb)(struct efx_channel *, struct sk_buff *); + bool (*want_txqs)(struct efx_channel *); + bool keep_eventq; + bool want_pio; +}; + +enum efx_led_mode { + EFX_LED_OFF = 0, + EFX_LED_ON = 1, + EFX_LED_DEFAULT = 2 +}; + +#define STRING_TABLE_LOOKUP(val, member) \ + ((val) < member ## _max) ? member ## _names[val] : "(invalid)" + +extern const char *const efx_loopback_mode_names[]; +extern const unsigned int efx_loopback_mode_max; +#define LOOPBACK_MODE(efx) \ + STRING_TABLE_LOOKUP((efx)->loopback_mode, efx_loopback_mode) + +enum efx_int_mode { + /* Be careful if altering to correct macro below */ + EFX_INT_MODE_MSIX = 0, + EFX_INT_MODE_MSI = 1, + EFX_INT_MODE_LEGACY = 2, + EFX_INT_MODE_MAX /* Insert any new items before this */ +}; +#define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI) + +enum nic_state { + STATE_UNINIT = 0, /* device being probed/removed or is frozen */ + STATE_READY = 1, /* hardware ready and netdev registered */ + STATE_DISABLED = 2, /* device disabled due to hardware errors */ + STATE_RECOVERY = 3, /* device recovering from PCI error */ +}; + +/* Forward declaration */ +struct efx_nic; + +/* Pseudo bit-mask flow control field */ +#define EFX_FC_RX FLOW_CTRL_RX +#define EFX_FC_TX FLOW_CTRL_TX +#define EFX_FC_AUTO 4 + +/** + * struct efx_link_state - Current state of the link + * @up: Link is up + * @fd: Link is full-duplex + * @fc: Actual flow control flags + * @speed: Link speed (Mbps) + */ +struct efx_link_state { + bool up; + bool fd; + u8 fc; + unsigned int speed; +}; + +static inline bool efx_link_state_equal(const struct efx_link_state *left, + const struct efx_link_state *right) +{ + return left->up == right->up && left->fd == right->fd && + left->fc == right->fc && left->speed == right->speed; +} + +/** + * enum efx_phy_mode - PHY operating mode flags + * @PHY_MODE_NORMAL: on and should pass traffic + * @PHY_MODE_TX_DISABLED: on with TX disabled + * @PHY_MODE_LOW_POWER: set to low power through MDIO + * @PHY_MODE_OFF: switched off through external control + * @PHY_MODE_SPECIAL: on but will not pass traffic + */ +enum efx_phy_mode { + PHY_MODE_NORMAL = 0, + PHY_MODE_TX_DISABLED = 1, + PHY_MODE_LOW_POWER = 2, + PHY_MODE_OFF = 4, + PHY_MODE_SPECIAL = 8, +}; + +static inline bool efx_phy_mode_disabled(enum efx_phy_mode mode) +{ + return !!(mode & ~PHY_MODE_TX_DISABLED); +} + +/** + * struct efx_hw_stat_desc - Description of a hardware statistic + * @name: Name of the statistic as visible through ethtool, or %NULL if + * it should not be exposed + * @dma_width: Width in bits (0 for non-DMA statistics) + * @offset: Offset within stats (ignored for non-DMA statistics) + */ +struct efx_hw_stat_desc { + const char *name; + u16 dma_width; + u16 offset; +}; + +/* Number of bits used in a multicast filter hash address */ +#define EFX_MCAST_HASH_BITS 8 + +/* Number of (single-bit) entries in a multicast filter hash */ +#define EFX_MCAST_HASH_ENTRIES (1 << EFX_MCAST_HASH_BITS) + +/* An Efx multicast filter hash */ +union efx_multicast_hash { + u8 byte[EFX_MCAST_HASH_ENTRIES / 8]; + efx_oword_t oword[EFX_MCAST_HASH_ENTRIES / sizeof(efx_oword_t) / 8]; +}; + +struct vfdi_status; + +/* The reserved RSS context value */ +#define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff +/** + * struct efx_rss_context - A user-defined RSS context for filtering + * @list: node of linked list on which this struct is stored + * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or + * %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC. + * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID. + * @user_id: the rss_context ID exposed to userspace over ethtool. + * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled + * @rx_hash_key: Toeplitz hash key for this RSS context + * @indir_table: Indirection table for this RSS context + */ +struct efx_rss_context { + struct list_head list; + u32 context_id; + u32 user_id; + bool rx_hash_udp_4tuple; + u8 rx_hash_key[40]; + u32 rx_indir_table[128]; +}; + +#ifdef CONFIG_RFS_ACCEL +/* Order of these is important, since filter_id >= %EFX_ARFS_FILTER_ID_PENDING + * is used to test if filter does or will exist. + */ +#define EFX_ARFS_FILTER_ID_PENDING -1 +#define EFX_ARFS_FILTER_ID_ERROR -2 +#define EFX_ARFS_FILTER_ID_REMOVING -3 +/** + * struct efx_arfs_rule - record of an ARFS filter and its IDs + * @node: linkage into hash table + * @spec: details of the filter (used as key for hash table). Use efx->type to + * determine which member to use. + * @rxq_index: channel to which the filter will steer traffic. + * @arfs_id: filter ID which was returned to ARFS + * @filter_id: index in software filter table. May be + * %EFX_ARFS_FILTER_ID_PENDING if filter was not inserted yet, + * %EFX_ARFS_FILTER_ID_ERROR if filter insertion failed, or + * %EFX_ARFS_FILTER_ID_REMOVING if expiry is currently removing the filter. + */ +struct efx_arfs_rule { + struct hlist_node node; + struct efx_filter_spec spec; + u16 rxq_index; + u16 arfs_id; + s32 filter_id; +}; + +/* Size chosen so that the table is one page (4kB) */ +#define EFX_ARFS_HASH_TABLE_SIZE 512 + +/** + * struct efx_async_filter_insertion - Request to asynchronously insert a filter + * @net_dev: Reference to the netdevice + * @spec: The filter to insert + * @work: Workitem for this request + * @rxq_index: Identifies the channel for which this request was made + * @flow_id: Identifies the kernel-side flow for which this request was made + */ +struct efx_async_filter_insertion { + struct net_device *net_dev; + struct efx_filter_spec spec; + struct work_struct work; + u16 rxq_index; + u32 flow_id; +}; + +/* Maximum number of ARFS workitems that may be in flight on an efx_nic */ +#define EFX_RPS_MAX_IN_FLIGHT 8 +#endif /* CONFIG_RFS_ACCEL */ + +enum efx_xdp_tx_queues_mode { + EFX_XDP_TX_QUEUES_DEDICATED, /* one queue per core, locking not needed */ + EFX_XDP_TX_QUEUES_SHARED, /* each queue used by more than 1 core */ + EFX_XDP_TX_QUEUES_BORROWED /* queues borrowed from net stack */ +}; + +/** + * struct efx_nic - an Efx NIC + * @name: Device name (net device name or bus id before net device registered) + * @pci_dev: The PCI device + * @node: List node for maintaning primary/secondary function lists + * @primary: &struct efx_nic instance for the primary function of this + * controller. May be the same structure, and may be %NULL if no + * primary function is bound. Serialised by rtnl_lock. + * @secondary_list: List of &struct efx_nic instances for the secondary PCI + * functions of the controller, if this is for the primary function. + * Serialised by rtnl_lock. + * @type: Controller type attributes + * @legacy_irq: IRQ number + * @workqueue: Workqueue for port reconfigures and the HW monitor. + * Work items do not hold and must not acquire RTNL. + * @workqueue_name: Name of workqueue + * @reset_work: Scheduled reset workitem + * @membase_phys: Memory BAR value as physical address + * @membase: Memory BAR value + * @vi_stride: step between per-VI registers / memory regions + * @interrupt_mode: Interrupt mode + * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds + * @timer_max_ns: Interrupt timer maximum value, in nanoseconds + * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues + * @irqs_hooked: Channel interrupts are hooked + * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues + * @irq_rx_moderation_us: IRQ moderation time for RX event queues + * @msg_enable: Log message enable flags + * @state: Device state number (%STATE_*). Serialised by the rtnl_lock. + * @reset_pending: Bitmask for pending resets + * @tx_queue: TX DMA queues + * @rx_queue: RX DMA queues + * @channel: Channels + * @msi_context: Context for each MSI + * @extra_channel_types: Types of extra (non-traffic) channels that + * should be allocated for this NIC + * @xdp_tx_queue_count: Number of entries in %xdp_tx_queues. + * @xdp_tx_queues: Array of pointers to tx queues used for XDP transmit. + * @xdp_txq_queues_mode: XDP TX queues sharing strategy. + * @rxq_entries: Size of receive queues requested by user. + * @txq_entries: Size of transmit queues requested by user. + * @txq_stop_thresh: TX queue fill level at or above which we stop it. + * @txq_wake_thresh: TX queue fill level at or below which we wake it. + * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches + * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches + * @sram_lim_qw: Qword address limit of SRAM + * @next_buffer_table: First available buffer table id + * @n_channels: Number of channels in use + * @n_rx_channels: Number of channels used for RX (= number of RX queues) + * @n_tx_channels: Number of channels used for TX + * @n_extra_tx_channels: Number of extra channels with TX queues + * @tx_queues_per_channel: number of TX queues probed on each channel + * @n_xdp_channels: Number of channels used for XDP TX + * @xdp_channel_offset: Offset of zeroth channel used for XPD TX. + * @xdp_tx_per_channel: Max number of TX queues on an XDP TX channel. + * @rx_ip_align: RX DMA address offset to have IP header aligned in + * in accordance with NET_IP_ALIGN + * @rx_dma_len: Current maximum RX DMA length + * @rx_buffer_order: Order (log2) of number of pages for each RX buffer + * @rx_buffer_truesize: Amortised allocation size of an RX buffer, + * for use in sk_buff::truesize + * @rx_prefix_size: Size of RX prefix before packet data + * @rx_packet_hash_offset: Offset of RX flow hash from start of packet data + * (valid only if @rx_prefix_size != 0; always negative) + * @rx_packet_len_offset: Offset of RX packet length from start of packet data + * (valid only for NICs that set %EFX_RX_PKT_PREFIX_LEN; always negative) + * @rx_packet_ts_offset: Offset of timestamp from start of packet data + * (valid only if channel->sync_timestamps_enabled; always negative) + * @rx_scatter: Scatter mode enabled for receives + * @rss_context: Main RSS context. Its @list member is the head of the list of + * RSS contexts created by user requests + * @rss_lock: Protects custom RSS context software state in @rss_context.list + * @vport_id: The function's vport ID, only relevant for PFs + * @int_error_count: Number of internal errors seen recently + * @int_error_expire: Time at which error count will be expired + * @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot + * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will + * acknowledge but do nothing else. + * @irq_status: Interrupt status buffer + * @irq_zero_count: Number of legacy IRQs seen with queue flags == 0 + * @irq_level: IRQ level/index for IRQs not triggered by an event queue + * @selftest_work: Work item for asynchronous self-test + * @mtd_list: List of MTDs attached to the NIC + * @nic_data: Hardware dependent state + * @mcdi: Management-Controller-to-Driver Interface state + * @mac_lock: MAC access lock. Protects @port_enabled, @phy_mode, + * efx_monitor() and efx_reconfigure_port() + * @port_enabled: Port enabled indicator. + * Serialises efx_stop_all(), efx_start_all(), efx_monitor() and + * efx_mac_work() with kernel interfaces. Safe to read under any + * one of the rtnl_lock, mac_lock, or netif_tx_lock, but all three must + * be held to modify it. + * @port_initialized: Port initialized? + * @net_dev: Operating system network device. Consider holding the rtnl lock + * @fixed_features: Features which cannot be turned off + * @num_mac_stats: Number of MAC stats reported by firmware (MAC_STATS_NUM_STATS + * field of %MC_CMD_GET_CAPABILITIES_V4 response, or %MC_CMD_MAC_NSTATS) + * @stats_buffer: DMA buffer for statistics + * @phy_type: PHY type + * @phy_data: PHY private data (including PHY-specific stats) + * @mdio: PHY MDIO interface + * @mdio_bus: PHY MDIO bus ID (only used by Siena) + * @phy_mode: PHY operating mode. Serialised by @mac_lock. + * @link_advertising: Autonegotiation advertising flags + * @fec_config: Forward Error Correction configuration flags. For bit positions + * see &enum ethtool_fec_config_bits. + * @link_state: Current state of the link + * @n_link_state_changes: Number of times the link has changed state + * @unicast_filter: Flag for Falcon-arch simple unicast filter. + * Protected by @mac_lock. + * @multicast_hash: Multicast hash table for Falcon-arch. + * Protected by @mac_lock. + * @wanted_fc: Wanted flow control flags + * @fc_disable: When non-zero flow control is disabled. Typically used to + * ensure that network back pressure doesn't delay dma queue flushes. + * Serialised by the rtnl lock. + * @mac_work: Work item for changing MAC promiscuity and multicast hash + * @loopback_mode: Loopback status + * @loopback_modes: Supported loopback mode bitmask + * @loopback_selftest: Offline self-test private state + * @xdp_prog: Current XDP programme for this interface + * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state + * @filter_state: Architecture-dependent filter table state + * @rps_mutex: Protects RPS state of all channels + * @rps_slot_map: bitmap of in-flight entries in @rps_slot + * @rps_slot: array of ARFS insertion requests for efx_filter_rfs_work() + * @rps_hash_lock: Protects ARFS filter mapping state (@rps_hash_table and + * @rps_next_id). + * @rps_hash_table: Mapping between ARFS filters and their various IDs + * @rps_next_id: next arfs_id for an ARFS filter + * @active_queues: Count of RX and TX queues that haven't been flushed and drained. + * @rxq_flush_pending: Count of number of receive queues that need to be flushed. + * Decremented when the efx_flush_rx_queue() is called. + * @rxq_flush_outstanding: Count of number of RX flushes started but not yet + * completed (either success or failure). Not used when MCDI is used to + * flush receive queues. + * @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions. + * @vf_count: Number of VFs intended to be enabled. + * @vf_init_count: Number of VFs that have been fully initialised. + * @vi_scale: log2 number of vnics per VF. + * @ptp_data: PTP state data + * @ptp_warned: has this NIC seen and warned about unexpected PTP events? + * @vpd_sn: Serial number read from VPD + * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their + * xdp_rxq_info structures? + * @netdev_notifier: Netdevice notifier. + * @mem_bar: The BAR that is mapped into membase. + * @reg_base: Offset from the start of the bar to the function control window. + * @monitor_work: Hardware monitor workitem + * @biu_lock: BIU (bus interface unit) lock + * @last_irq_cpu: Last CPU to handle a possible test interrupt. This + * field is used by efx_test_interrupts() to verify that an + * interrupt has occurred. + * @stats_lock: Statistics update lock. Must be held when calling + * efx_nic_type::{update,start,stop}_stats. + * @n_rx_noskb_drops: Count of RX packets dropped due to failure to allocate an skb + * + * This is stored in the private area of the &struct net_device. + */ +struct efx_nic { + /* The following fields should be written very rarely */ + + char name[IFNAMSIZ]; + struct list_head node; + struct efx_nic *primary; + struct list_head secondary_list; + struct pci_dev *pci_dev; + unsigned int port_num; + const struct efx_nic_type *type; + int legacy_irq; + bool eeh_disabled_legacy_irq; + struct workqueue_struct *workqueue; + char workqueue_name[16]; + struct work_struct reset_work; + resource_size_t membase_phys; + void __iomem *membase; + + unsigned int vi_stride; + + enum efx_int_mode interrupt_mode; + unsigned int timer_quantum_ns; + unsigned int timer_max_ns; + bool irq_rx_adaptive; + bool irqs_hooked; + unsigned int irq_mod_step_us; + unsigned int irq_rx_moderation_us; + u32 msg_enable; + + enum nic_state state; + unsigned long reset_pending; + + struct efx_channel *channel[EFX_MAX_CHANNELS]; + struct efx_msi_context msi_context[EFX_MAX_CHANNELS]; + const struct efx_channel_type * + extra_channel_type[EFX_MAX_EXTRA_CHANNELS]; + + unsigned int xdp_tx_queue_count; + struct efx_tx_queue **xdp_tx_queues; + enum efx_xdp_tx_queues_mode xdp_txq_queues_mode; + + unsigned rxq_entries; + unsigned txq_entries; + unsigned int txq_stop_thresh; + unsigned int txq_wake_thresh; + + unsigned tx_dc_base; + unsigned rx_dc_base; + unsigned sram_lim_qw; + unsigned next_buffer_table; + + unsigned int max_channels; + unsigned int max_vis; + unsigned int max_tx_channels; + unsigned n_channels; + unsigned n_rx_channels; + unsigned rss_spread; + unsigned tx_channel_offset; + unsigned n_tx_channels; + unsigned n_extra_tx_channels; + unsigned int tx_queues_per_channel; + unsigned int n_xdp_channels; + unsigned int xdp_channel_offset; + unsigned int xdp_tx_per_channel; + unsigned int rx_ip_align; + unsigned int rx_dma_len; + unsigned int rx_buffer_order; + unsigned int rx_buffer_truesize; + unsigned int rx_page_buf_step; + unsigned int rx_bufs_per_page; + unsigned int rx_pages_per_batch; + unsigned int rx_prefix_size; + int rx_packet_hash_offset; + int rx_packet_len_offset; + int rx_packet_ts_offset; + bool rx_scatter; + struct efx_rss_context rss_context; + struct mutex rss_lock; + u32 vport_id; + + unsigned int_error_count; + unsigned long int_error_expire; + + bool must_realloc_vis; + bool irq_soft_enabled; + struct efx_buffer irq_status; + unsigned irq_zero_count; + unsigned irq_level; + struct delayed_work selftest_work; + +#ifdef CONFIG_SFC_MTD + struct list_head mtd_list; +#endif + + void *nic_data; + struct efx_mcdi_data *mcdi; + + struct mutex mac_lock; + struct work_struct mac_work; + bool port_enabled; + + bool mc_bist_for_other_fn; + bool port_initialized; + struct net_device *net_dev; + + netdev_features_t fixed_features; + + u16 num_mac_stats; + struct efx_buffer stats_buffer; + u64 rx_nodesc_drops_total; + u64 rx_nodesc_drops_while_down; + bool rx_nodesc_drops_prev_state; + + unsigned int phy_type; + void *phy_data; + struct mdio_if_info mdio; + unsigned int mdio_bus; + enum efx_phy_mode phy_mode; + + __ETHTOOL_DECLARE_LINK_MODE_MASK(link_advertising); + u32 fec_config; + struct efx_link_state link_state; + unsigned int n_link_state_changes; + + bool unicast_filter; + union efx_multicast_hash multicast_hash; + u8 wanted_fc; + unsigned fc_disable; + + atomic_t rx_reset; + enum efx_loopback_mode loopback_mode; + u64 loopback_modes; + + void *loopback_selftest; + /* We access loopback_selftest immediately before running XDP, + * so we want them next to each other. + */ + struct bpf_prog __rcu *xdp_prog; + + struct rw_semaphore filter_sem; + void *filter_state; +#ifdef CONFIG_RFS_ACCEL + struct mutex rps_mutex; + unsigned long rps_slot_map; + struct efx_async_filter_insertion rps_slot[EFX_RPS_MAX_IN_FLIGHT]; + spinlock_t rps_hash_lock; + struct hlist_head *rps_hash_table; + u32 rps_next_id; +#endif + + atomic_t active_queues; + atomic_t rxq_flush_pending; + atomic_t rxq_flush_outstanding; + wait_queue_head_t flush_wq; + +#ifdef CONFIG_SFC_SRIOV + unsigned vf_count; + unsigned vf_init_count; + unsigned vi_scale; +#endif + + struct efx_ptp_data *ptp_data; + bool ptp_warned; + + char *vpd_sn; + bool xdp_rxq_info_failed; + + struct notifier_block netdev_notifier; + + unsigned int mem_bar; + u32 reg_base; + + /* The following fields may be written more often */ + + struct delayed_work monitor_work ____cacheline_aligned_in_smp; + spinlock_t biu_lock; + int last_irq_cpu; + spinlock_t stats_lock; + atomic_t n_rx_noskb_drops; +}; + +static inline int efx_dev_registered(struct efx_nic *efx) +{ + return efx->net_dev->reg_state == NETREG_REGISTERED; +} + +static inline unsigned int efx_port_num(struct efx_nic *efx) +{ + return efx->port_num; +} + +struct efx_mtd_partition { + struct list_head node; + struct mtd_info mtd; + const char *dev_type_name; + const char *type_name; + char name[IFNAMSIZ + 20]; +}; + +struct efx_udp_tunnel { +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_INVALID 0xffff + u16 type; /* TUNNEL_ENCAP_UDP_PORT_ENTRY_foo, see mcdi_pcol.h */ + __be16 port; +}; + +/** + * struct efx_nic_type - Efx device type definition + * @mem_bar: Get the memory BAR + * @mem_map_size: Get memory BAR mapped size + * @probe: Probe the controller + * @remove: Free resources allocated by probe() + * @init: Initialise the controller + * @dimension_resources: Dimension controller resources (buffer table, + * and VIs once the available interrupt resources are clear) + * @fini: Shut down the controller + * @monitor: Periodic function for polling link state and hardware monitor + * @map_reset_reason: Map ethtool reset reason to a reset method + * @map_reset_flags: Map ethtool reset flags to a reset method, if possible + * @reset: Reset the controller hardware and possibly the PHY. This will + * be called while the controller is uninitialised. + * @probe_port: Probe the MAC and PHY + * @remove_port: Free resources allocated by probe_port() + * @handle_global_event: Handle a "global" event (may be %NULL) + * @fini_dmaq: Flush and finalise DMA queues (RX and TX queues) + * @prepare_flush: Prepare the hardware for flushing the DMA queues + * (for Falcon architecture) + * @finish_flush: Clean up after flushing the DMA queues (for Falcon + * architecture) + * @prepare_flr: Prepare for an FLR + * @finish_flr: Clean up after an FLR + * @describe_stats: Describe statistics for ethtool + * @update_stats: Update statistics not provided by event handling. + * Either argument may be %NULL. + * @update_stats_atomic: Update statistics while in atomic context, if that + * is more limiting than @update_stats. Otherwise, leave %NULL and + * driver core will call @update_stats. + * @start_stats: Start the regular fetching of statistics + * @pull_stats: Pull stats from the NIC and wait until they arrive. + * @stop_stats: Stop the regular fetching of statistics + * @push_irq_moderation: Apply interrupt moderation value + * @reconfigure_port: Push loopback/power/txdis changes to the MAC and PHY + * @prepare_enable_fc_tx: Prepare MAC to enable pause frame TX (may be %NULL) + * @reconfigure_mac: Push MAC address, MTU, flow control and filter settings + * to the hardware. Serialised by the mac_lock. + * @check_mac_fault: Check MAC fault state. True if fault present. + * @get_wol: Get WoL configuration from driver state + * @set_wol: Push WoL configuration to the NIC + * @resume_wol: Synchronise WoL state between driver and MC (e.g. after resume) + * @get_fec_stats: Get standard FEC statistics. + * @test_chip: Test registers. May use efx_farch_test_registers(), and is + * expected to reset the NIC. + * @test_nvram: Test validity of NVRAM contents + * @mcdi_request: Send an MCDI request with the given header and SDU. + * The SDU length may be any value from 0 up to the protocol- + * defined maximum, but its buffer will be padded to a multiple + * of 4 bytes. + * @mcdi_poll_response: Test whether an MCDI response is available. + * @mcdi_read_response: Read the MCDI response PDU. The offset will + * be a multiple of 4. The length may not be, but the buffer + * will be padded so it is safe to round up. + * @mcdi_poll_reboot: Test whether the MCDI has rebooted. If so, + * return an appropriate error code for aborting any current + * request; otherwise return 0. + * @irq_enable_master: Enable IRQs on the NIC. Each event queue must + * be separately enabled after this. + * @irq_test_generate: Generate a test IRQ + * @irq_disable_non_ev: Disable non-event IRQs on the NIC. Each event + * queue must be separately disabled before this. + * @irq_handle_msi: Handle MSI for a channel. The @dev_id argument is + * a pointer to the &struct efx_msi_context for the channel. + * @irq_handle_legacy: Handle legacy interrupt. The @dev_id argument + * is a pointer to the &struct efx_nic. + * @tx_probe: Allocate resources for TX queue (and select TXQ type) + * @tx_init: Initialise TX queue on the NIC + * @tx_remove: Free resources for TX queue + * @tx_write: Write TX descriptors and doorbell + * @tx_enqueue: Add an SKB to TX queue + * @rx_push_rss_config: Write RSS hash key and indirection table to the NIC + * @rx_pull_rss_config: Read RSS hash key and indirection table back from the NIC + * @rx_push_rss_context_config: Write RSS hash key and indirection table for + * user RSS context to the NIC + * @rx_pull_rss_context_config: Read RSS hash key and indirection table for user + * RSS context back from the NIC + * @rx_probe: Allocate resources for RX queue + * @rx_init: Initialise RX queue on the NIC + * @rx_remove: Free resources for RX queue + * @rx_write: Write RX descriptors and doorbell + * @rx_defer_refill: Generate a refill reminder event + * @rx_packet: Receive the queued RX buffer on a channel + * @rx_buf_hash_valid: Determine whether the RX prefix contains a valid hash + * @ev_probe: Allocate resources for event queue + * @ev_init: Initialise event queue on the NIC + * @ev_fini: Deinitialise event queue on the NIC + * @ev_remove: Free resources for event queue + * @ev_process: Process events for a queue, up to the given NAPI quota + * @ev_read_ack: Acknowledge read events on a queue, rearming its IRQ + * @ev_test_generate: Generate a test event + * @filter_table_probe: Probe filter capabilities and set up filter software state + * @filter_table_restore: Restore filters removed from hardware + * @filter_table_remove: Remove filters from hardware and tear down software state + * @filter_update_rx_scatter: Update filters after change to rx scatter setting + * @filter_insert: add or replace a filter + * @filter_remove_safe: remove a filter by ID, carefully + * @filter_get_safe: retrieve a filter by ID, carefully + * @filter_clear_rx: Remove all RX filters whose priority is less than or + * equal to the given priority and is not %EFX_FILTER_PRI_AUTO + * @filter_count_rx_used: Get the number of filters in use at a given priority + * @filter_get_rx_id_limit: Get maximum value of a filter id, plus 1 + * @filter_get_rx_ids: Get list of RX filters at a given priority + * @filter_rfs_expire_one: Consider expiring a filter inserted for RFS. + * This must check whether the specified table entry is used by RFS + * and that rps_may_expire_flow() returns true for it. + * @mtd_probe: Probe and add MTD partitions associated with this net device, + * using efx_mtd_add() + * @mtd_rename: Set an MTD partition name using the net device name + * @mtd_read: Read from an MTD partition + * @mtd_erase: Erase part of an MTD partition + * @mtd_write: Write to an MTD partition + * @mtd_sync: Wait for write-back to complete on MTD partition. This + * also notifies the driver that a writer has finished using this + * partition. + * @ptp_write_host_time: Send host time to MC as part of sync protocol + * @ptp_set_ts_sync_events: Enable or disable sync events for inline RX + * timestamping, possibly only temporarily for the purposes of a reset. + * @ptp_set_ts_config: Set hardware timestamp configuration. The flags + * and tx_type will already have been validated but this operation + * must validate and update rx_filter. + * @get_phys_port_id: Get the underlying physical port id. + * @set_mac_address: Set the MAC address of the device + * @tso_versions: Returns mask of firmware-assisted TSO versions supported. + * If %NULL, then device does not support any TSO version. + * @udp_tnl_push_ports: Push the list of UDP tunnel ports to the NIC if required. + * @udp_tnl_has_port: Check if a port has been added as UDP tunnel + * @print_additional_fwver: Dump NIC-specific additional FW version info + * @sensor_event: Handle a sensor event from MCDI + * @rx_recycle_ring_size: Size of the RX recycle ring + * @revision: Hardware architecture revision + * @txd_ptr_tbl_base: TX descriptor ring base address + * @rxd_ptr_tbl_base: RX descriptor ring base address + * @buf_tbl_base: Buffer table base address + * @evq_ptr_tbl_base: Event queue pointer table base address + * @evq_rptr_tbl_base: Event queue read-pointer table base address + * @max_dma_mask: Maximum possible DMA mask + * @rx_prefix_size: Size of RX prefix before packet data + * @rx_hash_offset: Offset of RX flow hash within prefix + * @rx_ts_offset: Offset of timestamp within prefix + * @rx_buffer_padding: Size of padding at end of RX packet + * @can_rx_scatter: NIC is able to scatter packets to multiple buffers + * @always_rx_scatter: NIC will always scatter packets to multiple buffers + * @option_descriptors: NIC supports TX option descriptors + * @min_interrupt_mode: Lowest capability interrupt mode supported + * from &enum efx_int_mode. + * @timer_period_max: Maximum period of interrupt timer (in ticks) + * @offload_features: net_device feature flags for protocol offload + * features implemented in hardware + * @mcdi_max_ver: Maximum MCDI version supported + * @hwtstamp_filters: Mask of hardware timestamp filter types supported + */ +struct efx_nic_type { + bool is_vf; + unsigned int (*mem_bar)(struct efx_nic *efx); + unsigned int (*mem_map_size)(struct efx_nic *efx); + int (*probe)(struct efx_nic *efx); + void (*remove)(struct efx_nic *efx); + int (*init)(struct efx_nic *efx); + int (*dimension_resources)(struct efx_nic *efx); + void (*fini)(struct efx_nic *efx); + void (*monitor)(struct efx_nic *efx); + enum reset_type (*map_reset_reason)(enum reset_type reason); + int (*map_reset_flags)(u32 *flags); + int (*reset)(struct efx_nic *efx, enum reset_type method); + int (*probe_port)(struct efx_nic *efx); + void (*remove_port)(struct efx_nic *efx); + bool (*handle_global_event)(struct efx_channel *channel, efx_qword_t *); + int (*fini_dmaq)(struct efx_nic *efx); + void (*prepare_flush)(struct efx_nic *efx); + void (*finish_flush)(struct efx_nic *efx); + void (*prepare_flr)(struct efx_nic *efx); + void (*finish_flr)(struct efx_nic *efx); + size_t (*describe_stats)(struct efx_nic *efx, u8 *names); + size_t (*update_stats)(struct efx_nic *efx, u64 *full_stats, + struct rtnl_link_stats64 *core_stats); + size_t (*update_stats_atomic)(struct efx_nic *efx, u64 *full_stats, + struct rtnl_link_stats64 *core_stats); + void (*start_stats)(struct efx_nic *efx); + void (*pull_stats)(struct efx_nic *efx); + void (*stop_stats)(struct efx_nic *efx); + void (*push_irq_moderation)(struct efx_channel *channel); + int (*reconfigure_port)(struct efx_nic *efx); + void (*prepare_enable_fc_tx)(struct efx_nic *efx); + int (*reconfigure_mac)(struct efx_nic *efx, bool mtu_only); + bool (*check_mac_fault)(struct efx_nic *efx); + void (*get_wol)(struct efx_nic *efx, struct ethtool_wolinfo *wol); + int (*set_wol)(struct efx_nic *efx, u32 type); + void (*resume_wol)(struct efx_nic *efx); + void (*get_fec_stats)(struct efx_nic *efx, + struct ethtool_fec_stats *fec_stats); + unsigned int (*check_caps)(const struct efx_nic *efx, + u8 flag, + u32 offset); + int (*test_chip)(struct efx_nic *efx, struct efx_self_tests *tests); + int (*test_nvram)(struct efx_nic *efx); + void (*mcdi_request)(struct efx_nic *efx, + const efx_dword_t *hdr, size_t hdr_len, + const efx_dword_t *sdu, size_t sdu_len); + bool (*mcdi_poll_response)(struct efx_nic *efx); + void (*mcdi_read_response)(struct efx_nic *efx, efx_dword_t *pdu, + size_t pdu_offset, size_t pdu_len); + int (*mcdi_poll_reboot)(struct efx_nic *efx); + void (*mcdi_reboot_detected)(struct efx_nic *efx); + void (*irq_enable_master)(struct efx_nic *efx); + int (*irq_test_generate)(struct efx_nic *efx); + void (*irq_disable_non_ev)(struct efx_nic *efx); + irqreturn_t (*irq_handle_msi)(int irq, void *dev_id); + irqreturn_t (*irq_handle_legacy)(int irq, void *dev_id); + int (*tx_probe)(struct efx_tx_queue *tx_queue); + void (*tx_init)(struct efx_tx_queue *tx_queue); + void (*tx_remove)(struct efx_tx_queue *tx_queue); + void (*tx_write)(struct efx_tx_queue *tx_queue); + netdev_tx_t (*tx_enqueue)(struct efx_tx_queue *tx_queue, struct sk_buff *skb); + unsigned int (*tx_limit_len)(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned int len); + int (*rx_push_rss_config)(struct efx_nic *efx, bool user, + const u32 *rx_indir_table, const u8 *key); + int (*rx_pull_rss_config)(struct efx_nic *efx); + int (*rx_push_rss_context_config)(struct efx_nic *efx, + struct efx_rss_context *ctx, + const u32 *rx_indir_table, + const u8 *key); + int (*rx_pull_rss_context_config)(struct efx_nic *efx, + struct efx_rss_context *ctx); + void (*rx_restore_rss_contexts)(struct efx_nic *efx); + int (*rx_probe)(struct efx_rx_queue *rx_queue); + void (*rx_init)(struct efx_rx_queue *rx_queue); + void (*rx_remove)(struct efx_rx_queue *rx_queue); + void (*rx_write)(struct efx_rx_queue *rx_queue); + void (*rx_defer_refill)(struct efx_rx_queue *rx_queue); + void (*rx_packet)(struct efx_channel *channel); + bool (*rx_buf_hash_valid)(const u8 *prefix); + int (*ev_probe)(struct efx_channel *channel); + int (*ev_init)(struct efx_channel *channel); + void (*ev_fini)(struct efx_channel *channel); + void (*ev_remove)(struct efx_channel *channel); + int (*ev_process)(struct efx_channel *channel, int quota); + void (*ev_read_ack)(struct efx_channel *channel); + void (*ev_test_generate)(struct efx_channel *channel); + int (*filter_table_probe)(struct efx_nic *efx); + void (*filter_table_restore)(struct efx_nic *efx); + void (*filter_table_remove)(struct efx_nic *efx); + void (*filter_update_rx_scatter)(struct efx_nic *efx); + s32 (*filter_insert)(struct efx_nic *efx, + struct efx_filter_spec *spec, bool replace); + int (*filter_remove_safe)(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id); + int (*filter_get_safe)(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id, struct efx_filter_spec *); + int (*filter_clear_rx)(struct efx_nic *efx, + enum efx_filter_priority priority); + u32 (*filter_count_rx_used)(struct efx_nic *efx, + enum efx_filter_priority priority); + u32 (*filter_get_rx_id_limit)(struct efx_nic *efx); + s32 (*filter_get_rx_ids)(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 *buf, u32 size); +#ifdef CONFIG_RFS_ACCEL + bool (*filter_rfs_expire_one)(struct efx_nic *efx, u32 flow_id, + unsigned int index); +#endif +#ifdef CONFIG_SFC_MTD + int (*mtd_probe)(struct efx_nic *efx); + void (*mtd_rename)(struct efx_mtd_partition *part); + int (*mtd_read)(struct mtd_info *mtd, loff_t start, size_t len, + size_t *retlen, u8 *buffer); + int (*mtd_erase)(struct mtd_info *mtd, loff_t start, size_t len); + int (*mtd_write)(struct mtd_info *mtd, loff_t start, size_t len, + size_t *retlen, const u8 *buffer); + int (*mtd_sync)(struct mtd_info *mtd); +#endif + void (*ptp_write_host_time)(struct efx_nic *efx, u32 host_time); + int (*ptp_set_ts_sync_events)(struct efx_nic *efx, bool en, bool temp); + int (*ptp_set_ts_config)(struct efx_nic *efx, + struct hwtstamp_config *init); + int (*sriov_configure)(struct efx_nic *efx, int num_vfs); + int (*vlan_rx_add_vid)(struct efx_nic *efx, __be16 proto, u16 vid); + int (*vlan_rx_kill_vid)(struct efx_nic *efx, __be16 proto, u16 vid); + int (*get_phys_port_id)(struct efx_nic *efx, + struct netdev_phys_item_id *ppid); + int (*sriov_init)(struct efx_nic *efx); + void (*sriov_fini)(struct efx_nic *efx); + bool (*sriov_wanted)(struct efx_nic *efx); + void (*sriov_reset)(struct efx_nic *efx); + void (*sriov_flr)(struct efx_nic *efx, unsigned vf_i); + int (*sriov_set_vf_mac)(struct efx_nic *efx, int vf_i, const u8 *mac); + int (*sriov_set_vf_vlan)(struct efx_nic *efx, int vf_i, u16 vlan, + u8 qos); + int (*sriov_set_vf_spoofchk)(struct efx_nic *efx, int vf_i, + bool spoofchk); + int (*sriov_get_vf_config)(struct efx_nic *efx, int vf_i, + struct ifla_vf_info *ivi); + int (*sriov_set_vf_link_state)(struct efx_nic *efx, int vf_i, + int link_state); + int (*vswitching_probe)(struct efx_nic *efx); + int (*vswitching_restore)(struct efx_nic *efx); + void (*vswitching_remove)(struct efx_nic *efx); + int (*get_mac_address)(struct efx_nic *efx, unsigned char *perm_addr); + int (*set_mac_address)(struct efx_nic *efx); + u32 (*tso_versions)(struct efx_nic *efx); + int (*udp_tnl_push_ports)(struct efx_nic *efx); + bool (*udp_tnl_has_port)(struct efx_nic *efx, __be16 port); + size_t (*print_additional_fwver)(struct efx_nic *efx, char *buf, + size_t len); + void (*sensor_event)(struct efx_nic *efx, efx_qword_t *ev); + unsigned int (*rx_recycle_ring_size)(const struct efx_nic *efx); + + int revision; + unsigned int txd_ptr_tbl_base; + unsigned int rxd_ptr_tbl_base; + unsigned int buf_tbl_base; + unsigned int evq_ptr_tbl_base; + unsigned int evq_rptr_tbl_base; + u64 max_dma_mask; + unsigned int rx_prefix_size; + unsigned int rx_hash_offset; + unsigned int rx_ts_offset; + unsigned int rx_buffer_padding; + bool can_rx_scatter; + bool always_rx_scatter; + bool option_descriptors; + unsigned int min_interrupt_mode; + unsigned int timer_period_max; + netdev_features_t offload_features; + int mcdi_max_ver; + unsigned int max_rx_ip_filters; + u32 hwtstamp_filters; + unsigned int rx_hash_key_size; +}; + +/************************************************************************** + * + * Prototypes and inline functions + * + *************************************************************************/ + +static inline struct efx_channel * +efx_get_channel(struct efx_nic *efx, unsigned index) +{ + EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_channels); + return efx->channel[index]; +} + +/* Iterate over all used channels */ +#define efx_for_each_channel(_channel, _efx) \ + for (_channel = (_efx)->channel[0]; \ + _channel; \ + _channel = (_channel->channel + 1 < (_efx)->n_channels) ? \ + (_efx)->channel[_channel->channel + 1] : NULL) + +/* Iterate over all used channels in reverse */ +#define efx_for_each_channel_rev(_channel, _efx) \ + for (_channel = (_efx)->channel[(_efx)->n_channels - 1]; \ + _channel; \ + _channel = _channel->channel ? \ + (_efx)->channel[_channel->channel - 1] : NULL) + +static inline struct efx_channel * +efx_get_tx_channel(struct efx_nic *efx, unsigned int index) +{ + EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_tx_channels); + return efx->channel[efx->tx_channel_offset + index]; +} + +static inline struct efx_channel * +efx_get_xdp_channel(struct efx_nic *efx, unsigned int index) +{ + EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_xdp_channels); + return efx->channel[efx->xdp_channel_offset + index]; +} + +static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel) +{ + return channel->channel - channel->efx->xdp_channel_offset < + channel->efx->n_xdp_channels; +} + +static inline bool efx_channel_has_tx_queues(struct efx_channel *channel) +{ + return true; +} + +static inline unsigned int efx_channel_num_tx_queues(struct efx_channel *channel) +{ + if (efx_channel_is_xdp_tx(channel)) + return channel->efx->xdp_tx_per_channel; + return channel->efx->tx_queues_per_channel; +} + +static inline struct efx_tx_queue * +efx_channel_get_tx_queue(struct efx_channel *channel, unsigned int type) +{ + EFX_WARN_ON_ONCE_PARANOID(type >= EFX_TXQ_TYPES); + return channel->tx_queue_by_type[type]; +} + +static inline struct efx_tx_queue * +efx_get_tx_queue(struct efx_nic *efx, unsigned int index, unsigned int type) +{ + struct efx_channel *channel = efx_get_tx_channel(efx, index); + + return efx_channel_get_tx_queue(channel, type); +} + +/* Iterate over all TX queues belonging to a channel */ +#define efx_for_each_channel_tx_queue(_tx_queue, _channel) \ + if (!efx_channel_has_tx_queues(_channel)) \ + ; \ + else \ + for (_tx_queue = (_channel)->tx_queue; \ + _tx_queue < (_channel)->tx_queue + \ + efx_channel_num_tx_queues(_channel); \ + _tx_queue++) + +static inline bool efx_channel_has_rx_queue(struct efx_channel *channel) +{ + return channel->rx_queue.core_index >= 0; +} + +static inline struct efx_rx_queue * +efx_channel_get_rx_queue(struct efx_channel *channel) +{ + EFX_WARN_ON_ONCE_PARANOID(!efx_channel_has_rx_queue(channel)); + return &channel->rx_queue; +} + +/* Iterate over all RX queues belonging to a channel */ +#define efx_for_each_channel_rx_queue(_rx_queue, _channel) \ + if (!efx_channel_has_rx_queue(_channel)) \ + ; \ + else \ + for (_rx_queue = &(_channel)->rx_queue; \ + _rx_queue; \ + _rx_queue = NULL) + +static inline struct efx_channel * +efx_rx_queue_channel(struct efx_rx_queue *rx_queue) +{ + return container_of(rx_queue, struct efx_channel, rx_queue); +} + +static inline int efx_rx_queue_index(struct efx_rx_queue *rx_queue) +{ + return efx_rx_queue_channel(rx_queue)->channel; +} + +/* Returns a pointer to the specified receive buffer in the RX + * descriptor queue. + */ +static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue, + unsigned int index) +{ + return &rx_queue->buffer[index]; +} + +static inline struct efx_rx_buffer * +efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) +{ + if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) + return efx_rx_buffer(rx_queue, 0); + else + return rx_buf + 1; +} + +/** + * EFX_MAX_FRAME_LEN - calculate maximum frame length + * + * This calculates the maximum frame length that will be used for a + * given MTU. The frame length will be equal to the MTU plus a + * constant amount of header space and padding. This is the quantity + * that the net driver will program into the MAC as the maximum frame + * length. + * + * The 10G MAC requires 8-byte alignment on the frame + * length, so we round up to the nearest 8. + * + * Re-clocking by the XGXS on RX can reduce an IPG to 32 bits (half an + * XGMII cycle). If the frame length reaches the maximum value in the + * same cycle, the XMAC can miss the IPG altogether. We work around + * this by adding a further 16 bytes. + */ +#define EFX_FRAME_PAD 16 +#define EFX_MAX_FRAME_LEN(mtu) \ + (ALIGN(((mtu) + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN + EFX_FRAME_PAD), 8)) + +static inline bool efx_xmit_with_hwtstamp(struct sk_buff *skb) +{ + return skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP; +} +static inline void efx_xmit_hwtstamp_pending(struct sk_buff *skb) +{ + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; +} + +/* Get the max fill level of the TX queues on this channel */ +static inline unsigned int +efx_channel_tx_fill_level(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + unsigned int fill_level = 0; + + efx_for_each_channel_tx_queue(tx_queue, channel) + fill_level = max(fill_level, + tx_queue->insert_count - tx_queue->read_count); + + return fill_level; +} + +/* Conservative approximation of efx_channel_tx_fill_level using cached value */ +static inline unsigned int +efx_channel_tx_old_fill_level(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + unsigned int fill_level = 0; + + efx_for_each_channel_tx_queue(tx_queue, channel) + fill_level = max(fill_level, + tx_queue->insert_count - tx_queue->old_read_count); + + return fill_level; +} + +/* Get all supported features. + * If a feature is not fixed, it is present in hw_features. + * If a feature is fixed, it does not present in hw_features, but + * always in features. + */ +static inline netdev_features_t efx_supported_features(const struct efx_nic *efx) +{ + const struct net_device *net_dev = efx->net_dev; + + return net_dev->features | net_dev->hw_features; +} + +/* Get the current TX queue insert index. */ +static inline unsigned int +efx_tx_queue_get_insert_index(const struct efx_tx_queue *tx_queue) +{ + return tx_queue->insert_count & tx_queue->ptr_mask; +} + +/* Get a TX buffer. */ +static inline struct efx_tx_buffer * +__efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue) +{ + return &tx_queue->buffer[efx_tx_queue_get_insert_index(tx_queue)]; +} + +/* Get a TX buffer, checking it's not currently in use. */ +static inline struct efx_tx_buffer * +efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue) +{ + struct efx_tx_buffer *buffer = + __efx_tx_queue_get_insert_buffer(tx_queue); + + EFX_WARN_ON_ONCE_PARANOID(buffer->len); + EFX_WARN_ON_ONCE_PARANOID(buffer->flags); + EFX_WARN_ON_ONCE_PARANOID(buffer->unmap_len); + + return buffer; +} + +#endif /* EFX_NET_DRIVER_H */ diff --git a/drivers/net/ethernet/sfc/siena/nic.c b/drivers/net/ethernet/sfc/siena/nic.c new file mode 100644 index 000000000000..22fbb0ae77fb --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/nic.c @@ -0,0 +1,580 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2013 Solarflare Communications Inc. + */ + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/cpu_rmap.h> +#include "net_driver.h" +#include "bitfield.h" +#include "efx.h" +#include "nic.h" +#include "ef10_regs.h" +#include "farch_regs.h" +#include "io.h" +#include "workarounds.h" +#include "mcdi_pcol.h" + +/************************************************************************** + * + * Generic buffer handling + * These buffers are used for interrupt status, MAC stats, etc. + * + **************************************************************************/ + +int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer, + unsigned int len, gfp_t gfp_flags) +{ + buffer->addr = dma_alloc_coherent(&efx->pci_dev->dev, len, + &buffer->dma_addr, gfp_flags); + if (!buffer->addr) + return -ENOMEM; + buffer->len = len; + return 0; +} + +void efx_nic_free_buffer(struct efx_nic *efx, struct efx_buffer *buffer) +{ + if (buffer->addr) { + dma_free_coherent(&efx->pci_dev->dev, buffer->len, + buffer->addr, buffer->dma_addr); + buffer->addr = NULL; + } +} + +/* Check whether an event is present in the eventq at the current + * read pointer. Only useful for self-test. + */ +bool efx_nic_event_present(struct efx_channel *channel) +{ + return efx_event_present(efx_event(channel, channel->eventq_read_ptr)); +} + +void efx_nic_event_test_start(struct efx_channel *channel) +{ + channel->event_test_cpu = -1; + smp_wmb(); + channel->efx->type->ev_test_generate(channel); +} + +int efx_nic_irq_test_start(struct efx_nic *efx) +{ + efx->last_irq_cpu = -1; + smp_wmb(); + return efx->type->irq_test_generate(efx); +} + +/* Hook interrupt handler(s) + * Try MSI and then legacy interrupts. + */ +int efx_nic_init_interrupt(struct efx_nic *efx) +{ + struct efx_channel *channel; + unsigned int n_irqs; + int rc; + + if (!EFX_INT_MODE_USE_MSI(efx)) { + rc = request_irq(efx->legacy_irq, + efx->type->irq_handle_legacy, IRQF_SHARED, + efx->name, efx); + if (rc) { + netif_err(efx, drv, efx->net_dev, + "failed to hook legacy IRQ %d\n", + efx->pci_dev->irq); + goto fail1; + } + efx->irqs_hooked = true; + return 0; + } + +#ifdef CONFIG_RFS_ACCEL + if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { + efx->net_dev->rx_cpu_rmap = + alloc_irq_cpu_rmap(efx->n_rx_channels); + if (!efx->net_dev->rx_cpu_rmap) { + rc = -ENOMEM; + goto fail1; + } + } +#endif + + /* Hook MSI or MSI-X interrupt */ + n_irqs = 0; + efx_for_each_channel(channel, efx) { + rc = request_irq(channel->irq, efx->type->irq_handle_msi, + IRQF_PROBE_SHARED, /* Not shared */ + efx->msi_context[channel->channel].name, + &efx->msi_context[channel->channel]); + if (rc) { + netif_err(efx, drv, efx->net_dev, + "failed to hook IRQ %d\n", channel->irq); + goto fail2; + } + ++n_irqs; + +#ifdef CONFIG_RFS_ACCEL + if (efx->interrupt_mode == EFX_INT_MODE_MSIX && + channel->channel < efx->n_rx_channels) { + rc = irq_cpu_rmap_add(efx->net_dev->rx_cpu_rmap, + channel->irq); + if (rc) + goto fail2; + } +#endif + } + + efx->irqs_hooked = true; + return 0; + + fail2: +#ifdef CONFIG_RFS_ACCEL + free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap); + efx->net_dev->rx_cpu_rmap = NULL; +#endif + efx_for_each_channel(channel, efx) { + if (n_irqs-- == 0) + break; + free_irq(channel->irq, &efx->msi_context[channel->channel]); + } + fail1: + return rc; +} + +void efx_nic_fini_interrupt(struct efx_nic *efx) +{ + struct efx_channel *channel; + +#ifdef CONFIG_RFS_ACCEL + free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap); + efx->net_dev->rx_cpu_rmap = NULL; +#endif + + if (!efx->irqs_hooked) + return; + if (EFX_INT_MODE_USE_MSI(efx)) { + /* Disable MSI/MSI-X interrupts */ + efx_for_each_channel(channel, efx) + free_irq(channel->irq, + &efx->msi_context[channel->channel]); + } else { + /* Disable legacy interrupt */ + free_irq(efx->legacy_irq, efx); + } + efx->irqs_hooked = false; +} + +/* Register dump */ + +#define REGISTER_REVISION_FA 1 +#define REGISTER_REVISION_FB 2 +#define REGISTER_REVISION_FC 3 +#define REGISTER_REVISION_FZ 3 /* last Falcon arch revision */ +#define REGISTER_REVISION_ED 4 +#define REGISTER_REVISION_EZ 4 /* latest EF10 revision */ + +struct efx_nic_reg { + u32 offset:24; + u32 min_revision:3, max_revision:3; +}; + +#define REGISTER(name, arch, min_rev, max_rev) { \ + arch ## R_ ## min_rev ## max_rev ## _ ## name, \ + REGISTER_REVISION_ ## arch ## min_rev, \ + REGISTER_REVISION_ ## arch ## max_rev \ +} +#define REGISTER_AA(name) REGISTER(name, F, A, A) +#define REGISTER_AB(name) REGISTER(name, F, A, B) +#define REGISTER_AZ(name) REGISTER(name, F, A, Z) +#define REGISTER_BB(name) REGISTER(name, F, B, B) +#define REGISTER_BZ(name) REGISTER(name, F, B, Z) +#define REGISTER_CZ(name) REGISTER(name, F, C, Z) +#define REGISTER_DZ(name) REGISTER(name, E, D, Z) + +static const struct efx_nic_reg efx_nic_regs[] = { + REGISTER_AZ(ADR_REGION), + REGISTER_AZ(INT_EN_KER), + REGISTER_BZ(INT_EN_CHAR), + REGISTER_AZ(INT_ADR_KER), + REGISTER_BZ(INT_ADR_CHAR), + /* INT_ACK_KER is WO */ + /* INT_ISR0 is RC */ + REGISTER_AZ(HW_INIT), + REGISTER_CZ(USR_EV_CFG), + REGISTER_AB(EE_SPI_HCMD), + REGISTER_AB(EE_SPI_HADR), + REGISTER_AB(EE_SPI_HDATA), + REGISTER_AB(EE_BASE_PAGE), + REGISTER_AB(EE_VPD_CFG0), + /* EE_VPD_SW_CNTL and EE_VPD_SW_DATA are not used */ + /* PMBX_DBG_IADDR and PBMX_DBG_IDATA are indirect */ + /* PCIE_CORE_INDIRECT is indirect */ + REGISTER_AB(NIC_STAT), + REGISTER_AB(GPIO_CTL), + REGISTER_AB(GLB_CTL), + /* FATAL_INTR_KER and FATAL_INTR_CHAR are partly RC */ + REGISTER_BZ(DP_CTRL), + REGISTER_AZ(MEM_STAT), + REGISTER_AZ(CS_DEBUG), + REGISTER_AZ(ALTERA_BUILD), + REGISTER_AZ(CSR_SPARE), + REGISTER_AB(PCIE_SD_CTL0123), + REGISTER_AB(PCIE_SD_CTL45), + REGISTER_AB(PCIE_PCS_CTL_STAT), + /* DEBUG_DATA_OUT is not used */ + /* DRV_EV is WO */ + REGISTER_AZ(EVQ_CTL), + REGISTER_AZ(EVQ_CNT1), + REGISTER_AZ(EVQ_CNT2), + REGISTER_AZ(BUF_TBL_CFG), + REGISTER_AZ(SRM_RX_DC_CFG), + REGISTER_AZ(SRM_TX_DC_CFG), + REGISTER_AZ(SRM_CFG), + /* BUF_TBL_UPD is WO */ + REGISTER_AZ(SRM_UPD_EVQ), + REGISTER_AZ(SRAM_PARITY), + REGISTER_AZ(RX_CFG), + REGISTER_BZ(RX_FILTER_CTL), + /* RX_FLUSH_DESCQ is WO */ + REGISTER_AZ(RX_DC_CFG), + REGISTER_AZ(RX_DC_PF_WM), + REGISTER_BZ(RX_RSS_TKEY), + /* RX_NODESC_DROP is RC */ + REGISTER_AA(RX_SELF_RST), + /* RX_DEBUG, RX_PUSH_DROP are not used */ + REGISTER_CZ(RX_RSS_IPV6_REG1), + REGISTER_CZ(RX_RSS_IPV6_REG2), + REGISTER_CZ(RX_RSS_IPV6_REG3), + /* TX_FLUSH_DESCQ is WO */ + REGISTER_AZ(TX_DC_CFG), + REGISTER_AA(TX_CHKSM_CFG), + REGISTER_AZ(TX_CFG), + /* TX_PUSH_DROP is not used */ + REGISTER_AZ(TX_RESERVED), + REGISTER_BZ(TX_PACE), + /* TX_PACE_DROP_QID is RC */ + REGISTER_BB(TX_VLAN), + REGISTER_BZ(TX_IPFIL_PORTEN), + REGISTER_AB(MD_TXD), + REGISTER_AB(MD_RXD), + REGISTER_AB(MD_CS), + REGISTER_AB(MD_PHY_ADR), + REGISTER_AB(MD_ID), + /* MD_STAT is RC */ + REGISTER_AB(MAC_STAT_DMA), + REGISTER_AB(MAC_CTRL), + REGISTER_BB(GEN_MODE), + REGISTER_AB(MAC_MC_HASH_REG0), + REGISTER_AB(MAC_MC_HASH_REG1), + REGISTER_AB(GM_CFG1), + REGISTER_AB(GM_CFG2), + /* GM_IPG and GM_HD are not used */ + REGISTER_AB(GM_MAX_FLEN), + /* GM_TEST is not used */ + REGISTER_AB(GM_ADR1), + REGISTER_AB(GM_ADR2), + REGISTER_AB(GMF_CFG0), + REGISTER_AB(GMF_CFG1), + REGISTER_AB(GMF_CFG2), + REGISTER_AB(GMF_CFG3), + REGISTER_AB(GMF_CFG4), + REGISTER_AB(GMF_CFG5), + REGISTER_BB(TX_SRC_MAC_CTL), + REGISTER_AB(XM_ADR_LO), + REGISTER_AB(XM_ADR_HI), + REGISTER_AB(XM_GLB_CFG), + REGISTER_AB(XM_TX_CFG), + REGISTER_AB(XM_RX_CFG), + REGISTER_AB(XM_MGT_INT_MASK), + REGISTER_AB(XM_FC), + REGISTER_AB(XM_PAUSE_TIME), + REGISTER_AB(XM_TX_PARAM), + REGISTER_AB(XM_RX_PARAM), + /* XM_MGT_INT_MSK (note no 'A') is RC */ + REGISTER_AB(XX_PWR_RST), + REGISTER_AB(XX_SD_CTL), + REGISTER_AB(XX_TXDRV_CTL), + /* XX_PRBS_CTL, XX_PRBS_CHK and XX_PRBS_ERR are not used */ + /* XX_CORE_STAT is partly RC */ + REGISTER_DZ(BIU_HW_REV_ID), + REGISTER_DZ(MC_DB_LWRD), + REGISTER_DZ(MC_DB_HWRD), +}; + +struct efx_nic_reg_table { + u32 offset:24; + u32 min_revision:3, max_revision:3; + u32 step:6, rows:21; +}; + +#define REGISTER_TABLE_DIMENSIONS(_, offset, arch, min_rev, max_rev, step, rows) { \ + offset, \ + REGISTER_REVISION_ ## arch ## min_rev, \ + REGISTER_REVISION_ ## arch ## max_rev, \ + step, rows \ +} +#define REGISTER_TABLE(name, arch, min_rev, max_rev) \ + REGISTER_TABLE_DIMENSIONS( \ + name, arch ## R_ ## min_rev ## max_rev ## _ ## name, \ + arch, min_rev, max_rev, \ + arch ## R_ ## min_rev ## max_rev ## _ ## name ## _STEP, \ + arch ## R_ ## min_rev ## max_rev ## _ ## name ## _ROWS) +#define REGISTER_TABLE_AA(name) REGISTER_TABLE(name, F, A, A) +#define REGISTER_TABLE_AZ(name) REGISTER_TABLE(name, F, A, Z) +#define REGISTER_TABLE_BB(name) REGISTER_TABLE(name, F, B, B) +#define REGISTER_TABLE_BZ(name) REGISTER_TABLE(name, F, B, Z) +#define REGISTER_TABLE_BB_CZ(name) \ + REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, B, B, \ + FR_BZ_ ## name ## _STEP, \ + FR_BB_ ## name ## _ROWS), \ + REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, C, Z, \ + FR_BZ_ ## name ## _STEP, \ + FR_CZ_ ## name ## _ROWS) +#define REGISTER_TABLE_CZ(name) REGISTER_TABLE(name, F, C, Z) +#define REGISTER_TABLE_DZ(name) REGISTER_TABLE(name, E, D, Z) + +static const struct efx_nic_reg_table efx_nic_reg_tables[] = { + /* DRIVER is not used */ + /* EVQ_RPTR, TIMER_COMMAND, USR_EV and {RX,TX}_DESC_UPD are WO */ + REGISTER_TABLE_BB(TX_IPFIL_TBL), + REGISTER_TABLE_BB(TX_SRC_MAC_TBL), + REGISTER_TABLE_AA(RX_DESC_PTR_TBL_KER), + REGISTER_TABLE_BB_CZ(RX_DESC_PTR_TBL), + REGISTER_TABLE_AA(TX_DESC_PTR_TBL_KER), + REGISTER_TABLE_BB_CZ(TX_DESC_PTR_TBL), + REGISTER_TABLE_AA(EVQ_PTR_TBL_KER), + REGISTER_TABLE_BB_CZ(EVQ_PTR_TBL), + /* We can't reasonably read all of the buffer table (up to 8MB!). + * However this driver will only use a few entries. Reading + * 1K entries allows for some expansion of queue count and + * size before we need to change the version. */ + REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL_KER, FR_AA_BUF_FULL_TBL_KER, + F, A, A, 8, 1024), + REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL, FR_BZ_BUF_FULL_TBL, + F, B, Z, 8, 1024), + REGISTER_TABLE_CZ(RX_MAC_FILTER_TBL0), + REGISTER_TABLE_BB_CZ(TIMER_TBL), + REGISTER_TABLE_BB_CZ(TX_PACE_TBL), + REGISTER_TABLE_BZ(RX_INDIRECTION_TBL), + /* TX_FILTER_TBL0 is huge and not used by this driver */ + REGISTER_TABLE_CZ(TX_MAC_FILTER_TBL0), + REGISTER_TABLE_CZ(MC_TREG_SMEM), + /* MSIX_PBA_TABLE is not mapped */ + /* SRM_DBG is not mapped (and is redundant with BUF_FLL_TBL) */ + REGISTER_TABLE_BZ(RX_FILTER_TBL0), + REGISTER_TABLE_DZ(BIU_MC_SFT_STATUS), +}; + +size_t efx_nic_get_regs_len(struct efx_nic *efx) +{ + const struct efx_nic_reg *reg; + const struct efx_nic_reg_table *table; + size_t len = 0; + + for (reg = efx_nic_regs; + reg < efx_nic_regs + ARRAY_SIZE(efx_nic_regs); + reg++) + if (efx->type->revision >= reg->min_revision && + efx->type->revision <= reg->max_revision) + len += sizeof(efx_oword_t); + + for (table = efx_nic_reg_tables; + table < efx_nic_reg_tables + ARRAY_SIZE(efx_nic_reg_tables); + table++) + if (efx->type->revision >= table->min_revision && + efx->type->revision <= table->max_revision) + len += table->rows * min_t(size_t, table->step, 16); + + return len; +} + +void efx_nic_get_regs(struct efx_nic *efx, void *buf) +{ + const struct efx_nic_reg *reg; + const struct efx_nic_reg_table *table; + + for (reg = efx_nic_regs; + reg < efx_nic_regs + ARRAY_SIZE(efx_nic_regs); + reg++) { + if (efx->type->revision >= reg->min_revision && + efx->type->revision <= reg->max_revision) { + efx_reado(efx, (efx_oword_t *)buf, reg->offset); + buf += sizeof(efx_oword_t); + } + } + + for (table = efx_nic_reg_tables; + table < efx_nic_reg_tables + ARRAY_SIZE(efx_nic_reg_tables); + table++) { + size_t size, i; + + if (!(efx->type->revision >= table->min_revision && + efx->type->revision <= table->max_revision)) + continue; + + size = min_t(size_t, table->step, 16); + + for (i = 0; i < table->rows; i++) { + switch (table->step) { + case 4: /* 32-bit SRAM */ + efx_readd(efx, buf, table->offset + 4 * i); + break; + case 8: /* 64-bit SRAM */ + efx_sram_readq(efx, + efx->membase + table->offset, + buf, i); + break; + case 16: /* 128-bit-readable register */ + efx_reado_table(efx, buf, table->offset, i); + break; + case 32: /* 128-bit register, interleaved */ + efx_reado_table(efx, buf, table->offset, 2 * i); + break; + default: + WARN_ON(1); + return; + } + buf += size; + } + } +} + +/** + * efx_nic_describe_stats - Describe supported statistics for ethtool + * @desc: Array of &struct efx_hw_stat_desc describing the statistics + * @count: Length of the @desc array + * @mask: Bitmask of which elements of @desc are enabled + * @names: Buffer to copy names to, or %NULL. The names are copied + * starting at intervals of %ETH_GSTRING_LEN bytes. + * + * Returns the number of visible statistics, i.e. the number of set + * bits in the first @count bits of @mask for which a name is defined. + */ +size_t efx_nic_describe_stats(const struct efx_hw_stat_desc *desc, size_t count, + const unsigned long *mask, u8 *names) +{ + size_t visible = 0; + size_t index; + + for_each_set_bit(index, mask, count) { + if (desc[index].name) { + if (names) { + strlcpy(names, desc[index].name, + ETH_GSTRING_LEN); + names += ETH_GSTRING_LEN; + } + ++visible; + } + } + + return visible; +} + +/** + * efx_nic_copy_stats - Copy stats from the DMA buffer in to an + * intermediate buffer. This is used to get a consistent + * set of stats while the DMA buffer can be written at any time + * by the NIC. + * @efx: The associated NIC. + * @dest: Destination buffer. Must be the same size as the DMA buffer. + */ +int efx_nic_copy_stats(struct efx_nic *efx, __le64 *dest) +{ + __le64 *dma_stats = efx->stats_buffer.addr; + __le64 generation_start, generation_end; + int rc = 0, retry; + + if (!dest) + return 0; + + if (!dma_stats) + goto return_zeroes; + + /* If we're unlucky enough to read statistics during the DMA, wait + * up to 10ms for it to finish (typically takes <500us) + */ + for (retry = 0; retry < 100; ++retry) { + generation_end = dma_stats[efx->num_mac_stats - 1]; + if (generation_end == EFX_MC_STATS_GENERATION_INVALID) + goto return_zeroes; + rmb(); + memcpy(dest, dma_stats, efx->num_mac_stats * sizeof(__le64)); + rmb(); + generation_start = dma_stats[MC_CMD_MAC_GENERATION_START]; + if (generation_end == generation_start) + return 0; /* return good data */ + udelay(100); + } + + rc = -EIO; + +return_zeroes: + memset(dest, 0, efx->num_mac_stats * sizeof(u64)); + return rc; +} + +/** + * efx_nic_update_stats - Convert statistics DMA buffer to array of u64 + * @desc: Array of &struct efx_hw_stat_desc describing the DMA buffer + * layout. DMA widths of 0, 16, 32 and 64 are supported; where + * the width is specified as 0 the corresponding element of + * @stats is not updated. + * @count: Length of the @desc array + * @mask: Bitmask of which elements of @desc are enabled + * @stats: Buffer to update with the converted statistics. The length + * of this array must be at least @count. + * @dma_buf: DMA buffer containing hardware statistics + * @accumulate: If set, the converted values will be added rather than + * directly stored to the corresponding elements of @stats + */ +void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count, + const unsigned long *mask, + u64 *stats, const void *dma_buf, bool accumulate) +{ + size_t index; + + for_each_set_bit(index, mask, count) { + if (desc[index].dma_width) { + const void *addr = dma_buf + desc[index].offset; + u64 val; + + switch (desc[index].dma_width) { + case 16: + val = le16_to_cpup((__le16 *)addr); + break; + case 32: + val = le32_to_cpup((__le32 *)addr); + break; + case 64: + val = le64_to_cpup((__le64 *)addr); + break; + default: + WARN_ON(1); + val = 0; + break; + } + + if (accumulate) + stats[index] += val; + else + stats[index] = val; + } + } +} + +void efx_nic_fix_nodesc_drop_stat(struct efx_nic *efx, u64 *rx_nodesc_drops) +{ + /* if down, or this is the first update after coming up */ + if (!(efx->net_dev->flags & IFF_UP) || !efx->rx_nodesc_drops_prev_state) + efx->rx_nodesc_drops_while_down += + *rx_nodesc_drops - efx->rx_nodesc_drops_total; + efx->rx_nodesc_drops_total = *rx_nodesc_drops; + efx->rx_nodesc_drops_prev_state = !!(efx->net_dev->flags & IFF_UP); + *rx_nodesc_drops -= efx->rx_nodesc_drops_while_down; +} diff --git a/drivers/net/ethernet/sfc/siena/nic.h b/drivers/net/ethernet/sfc/siena/nic.h new file mode 100644 index 000000000000..251868235ae4 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/nic.h @@ -0,0 +1,392 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2013 Solarflare Communications Inc. + */ + +#ifndef EFX_NIC_H +#define EFX_NIC_H + +#include "nic_common.h" +#include "efx.h" + +u32 efx_farch_fpga_ver(struct efx_nic *efx); + +enum { + PHY_TYPE_NONE = 0, + PHY_TYPE_TXC43128 = 1, + PHY_TYPE_88E1111 = 2, + PHY_TYPE_SFX7101 = 3, + PHY_TYPE_QT2022C2 = 4, + PHY_TYPE_PM8358 = 6, + PHY_TYPE_SFT9001A = 8, + PHY_TYPE_QT2025C = 9, + PHY_TYPE_SFT9001B = 10, +}; + +enum { + SIENA_STAT_tx_bytes = GENERIC_STAT_COUNT, + SIENA_STAT_tx_good_bytes, + SIENA_STAT_tx_bad_bytes, + SIENA_STAT_tx_packets, + SIENA_STAT_tx_bad, + SIENA_STAT_tx_pause, + SIENA_STAT_tx_control, + SIENA_STAT_tx_unicast, + SIENA_STAT_tx_multicast, + SIENA_STAT_tx_broadcast, + SIENA_STAT_tx_lt64, + SIENA_STAT_tx_64, + SIENA_STAT_tx_65_to_127, + SIENA_STAT_tx_128_to_255, + SIENA_STAT_tx_256_to_511, + SIENA_STAT_tx_512_to_1023, + SIENA_STAT_tx_1024_to_15xx, + SIENA_STAT_tx_15xx_to_jumbo, + SIENA_STAT_tx_gtjumbo, + SIENA_STAT_tx_collision, + SIENA_STAT_tx_single_collision, + SIENA_STAT_tx_multiple_collision, + SIENA_STAT_tx_excessive_collision, + SIENA_STAT_tx_deferred, + SIENA_STAT_tx_late_collision, + SIENA_STAT_tx_excessive_deferred, + SIENA_STAT_tx_non_tcpudp, + SIENA_STAT_tx_mac_src_error, + SIENA_STAT_tx_ip_src_error, + SIENA_STAT_rx_bytes, + SIENA_STAT_rx_good_bytes, + SIENA_STAT_rx_bad_bytes, + SIENA_STAT_rx_packets, + SIENA_STAT_rx_good, + SIENA_STAT_rx_bad, + SIENA_STAT_rx_pause, + SIENA_STAT_rx_control, + SIENA_STAT_rx_unicast, + SIENA_STAT_rx_multicast, + SIENA_STAT_rx_broadcast, + SIENA_STAT_rx_lt64, + SIENA_STAT_rx_64, + SIENA_STAT_rx_65_to_127, + SIENA_STAT_rx_128_to_255, + SIENA_STAT_rx_256_to_511, + SIENA_STAT_rx_512_to_1023, + SIENA_STAT_rx_1024_to_15xx, + SIENA_STAT_rx_15xx_to_jumbo, + SIENA_STAT_rx_gtjumbo, + SIENA_STAT_rx_bad_gtjumbo, + SIENA_STAT_rx_overflow, + SIENA_STAT_rx_false_carrier, + SIENA_STAT_rx_symbol_error, + SIENA_STAT_rx_align_error, + SIENA_STAT_rx_length_error, + SIENA_STAT_rx_internal_error, + SIENA_STAT_rx_nodesc_drop_cnt, + SIENA_STAT_COUNT +}; + +/** + * struct siena_nic_data - Siena NIC state + * @efx: Pointer back to main interface structure + * @wol_filter_id: Wake-on-LAN packet filter id + * @stats: Hardware statistics + * @vf: Array of &struct siena_vf objects + * @vf_buftbl_base: The zeroth buffer table index used to back VF queues. + * @vfdi_status: Common VFDI status page to be dmad to VF address space. + * @local_addr_list: List of local addresses. Protected by %local_lock. + * @local_page_list: List of DMA addressable pages used to broadcast + * %local_addr_list. Protected by %local_lock. + * @local_lock: Mutex protecting %local_addr_list and %local_page_list. + * @peer_work: Work item to broadcast peer addresses to VMs. + */ +struct siena_nic_data { + struct efx_nic *efx; + int wol_filter_id; + u64 stats[SIENA_STAT_COUNT]; +#ifdef CONFIG_SFC_SRIOV + struct siena_vf *vf; + struct efx_channel *vfdi_channel; + unsigned vf_buftbl_base; + struct efx_buffer vfdi_status; + struct list_head local_addr_list; + struct list_head local_page_list; + struct mutex local_lock; + struct work_struct peer_work; +#endif +}; + +enum { + EF10_STAT_port_tx_bytes = GENERIC_STAT_COUNT, + EF10_STAT_port_tx_packets, + EF10_STAT_port_tx_pause, + EF10_STAT_port_tx_control, + EF10_STAT_port_tx_unicast, + EF10_STAT_port_tx_multicast, + EF10_STAT_port_tx_broadcast, + EF10_STAT_port_tx_lt64, + EF10_STAT_port_tx_64, + EF10_STAT_port_tx_65_to_127, + EF10_STAT_port_tx_128_to_255, + EF10_STAT_port_tx_256_to_511, + EF10_STAT_port_tx_512_to_1023, + EF10_STAT_port_tx_1024_to_15xx, + EF10_STAT_port_tx_15xx_to_jumbo, + EF10_STAT_port_rx_bytes, + EF10_STAT_port_rx_bytes_minus_good_bytes, + EF10_STAT_port_rx_good_bytes, + EF10_STAT_port_rx_bad_bytes, + EF10_STAT_port_rx_packets, + EF10_STAT_port_rx_good, + EF10_STAT_port_rx_bad, + EF10_STAT_port_rx_pause, + EF10_STAT_port_rx_control, + EF10_STAT_port_rx_unicast, + EF10_STAT_port_rx_multicast, + EF10_STAT_port_rx_broadcast, + EF10_STAT_port_rx_lt64, + EF10_STAT_port_rx_64, + EF10_STAT_port_rx_65_to_127, + EF10_STAT_port_rx_128_to_255, + EF10_STAT_port_rx_256_to_511, + EF10_STAT_port_rx_512_to_1023, + EF10_STAT_port_rx_1024_to_15xx, + EF10_STAT_port_rx_15xx_to_jumbo, + EF10_STAT_port_rx_gtjumbo, + EF10_STAT_port_rx_bad_gtjumbo, + EF10_STAT_port_rx_overflow, + EF10_STAT_port_rx_align_error, + EF10_STAT_port_rx_length_error, + EF10_STAT_port_rx_nodesc_drops, + EF10_STAT_port_rx_pm_trunc_bb_overflow, + EF10_STAT_port_rx_pm_discard_bb_overflow, + EF10_STAT_port_rx_pm_trunc_vfifo_full, + EF10_STAT_port_rx_pm_discard_vfifo_full, + EF10_STAT_port_rx_pm_trunc_qbb, + EF10_STAT_port_rx_pm_discard_qbb, + EF10_STAT_port_rx_pm_discard_mapping, + EF10_STAT_port_rx_dp_q_disabled_packets, + EF10_STAT_port_rx_dp_di_dropped_packets, + EF10_STAT_port_rx_dp_streaming_packets, + EF10_STAT_port_rx_dp_hlb_fetch, + EF10_STAT_port_rx_dp_hlb_wait, + EF10_STAT_rx_unicast, + EF10_STAT_rx_unicast_bytes, + EF10_STAT_rx_multicast, + EF10_STAT_rx_multicast_bytes, + EF10_STAT_rx_broadcast, + EF10_STAT_rx_broadcast_bytes, + EF10_STAT_rx_bad, + EF10_STAT_rx_bad_bytes, + EF10_STAT_rx_overflow, + EF10_STAT_tx_unicast, + EF10_STAT_tx_unicast_bytes, + EF10_STAT_tx_multicast, + EF10_STAT_tx_multicast_bytes, + EF10_STAT_tx_broadcast, + EF10_STAT_tx_broadcast_bytes, + EF10_STAT_tx_bad, + EF10_STAT_tx_bad_bytes, + EF10_STAT_tx_overflow, + EF10_STAT_V1_COUNT, + EF10_STAT_fec_uncorrected_errors = EF10_STAT_V1_COUNT, + EF10_STAT_fec_corrected_errors, + EF10_STAT_fec_corrected_symbols_lane0, + EF10_STAT_fec_corrected_symbols_lane1, + EF10_STAT_fec_corrected_symbols_lane2, + EF10_STAT_fec_corrected_symbols_lane3, + EF10_STAT_ctpio_vi_busy_fallback, + EF10_STAT_ctpio_long_write_success, + EF10_STAT_ctpio_missing_dbell_fail, + EF10_STAT_ctpio_overflow_fail, + EF10_STAT_ctpio_underflow_fail, + EF10_STAT_ctpio_timeout_fail, + EF10_STAT_ctpio_noncontig_wr_fail, + EF10_STAT_ctpio_frm_clobber_fail, + EF10_STAT_ctpio_invalid_wr_fail, + EF10_STAT_ctpio_vi_clobber_fallback, + EF10_STAT_ctpio_unqualified_fallback, + EF10_STAT_ctpio_runt_fallback, + EF10_STAT_ctpio_success, + EF10_STAT_ctpio_fallback, + EF10_STAT_ctpio_poison, + EF10_STAT_ctpio_erase, + EF10_STAT_COUNT +}; + +/* Maximum number of TX PIO buffers we may allocate to a function. + * This matches the total number of buffers on each SFC9100-family + * controller. + */ +#define EF10_TX_PIOBUF_COUNT 16 + +/** + * struct efx_ef10_nic_data - EF10 architecture NIC state + * @mcdi_buf: DMA buffer for MCDI + * @warm_boot_count: Last seen MC warm boot count + * @vi_base: Absolute index of first VI in this function + * @n_allocated_vis: Number of VIs allocated to this function + * @n_piobufs: Number of PIO buffers allocated to this function + * @wc_membase: Base address of write-combining mapping of the memory BAR + * @pio_write_base: Base address for writing PIO buffers + * @pio_write_vi_base: Relative VI number for @pio_write_base + * @piobuf_handle: Handle of each PIO buffer allocated + * @piobuf_size: size of a single PIO buffer + * @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC + * reboot + * @mc_stats: Scratch buffer for converting statistics to the kernel's format + * @stats: Hardware statistics + * @workaround_35388: Flag: firmware supports workaround for bug 35388 + * @workaround_26807: Flag: firmware supports workaround for bug 26807 + * @workaround_61265: Flag: firmware supports workaround for bug 61265 + * @must_check_datapath_caps: Flag: @datapath_caps needs to be revalidated + * after MC reboot + * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of + * %MC_CMD_GET_CAPABILITIES response) + * @datapath_caps2: Further Capabilities of datapath firmware (FLAGS2 field of + * %MC_CMD_GET_CAPABILITIES response) + * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU + * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU + * @must_probe_vswitching: Flag: vswitching has yet to be setup after MC reboot + * @pf_index: The number for this PF, or the parent PF if this is a VF +#ifdef CONFIG_SFC_SRIOV + * @vf: Pointer to VF data structure +#endif + * @vport_mac: The MAC address on the vport, only for PFs; VFs will be zero + * @vlan_list: List of VLANs added over the interface. Serialised by vlan_lock. + * @vlan_lock: Lock to serialize access to vlan_list. + * @udp_tunnels: UDP tunnel port numbers and types. + * @udp_tunnels_dirty: flag indicating a reboot occurred while pushing + * @udp_tunnels to hardware and thus the push must be re-done. + * @udp_tunnels_lock: Serialises writes to @udp_tunnels and @udp_tunnels_dirty. + */ +struct efx_ef10_nic_data { + struct efx_buffer mcdi_buf; + u16 warm_boot_count; + unsigned int vi_base; + unsigned int n_allocated_vis; + unsigned int n_piobufs; + void __iomem *wc_membase, *pio_write_base; + unsigned int pio_write_vi_base; + unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT]; + u16 piobuf_size; + bool must_restore_piobufs; + __le64 *mc_stats; + u64 stats[EF10_STAT_COUNT]; + bool workaround_35388; + bool workaround_26807; + bool workaround_61265; + bool must_check_datapath_caps; + u32 datapath_caps; + u32 datapath_caps2; + unsigned int rx_dpcpu_fw_id; + unsigned int tx_dpcpu_fw_id; + bool must_probe_vswitching; + unsigned int pf_index; + u8 port_id[ETH_ALEN]; +#ifdef CONFIG_SFC_SRIOV + unsigned int vf_index; + struct ef10_vf *vf; +#endif + u8 vport_mac[ETH_ALEN]; + struct list_head vlan_list; + struct mutex vlan_lock; + struct efx_udp_tunnel udp_tunnels[16]; + bool udp_tunnels_dirty; + struct mutex udp_tunnels_lock; + u64 licensed_features; +}; + +/* TSOv2 */ +int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + bool *data_mapped); + +extern const struct efx_nic_type efx_hunt_a0_nic_type; +extern const struct efx_nic_type efx_hunt_a0_vf_nic_type; + +int falcon_probe_board(struct efx_nic *efx, u16 revision_info); + +/* Falcon/Siena queue operations */ +int efx_farch_tx_probe(struct efx_tx_queue *tx_queue); +void efx_farch_tx_init(struct efx_tx_queue *tx_queue); +void efx_farch_tx_fini(struct efx_tx_queue *tx_queue); +void efx_farch_tx_remove(struct efx_tx_queue *tx_queue); +void efx_farch_tx_write(struct efx_tx_queue *tx_queue); +unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned int len); +int efx_farch_rx_probe(struct efx_rx_queue *rx_queue); +void efx_farch_rx_init(struct efx_rx_queue *rx_queue); +void efx_farch_rx_fini(struct efx_rx_queue *rx_queue); +void efx_farch_rx_remove(struct efx_rx_queue *rx_queue); +void efx_farch_rx_write(struct efx_rx_queue *rx_queue); +void efx_farch_rx_defer_refill(struct efx_rx_queue *rx_queue); +int efx_farch_ev_probe(struct efx_channel *channel); +int efx_farch_ev_init(struct efx_channel *channel); +void efx_farch_ev_fini(struct efx_channel *channel); +void efx_farch_ev_remove(struct efx_channel *channel); +int efx_farch_ev_process(struct efx_channel *channel, int quota); +void efx_farch_ev_read_ack(struct efx_channel *channel); +void efx_farch_ev_test_generate(struct efx_channel *channel); + +/* Falcon/Siena filter operations */ +int efx_farch_filter_table_probe(struct efx_nic *efx); +void efx_farch_filter_table_restore(struct efx_nic *efx); +void efx_farch_filter_table_remove(struct efx_nic *efx); +void efx_farch_filter_update_rx_scatter(struct efx_nic *efx); +s32 efx_farch_filter_insert(struct efx_nic *efx, struct efx_filter_spec *spec, + bool replace); +int efx_farch_filter_remove_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id); +int efx_farch_filter_get_safe(struct efx_nic *efx, + enum efx_filter_priority priority, u32 filter_id, + struct efx_filter_spec *); +int efx_farch_filter_clear_rx(struct efx_nic *efx, + enum efx_filter_priority priority); +u32 efx_farch_filter_count_rx_used(struct efx_nic *efx, + enum efx_filter_priority priority); +u32 efx_farch_filter_get_rx_id_limit(struct efx_nic *efx); +s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx, + enum efx_filter_priority priority, u32 *buf, + u32 size); +#ifdef CONFIG_RFS_ACCEL +bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, + unsigned int index); +#endif +void efx_farch_filter_sync_rx_mode(struct efx_nic *efx); + +/* Falcon/Siena interrupts */ +void efx_farch_irq_enable_master(struct efx_nic *efx); +int efx_farch_irq_test_generate(struct efx_nic *efx); +void efx_farch_irq_disable_master(struct efx_nic *efx); +irqreturn_t efx_farch_msi_interrupt(int irq, void *dev_id); +irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id); +irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx); + +/* Global Resources */ +void siena_prepare_flush(struct efx_nic *efx); +int efx_farch_fini_dmaq(struct efx_nic *efx); +void efx_farch_finish_flr(struct efx_nic *efx); +void siena_finish_flush(struct efx_nic *efx); +void falcon_start_nic_stats(struct efx_nic *efx); +void falcon_stop_nic_stats(struct efx_nic *efx); +int falcon_reset_xaui(struct efx_nic *efx); +void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw); +void efx_farch_init_common(struct efx_nic *efx); +void efx_farch_rx_push_indir_table(struct efx_nic *efx); +void efx_farch_rx_pull_indir_table(struct efx_nic *efx); + +/* Tests */ +struct efx_farch_register_test { + unsigned address; + efx_oword_t mask; +}; + +int efx_farch_test_registers(struct efx_nic *efx, + const struct efx_farch_register_test *regs, + size_t n_regs); + +void efx_farch_generate_event(struct efx_nic *efx, unsigned int evq, + efx_qword_t *event); + +#endif /* EFX_NIC_H */ diff --git a/drivers/net/ethernet/sfc/siena/nic_common.h b/drivers/net/ethernet/sfc/siena/nic_common.h new file mode 100644 index 000000000000..0cef35c0c559 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/nic_common.h @@ -0,0 +1,262 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2013 Solarflare Communications Inc. + * Copyright 2019-2020 Xilinx Inc. + */ + +#ifndef EFX_NIC_COMMON_H +#define EFX_NIC_COMMON_H + +#include "net_driver.h" +#include "efx_common.h" +#include "mcdi.h" +#include "ptp.h" + +enum { + /* Revisions 0-2 were Falcon A0, A1 and B0 respectively. + * They are not supported by this driver but these revision numbers + * form part of the ethtool API for register dumping. + */ + EFX_REV_SIENA_A0 = 3, + EFX_REV_HUNT_A0 = 4, + EFX_REV_EF100 = 5, +}; + +static inline int efx_nic_rev(struct efx_nic *efx) +{ + return efx->type->revision; +} + +/* Read the current event from the event queue */ +static inline efx_qword_t *efx_event(struct efx_channel *channel, + unsigned int index) +{ + return ((efx_qword_t *) (channel->eventq.buf.addr)) + + (index & channel->eventq_mask); +} + +/* See if an event is present + * + * We check both the high and low dword of the event for all ones. We + * wrote all ones when we cleared the event, and no valid event can + * have all ones in either its high or low dwords. This approach is + * robust against reordering. + * + * Note that using a single 64-bit comparison is incorrect; even + * though the CPU read will be atomic, the DMA write may not be. + */ +static inline int efx_event_present(efx_qword_t *event) +{ + return !(EFX_DWORD_IS_ALL_ONES(event->dword[0]) | + EFX_DWORD_IS_ALL_ONES(event->dword[1])); +} + +/* Returns a pointer to the specified transmit descriptor in the TX + * descriptor queue belonging to the specified channel. + */ +static inline efx_qword_t * +efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index) +{ + return ((efx_qword_t *) (tx_queue->txd.buf.addr)) + index; +} + +/* Report whether this TX queue would be empty for the given write_count. + * May return false negative. + */ +static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, unsigned int write_count) +{ + unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count); + + if (empty_read_count == 0) + return false; + + return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0; +} + +int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + bool *data_mapped); + +/* Decide whether to push a TX descriptor to the NIC vs merely writing + * the doorbell. This can reduce latency when we are adding a single + * descriptor to an empty queue, but is otherwise pointless. Further, + * Falcon and Siena have hardware bugs (SF bug 33851) that may be + * triggered if we don't check this. + * We use the write_count used for the last doorbell push, to get the + * NIC's view of the tx queue. + */ +static inline bool efx_nic_may_push_tx_desc(struct efx_tx_queue *tx_queue, + unsigned int write_count) +{ + bool was_empty = efx_nic_tx_is_empty(tx_queue, write_count); + + tx_queue->empty_read_count = 0; + return was_empty && tx_queue->write_count - write_count == 1; +} + +/* Returns a pointer to the specified descriptor in the RX descriptor queue */ +static inline efx_qword_t * +efx_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index) +{ + return ((efx_qword_t *) (rx_queue->rxd.buf.addr)) + index; +} + +/* Alignment of PCIe DMA boundaries (4KB) */ +#define EFX_PAGE_SIZE 4096 +/* Size and alignment of buffer table entries (same) */ +#define EFX_BUF_SIZE EFX_PAGE_SIZE + +/* NIC-generic software stats */ +enum { + GENERIC_STAT_rx_noskb_drops, + GENERIC_STAT_rx_nodesc_trunc, + GENERIC_STAT_COUNT +}; + +#define EFX_GENERIC_SW_STAT(ext_name) \ + [GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 } + +/* TX data path */ +static inline int efx_nic_probe_tx(struct efx_tx_queue *tx_queue) +{ + return tx_queue->efx->type->tx_probe(tx_queue); +} +static inline void efx_nic_init_tx(struct efx_tx_queue *tx_queue) +{ + tx_queue->efx->type->tx_init(tx_queue); +} +static inline void efx_nic_remove_tx(struct efx_tx_queue *tx_queue) +{ + if (tx_queue->efx->type->tx_remove) + tx_queue->efx->type->tx_remove(tx_queue); +} +static inline void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) +{ + tx_queue->efx->type->tx_write(tx_queue); +} + +/* RX data path */ +static inline int efx_nic_probe_rx(struct efx_rx_queue *rx_queue) +{ + return rx_queue->efx->type->rx_probe(rx_queue); +} +static inline void efx_nic_init_rx(struct efx_rx_queue *rx_queue) +{ + rx_queue->efx->type->rx_init(rx_queue); +} +static inline void efx_nic_remove_rx(struct efx_rx_queue *rx_queue) +{ + rx_queue->efx->type->rx_remove(rx_queue); +} +static inline void efx_nic_notify_rx_desc(struct efx_rx_queue *rx_queue) +{ + rx_queue->efx->type->rx_write(rx_queue); +} +static inline void efx_nic_generate_fill_event(struct efx_rx_queue *rx_queue) +{ + rx_queue->efx->type->rx_defer_refill(rx_queue); +} + +/* Event data path */ +static inline int efx_nic_probe_eventq(struct efx_channel *channel) +{ + return channel->efx->type->ev_probe(channel); +} +static inline int efx_nic_init_eventq(struct efx_channel *channel) +{ + return channel->efx->type->ev_init(channel); +} +static inline void efx_nic_fini_eventq(struct efx_channel *channel) +{ + channel->efx->type->ev_fini(channel); +} +static inline void efx_nic_remove_eventq(struct efx_channel *channel) +{ + channel->efx->type->ev_remove(channel); +} +static inline int +efx_nic_process_eventq(struct efx_channel *channel, int quota) +{ + return channel->efx->type->ev_process(channel, quota); +} +static inline void efx_nic_eventq_read_ack(struct efx_channel *channel) +{ + channel->efx->type->ev_read_ack(channel); +} + +void efx_nic_event_test_start(struct efx_channel *channel); + +bool efx_nic_event_present(struct efx_channel *channel); + +static inline void efx_sensor_event(struct efx_nic *efx, efx_qword_t *ev) +{ + if (efx->type->sensor_event) + efx->type->sensor_event(efx, ev); +} + +static inline unsigned int efx_rx_recycle_ring_size(const struct efx_nic *efx) +{ + return efx->type->rx_recycle_ring_size(efx); +} + +/* Some statistics are computed as A - B where A and B each increase + * linearly with some hardware counter(s) and the counters are read + * asynchronously. If the counters contributing to B are always read + * after those contributing to A, the computed value may be lower than + * the true value by some variable amount, and may decrease between + * subsequent computations. + * + * We should never allow statistics to decrease or to exceed the true + * value. Since the computed value will never be greater than the + * true value, we can achieve this by only storing the computed value + * when it increases. + */ +static inline void efx_update_diff_stat(u64 *stat, u64 diff) +{ + if ((s64)(diff - *stat) > 0) + *stat = diff; +} + +/* Interrupts */ +int efx_nic_init_interrupt(struct efx_nic *efx); +int efx_nic_irq_test_start(struct efx_nic *efx); +void efx_nic_fini_interrupt(struct efx_nic *efx); + +static inline int efx_nic_event_test_irq_cpu(struct efx_channel *channel) +{ + return READ_ONCE(channel->event_test_cpu); +} +static inline int efx_nic_irq_test_irq_cpu(struct efx_nic *efx) +{ + return READ_ONCE(efx->last_irq_cpu); +} + +/* Global Resources */ +int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer, + unsigned int len, gfp_t gfp_flags); +void efx_nic_free_buffer(struct efx_nic *efx, struct efx_buffer *buffer); + +size_t efx_nic_get_regs_len(struct efx_nic *efx); +void efx_nic_get_regs(struct efx_nic *efx, void *buf); + +#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1)) + +size_t efx_nic_describe_stats(const struct efx_hw_stat_desc *desc, size_t count, + const unsigned long *mask, u8 *names); +int efx_nic_copy_stats(struct efx_nic *efx, __le64 *dest); +void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count, + const unsigned long *mask, u64 *stats, + const void *dma_buf, bool accumulate); +void efx_nic_fix_nodesc_drop_stat(struct efx_nic *efx, u64 *stat); +static inline size_t efx_nic_update_stats_atomic(struct efx_nic *efx, u64 *full_stats, + struct rtnl_link_stats64 *core_stats) +{ + if (efx->type->update_stats_atomic) + return efx->type->update_stats_atomic(efx, full_stats, core_stats); + return efx->type->update_stats(efx, full_stats, core_stats); +} + +#define EFX_MAX_FLUSH_TIME 5000 + +#endif /* EFX_NIC_COMMON_H */ diff --git a/drivers/net/ethernet/sfc/siena/ptp.c b/drivers/net/ethernet/sfc/siena/ptp.c new file mode 100644 index 000000000000..f0ef515e2ade --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/ptp.c @@ -0,0 +1,2210 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2011-2013 Solarflare Communications Inc. + */ + +/* Theory of operation: + * + * PTP support is assisted by firmware running on the MC, which provides + * the hardware timestamping capabilities. Both transmitted and received + * PTP event packets are queued onto internal queues for subsequent processing; + * this is because the MC operations are relatively long and would block + * block NAPI/interrupt operation. + * + * Receive event processing: + * The event contains the packet's UUID and sequence number, together + * with the hardware timestamp. The PTP receive packet queue is searched + * for this UUID/sequence number and, if found, put on a pending queue. + * Packets not matching are delivered without timestamps (MCDI events will + * always arrive after the actual packet). + * It is important for the operation of the PTP protocol that the ordering + * of packets between the event and general port is maintained. + * + * Work queue processing: + * If work waiting, synchronise host/hardware time + * + * Transmit: send packet through MC, which returns the transmission time + * that is converted to an appropriate timestamp. + * + * Receive: the packet's reception time is converted to an appropriate + * timestamp. + */ +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/time.h> +#include <linux/ktime.h> +#include <linux/module.h> +#include <linux/pps_kernel.h> +#include <linux/ptp_clock_kernel.h> +#include "net_driver.h" +#include "efx.h" +#include "mcdi.h" +#include "mcdi_pcol.h" +#include "io.h" +#include "farch_regs.h" +#include "tx.h" +#include "nic.h" /* indirectly includes ptp.h */ + +/* Maximum number of events expected to make up a PTP event */ +#define MAX_EVENT_FRAGS 3 + +/* Maximum delay, ms, to begin synchronisation */ +#define MAX_SYNCHRONISE_WAIT_MS 2 + +/* How long, at most, to spend synchronising */ +#define SYNCHRONISE_PERIOD_NS 250000 + +/* How often to update the shared memory time */ +#define SYNCHRONISATION_GRANULARITY_NS 200 + +/* Minimum permitted length of a (corrected) synchronisation time */ +#define DEFAULT_MIN_SYNCHRONISATION_NS 120 + +/* Maximum permitted length of a (corrected) synchronisation time */ +#define MAX_SYNCHRONISATION_NS 1000 + +/* How many (MC) receive events that can be queued */ +#define MAX_RECEIVE_EVENTS 8 + +/* Length of (modified) moving average. */ +#define AVERAGE_LENGTH 16 + +/* How long an unmatched event or packet can be held */ +#define PKT_EVENT_LIFETIME_MS 10 + +/* Offsets into PTP packet for identification. These offsets are from the + * start of the IP header, not the MAC header. Note that neither PTP V1 nor + * PTP V2 permit the use of IPV4 options. + */ +#define PTP_DPORT_OFFSET 22 + +#define PTP_V1_VERSION_LENGTH 2 +#define PTP_V1_VERSION_OFFSET 28 + +#define PTP_V1_UUID_LENGTH 6 +#define PTP_V1_UUID_OFFSET 50 + +#define PTP_V1_SEQUENCE_LENGTH 2 +#define PTP_V1_SEQUENCE_OFFSET 58 + +/* The minimum length of a PTP V1 packet for offsets, etc. to be valid: + * includes IP header. + */ +#define PTP_V1_MIN_LENGTH 64 + +#define PTP_V2_VERSION_LENGTH 1 +#define PTP_V2_VERSION_OFFSET 29 + +#define PTP_V2_UUID_LENGTH 8 +#define PTP_V2_UUID_OFFSET 48 + +/* Although PTP V2 UUIDs are comprised a ClockIdentity (8) and PortNumber (2), + * the MC only captures the last six bytes of the clock identity. These values + * reflect those, not the ones used in the standard. The standard permits + * mapping of V1 UUIDs to V2 UUIDs with these same values. + */ +#define PTP_V2_MC_UUID_LENGTH 6 +#define PTP_V2_MC_UUID_OFFSET 50 + +#define PTP_V2_SEQUENCE_LENGTH 2 +#define PTP_V2_SEQUENCE_OFFSET 58 + +/* The minimum length of a PTP V2 packet for offsets, etc. to be valid: + * includes IP header. + */ +#define PTP_V2_MIN_LENGTH 63 + +#define PTP_MIN_LENGTH 63 + +#define PTP_ADDRESS 0xe0000181 /* 224.0.1.129 */ +#define PTP_EVENT_PORT 319 +#define PTP_GENERAL_PORT 320 + +/* Annoyingly the format of the version numbers are different between + * versions 1 and 2 so it isn't possible to simply look for 1 or 2. + */ +#define PTP_VERSION_V1 1 + +#define PTP_VERSION_V2 2 +#define PTP_VERSION_V2_MASK 0x0f + +enum ptp_packet_state { + PTP_PACKET_STATE_UNMATCHED = 0, + PTP_PACKET_STATE_MATCHED, + PTP_PACKET_STATE_TIMED_OUT, + PTP_PACKET_STATE_MATCH_UNWANTED +}; + +/* NIC synchronised with single word of time only comprising + * partial seconds and full nanoseconds: 10^9 ~ 2^30 so 2 bits for seconds. + */ +#define MC_NANOSECOND_BITS 30 +#define MC_NANOSECOND_MASK ((1 << MC_NANOSECOND_BITS) - 1) +#define MC_SECOND_MASK ((1 << (32 - MC_NANOSECOND_BITS)) - 1) + +/* Maximum parts-per-billion adjustment that is acceptable */ +#define MAX_PPB 1000000 + +/* Precalculate scale word to avoid long long division at runtime */ +/* This is equivalent to 2^66 / 10^9. */ +#define PPB_SCALE_WORD ((1LL << (57)) / 1953125LL) + +/* How much to shift down after scaling to convert to FP40 */ +#define PPB_SHIFT_FP40 26 +/* ... and FP44. */ +#define PPB_SHIFT_FP44 22 + +#define PTP_SYNC_ATTEMPTS 4 + +/** + * struct efx_ptp_match - Matching structure, stored in sk_buff's cb area. + * @words: UUID and (partial) sequence number + * @expiry: Time after which the packet should be delivered irrespective of + * event arrival. + * @state: The state of the packet - whether it is ready for processing or + * whether that is of no interest. + */ +struct efx_ptp_match { + u32 words[DIV_ROUND_UP(PTP_V1_UUID_LENGTH, 4)]; + unsigned long expiry; + enum ptp_packet_state state; +}; + +/** + * struct efx_ptp_event_rx - A PTP receive event (from MC) + * @link: list of events + * @seq0: First part of (PTP) UUID + * @seq1: Second part of (PTP) UUID and sequence number + * @hwtimestamp: Event timestamp + * @expiry: Time which the packet arrived + */ +struct efx_ptp_event_rx { + struct list_head link; + u32 seq0; + u32 seq1; + ktime_t hwtimestamp; + unsigned long expiry; +}; + +/** + * struct efx_ptp_timeset - Synchronisation between host and MC + * @host_start: Host time immediately before hardware timestamp taken + * @major: Hardware timestamp, major + * @minor: Hardware timestamp, minor + * @host_end: Host time immediately after hardware timestamp taken + * @wait: Number of NIC clock ticks between hardware timestamp being read and + * host end time being seen + * @window: Difference of host_end and host_start + * @valid: Whether this timeset is valid + */ +struct efx_ptp_timeset { + u32 host_start; + u32 major; + u32 minor; + u32 host_end; + u32 wait; + u32 window; /* Derived: end - start, allowing for wrap */ +}; + +/** + * struct efx_ptp_data - Precision Time Protocol (PTP) state + * @efx: The NIC context + * @channel: The PTP channel (Siena only) + * @rx_ts_inline: Flag for whether RX timestamps are inline (else they are + * separate events) + * @rxq: Receive SKB queue (awaiting timestamps) + * @txq: Transmit SKB queue + * @evt_list: List of MC receive events awaiting packets + * @evt_free_list: List of free events + * @evt_lock: Lock for manipulating evt_list and evt_free_list + * @rx_evts: Instantiated events (on evt_list and evt_free_list) + * @workwq: Work queue for processing pending PTP operations + * @work: Work task + * @reset_required: A serious error has occurred and the PTP task needs to be + * reset (disable, enable). + * @rxfilter_event: Receive filter when operating + * @rxfilter_general: Receive filter when operating + * @rxfilter_installed: Receive filter installed + * @config: Current timestamp configuration + * @enabled: PTP operation enabled + * @mode: Mode in which PTP operating (PTP version) + * @ns_to_nic_time: Function to convert from scalar nanoseconds to NIC time + * @nic_to_kernel_time: Function to convert from NIC to kernel time + * @nic_time: contains time details + * @nic_time.minor_max: Wrap point for NIC minor times + * @nic_time.sync_event_diff_min: Minimum acceptable difference between time + * in packet prefix and last MCDI time sync event i.e. how much earlier than + * the last sync event time a packet timestamp can be. + * @nic_time.sync_event_diff_max: Maximum acceptable difference between time + * in packet prefix and last MCDI time sync event i.e. how much later than + * the last sync event time a packet timestamp can be. + * @nic_time.sync_event_minor_shift: Shift required to make minor time from + * field in MCDI time sync event. + * @min_synchronisation_ns: Minimum acceptable corrected sync window + * @capabilities: Capabilities flags from the NIC + * @ts_corrections: contains corrections details + * @ts_corrections.ptp_tx: Required driver correction of PTP packet transmit + * timestamps + * @ts_corrections.ptp_rx: Required driver correction of PTP packet receive + * timestamps + * @ts_corrections.pps_out: PPS output error (information only) + * @ts_corrections.pps_in: Required driver correction of PPS input timestamps + * @ts_corrections.general_tx: Required driver correction of general packet + * transmit timestamps + * @ts_corrections.general_rx: Required driver correction of general packet + * receive timestamps + * @evt_frags: Partly assembled PTP events + * @evt_frag_idx: Current fragment number + * @evt_code: Last event code + * @start: Address at which MC indicates ready for synchronisation + * @host_time_pps: Host time at last PPS + * @adjfreq_ppb_shift: Shift required to convert scaled parts-per-billion + * frequency adjustment into a fixed point fractional nanosecond format. + * @current_adjfreq: Current ppb adjustment. + * @phc_clock: Pointer to registered phc device (if primary function) + * @phc_clock_info: Registration structure for phc device + * @pps_work: pps work task for handling pps events + * @pps_workwq: pps work queue + * @nic_ts_enabled: Flag indicating if NIC generated TS events are handled + * @txbuf: Buffer for use when transmitting (PTP) packets to MC (avoids + * allocations in main data path). + * @good_syncs: Number of successful synchronisations. + * @fast_syncs: Number of synchronisations requiring short delay + * @bad_syncs: Number of failed synchronisations. + * @sync_timeouts: Number of synchronisation timeouts + * @no_time_syncs: Number of synchronisations with no good times. + * @invalid_sync_windows: Number of sync windows with bad durations. + * @undersize_sync_windows: Number of corrected sync windows that are too small + * @oversize_sync_windows: Number of corrected sync windows that are too large + * @rx_no_timestamp: Number of packets received without a timestamp. + * @timeset: Last set of synchronisation statistics. + * @xmit_skb: Transmit SKB function. + */ +struct efx_ptp_data { + struct efx_nic *efx; + struct efx_channel *channel; + bool rx_ts_inline; + struct sk_buff_head rxq; + struct sk_buff_head txq; + struct list_head evt_list; + struct list_head evt_free_list; + spinlock_t evt_lock; + struct efx_ptp_event_rx rx_evts[MAX_RECEIVE_EVENTS]; + struct workqueue_struct *workwq; + struct work_struct work; + bool reset_required; + u32 rxfilter_event; + u32 rxfilter_general; + bool rxfilter_installed; + struct hwtstamp_config config; + bool enabled; + unsigned int mode; + void (*ns_to_nic_time)(s64 ns, u32 *nic_major, u32 *nic_minor); + ktime_t (*nic_to_kernel_time)(u32 nic_major, u32 nic_minor, + s32 correction); + struct { + u32 minor_max; + u32 sync_event_diff_min; + u32 sync_event_diff_max; + unsigned int sync_event_minor_shift; + } nic_time; + unsigned int min_synchronisation_ns; + unsigned int capabilities; + struct { + s32 ptp_tx; + s32 ptp_rx; + s32 pps_out; + s32 pps_in; + s32 general_tx; + s32 general_rx; + } ts_corrections; + efx_qword_t evt_frags[MAX_EVENT_FRAGS]; + int evt_frag_idx; + int evt_code; + struct efx_buffer start; + struct pps_event_time host_time_pps; + unsigned int adjfreq_ppb_shift; + s64 current_adjfreq; + struct ptp_clock *phc_clock; + struct ptp_clock_info phc_clock_info; + struct work_struct pps_work; + struct workqueue_struct *pps_workwq; + bool nic_ts_enabled; + efx_dword_t txbuf[MCDI_TX_BUF_LEN(MC_CMD_PTP_IN_TRANSMIT_LENMAX)]; + + unsigned int good_syncs; + unsigned int fast_syncs; + unsigned int bad_syncs; + unsigned int sync_timeouts; + unsigned int no_time_syncs; + unsigned int invalid_sync_windows; + unsigned int undersize_sync_windows; + unsigned int oversize_sync_windows; + unsigned int rx_no_timestamp; + struct efx_ptp_timeset + timeset[MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_MAXNUM]; + void (*xmit_skb)(struct efx_nic *efx, struct sk_buff *skb); +}; + +static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta); +static int efx_phc_adjtime(struct ptp_clock_info *ptp, s64 delta); +static int efx_phc_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts); +static int efx_phc_settime(struct ptp_clock_info *ptp, + const struct timespec64 *e_ts); +static int efx_phc_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *request, int on); + +bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx) +{ + return efx_has_cap(efx, TX_MAC_TIMESTAMPING); +} + +/* PTP 'extra' channel is still a traffic channel, but we only create TX queues + * if PTP uses MAC TX timestamps, not if PTP uses the MC directly to transmit. + */ +static bool efx_ptp_want_txqs(struct efx_channel *channel) +{ + return efx_ptp_use_mac_tx_timestamps(channel->efx); +} + +#define PTP_SW_STAT(ext_name, field_name) \ + { #ext_name, 0, offsetof(struct efx_ptp_data, field_name) } +#define PTP_MC_STAT(ext_name, mcdi_name) \ + { #ext_name, 32, MC_CMD_PTP_OUT_STATUS_STATS_ ## mcdi_name ## _OFST } +static const struct efx_hw_stat_desc efx_ptp_stat_desc[] = { + PTP_SW_STAT(ptp_good_syncs, good_syncs), + PTP_SW_STAT(ptp_fast_syncs, fast_syncs), + PTP_SW_STAT(ptp_bad_syncs, bad_syncs), + PTP_SW_STAT(ptp_sync_timeouts, sync_timeouts), + PTP_SW_STAT(ptp_no_time_syncs, no_time_syncs), + PTP_SW_STAT(ptp_invalid_sync_windows, invalid_sync_windows), + PTP_SW_STAT(ptp_undersize_sync_windows, undersize_sync_windows), + PTP_SW_STAT(ptp_oversize_sync_windows, oversize_sync_windows), + PTP_SW_STAT(ptp_rx_no_timestamp, rx_no_timestamp), + PTP_MC_STAT(ptp_tx_timestamp_packets, TX), + PTP_MC_STAT(ptp_rx_timestamp_packets, RX), + PTP_MC_STAT(ptp_timestamp_packets, TS), + PTP_MC_STAT(ptp_filter_matches, FM), + PTP_MC_STAT(ptp_non_filter_matches, NFM), +}; +#define PTP_STAT_COUNT ARRAY_SIZE(efx_ptp_stat_desc) +static const unsigned long efx_ptp_stat_mask[] = { + [0 ... BITS_TO_LONGS(PTP_STAT_COUNT) - 1] = ~0UL, +}; + +size_t efx_ptp_describe_stats(struct efx_nic *efx, u8 *strings) +{ + if (!efx->ptp_data) + return 0; + + return efx_nic_describe_stats(efx_ptp_stat_desc, PTP_STAT_COUNT, + efx_ptp_stat_mask, strings); +} + +size_t efx_ptp_update_stats(struct efx_nic *efx, u64 *stats) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_STATUS_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_STATUS_LEN); + size_t i; + int rc; + + if (!efx->ptp_data) + return 0; + + /* Copy software statistics */ + for (i = 0; i < PTP_STAT_COUNT; i++) { + if (efx_ptp_stat_desc[i].dma_width) + continue; + stats[i] = *(unsigned int *)((char *)efx->ptp_data + + efx_ptp_stat_desc[i].offset); + } + + /* Fetch MC statistics. We *must* fill in all statistics or + * risk leaking kernel memory to userland, so if the MCDI + * request fails we pretend we got zeroes. + */ + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_STATUS); + MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); + rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), NULL); + if (rc) + memset(outbuf, 0, sizeof(outbuf)); + efx_nic_update_stats(efx_ptp_stat_desc, PTP_STAT_COUNT, + efx_ptp_stat_mask, + stats, _MCDI_PTR(outbuf, 0), false); + + return PTP_STAT_COUNT; +} + +/* For Siena platforms NIC time is s and ns */ +static void efx_ptp_ns_to_s_ns(s64 ns, u32 *nic_major, u32 *nic_minor) +{ + struct timespec64 ts = ns_to_timespec64(ns); + *nic_major = (u32)ts.tv_sec; + *nic_minor = ts.tv_nsec; +} + +static ktime_t efx_ptp_s_ns_to_ktime_correction(u32 nic_major, u32 nic_minor, + s32 correction) +{ + ktime_t kt = ktime_set(nic_major, nic_minor); + if (correction >= 0) + kt = ktime_add_ns(kt, (u64)correction); + else + kt = ktime_sub_ns(kt, (u64)-correction); + return kt; +} + +/* To convert from s27 format to ns we multiply then divide by a power of 2. + * For the conversion from ns to s27, the operation is also converted to a + * multiply and shift. + */ +#define S27_TO_NS_SHIFT (27) +#define NS_TO_S27_MULT (((1ULL << 63) + NSEC_PER_SEC / 2) / NSEC_PER_SEC) +#define NS_TO_S27_SHIFT (63 - S27_TO_NS_SHIFT) +#define S27_MINOR_MAX (1 << S27_TO_NS_SHIFT) + +/* For Huntington platforms NIC time is in seconds and fractions of a second + * where the minor register only uses 27 bits in units of 2^-27s. + */ +static void efx_ptp_ns_to_s27(s64 ns, u32 *nic_major, u32 *nic_minor) +{ + struct timespec64 ts = ns_to_timespec64(ns); + u32 maj = (u32)ts.tv_sec; + u32 min = (u32)(((u64)ts.tv_nsec * NS_TO_S27_MULT + + (1ULL << (NS_TO_S27_SHIFT - 1))) >> NS_TO_S27_SHIFT); + + /* The conversion can result in the minor value exceeding the maximum. + * In this case, round up to the next second. + */ + if (min >= S27_MINOR_MAX) { + min -= S27_MINOR_MAX; + maj++; + } + + *nic_major = maj; + *nic_minor = min; +} + +static inline ktime_t efx_ptp_s27_to_ktime(u32 nic_major, u32 nic_minor) +{ + u32 ns = (u32)(((u64)nic_minor * NSEC_PER_SEC + + (1ULL << (S27_TO_NS_SHIFT - 1))) >> S27_TO_NS_SHIFT); + return ktime_set(nic_major, ns); +} + +static ktime_t efx_ptp_s27_to_ktime_correction(u32 nic_major, u32 nic_minor, + s32 correction) +{ + /* Apply the correction and deal with carry */ + nic_minor += correction; + if ((s32)nic_minor < 0) { + nic_minor += S27_MINOR_MAX; + nic_major--; + } else if (nic_minor >= S27_MINOR_MAX) { + nic_minor -= S27_MINOR_MAX; + nic_major++; + } + + return efx_ptp_s27_to_ktime(nic_major, nic_minor); +} + +/* For Medford2 platforms the time is in seconds and quarter nanoseconds. */ +static void efx_ptp_ns_to_s_qns(s64 ns, u32 *nic_major, u32 *nic_minor) +{ + struct timespec64 ts = ns_to_timespec64(ns); + + *nic_major = (u32)ts.tv_sec; + *nic_minor = ts.tv_nsec * 4; +} + +static ktime_t efx_ptp_s_qns_to_ktime_correction(u32 nic_major, u32 nic_minor, + s32 correction) +{ + ktime_t kt; + + nic_minor = DIV_ROUND_CLOSEST(nic_minor, 4); + correction = DIV_ROUND_CLOSEST(correction, 4); + + kt = ktime_set(nic_major, nic_minor); + + if (correction >= 0) + kt = ktime_add_ns(kt, (u64)correction); + else + kt = ktime_sub_ns(kt, (u64)-correction); + return kt; +} + +struct efx_channel *efx_ptp_channel(struct efx_nic *efx) +{ + return efx->ptp_data ? efx->ptp_data->channel : NULL; +} + +static u32 last_sync_timestamp_major(struct efx_nic *efx) +{ + struct efx_channel *channel = efx_ptp_channel(efx); + u32 major = 0; + + if (channel) + major = channel->sync_timestamp_major; + return major; +} + +/* The 8000 series and later can provide the time from the MAC, which is only + * 48 bits long and provides meta-information in the top 2 bits. + */ +static ktime_t +efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx, + struct efx_ptp_data *ptp, + u32 nic_major, u32 nic_minor, + s32 correction) +{ + u32 sync_timestamp; + ktime_t kt = { 0 }; + s16 delta; + + if (!(nic_major & 0x80000000)) { + WARN_ON_ONCE(nic_major >> 16); + + /* Medford provides 48 bits of timestamp, so we must get the top + * 16 bits from the timesync event state. + * + * We only have the lower 16 bits of the time now, but we do + * have a full resolution timestamp at some point in past. As + * long as the difference between the (real) now and the sync + * is less than 2^15, then we can reconstruct the difference + * between those two numbers using only the lower 16 bits of + * each. + * + * Put another way + * + * a - b = ((a mod k) - b) mod k + * + * when -k/2 < (a-b) < k/2. In our case k is 2^16. We know + * (a mod k) and b, so can calculate the delta, a - b. + * + */ + sync_timestamp = last_sync_timestamp_major(efx); + + /* Because delta is s16 this does an implicit mask down to + * 16 bits which is what we need, assuming + * MEDFORD_TX_SECS_EVENT_BITS is 16. delta is signed so that + * we can deal with the (unlikely) case of sync timestamps + * arriving from the future. + */ + delta = nic_major - sync_timestamp; + + /* Recover the fully specified time now, by applying the offset + * to the (fully specified) sync time. + */ + nic_major = sync_timestamp + delta; + + kt = ptp->nic_to_kernel_time(nic_major, nic_minor, + correction); + } + return kt; +} + +ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + struct efx_ptp_data *ptp = efx->ptp_data; + ktime_t kt; + + if (efx_ptp_use_mac_tx_timestamps(efx)) + kt = efx_ptp_mac_nic_to_ktime_correction(efx, ptp, + tx_queue->completed_timestamp_major, + tx_queue->completed_timestamp_minor, + ptp->ts_corrections.general_tx); + else + kt = ptp->nic_to_kernel_time( + tx_queue->completed_timestamp_major, + tx_queue->completed_timestamp_minor, + ptp->ts_corrections.general_tx); + return kt; +} + +/* Get PTP attributes and set up time conversions */ +static int efx_ptp_get_attributes(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_GET_ATTRIBUTES_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN); + struct efx_ptp_data *ptp = efx->ptp_data; + int rc; + u32 fmt; + size_t out_len; + + /* Get the PTP attributes. If the NIC doesn't support the operation we + * use the default format for compatibility with older NICs i.e. + * seconds and nanoseconds. + */ + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_GET_ATTRIBUTES); + MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &out_len); + if (rc == 0) { + fmt = MCDI_DWORD(outbuf, PTP_OUT_GET_ATTRIBUTES_TIME_FORMAT); + } else if (rc == -EINVAL) { + fmt = MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS; + } else if (rc == -EPERM) { + pci_info(efx->pci_dev, "no PTP support\n"); + return rc; + } else { + efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), + outbuf, sizeof(outbuf), rc); + return rc; + } + + switch (fmt) { + case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION: + ptp->ns_to_nic_time = efx_ptp_ns_to_s27; + ptp->nic_to_kernel_time = efx_ptp_s27_to_ktime_correction; + ptp->nic_time.minor_max = 1 << 27; + ptp->nic_time.sync_event_minor_shift = 19; + break; + case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS: + ptp->ns_to_nic_time = efx_ptp_ns_to_s_ns; + ptp->nic_to_kernel_time = efx_ptp_s_ns_to_ktime_correction; + ptp->nic_time.minor_max = 1000000000; + ptp->nic_time.sync_event_minor_shift = 22; + break; + case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_QTR_NANOSECONDS: + ptp->ns_to_nic_time = efx_ptp_ns_to_s_qns; + ptp->nic_to_kernel_time = efx_ptp_s_qns_to_ktime_correction; + ptp->nic_time.minor_max = 4000000000UL; + ptp->nic_time.sync_event_minor_shift = 24; + break; + default: + return -ERANGE; + } + + /* Precalculate acceptable difference between the minor time in the + * packet prefix and the last MCDI time sync event. We expect the + * packet prefix timestamp to be after of sync event by up to one + * sync event interval (0.25s) but we allow it to exceed this by a + * fuzz factor of (0.1s) + */ + ptp->nic_time.sync_event_diff_min = ptp->nic_time.minor_max + - (ptp->nic_time.minor_max / 10); + ptp->nic_time.sync_event_diff_max = (ptp->nic_time.minor_max / 4) + + (ptp->nic_time.minor_max / 10); + + /* MC_CMD_PTP_OP_GET_ATTRIBUTES has been extended twice from an older + * operation MC_CMD_PTP_OP_GET_TIME_FORMAT. The function now may return + * a value to use for the minimum acceptable corrected synchronization + * window and may return further capabilities. + * If we have the extra information store it. For older firmware that + * does not implement the extended command use the default value. + */ + if (rc == 0 && + out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_CAPABILITIES_OFST) + ptp->min_synchronisation_ns = + MCDI_DWORD(outbuf, + PTP_OUT_GET_ATTRIBUTES_SYNC_WINDOW_MIN); + else + ptp->min_synchronisation_ns = DEFAULT_MIN_SYNCHRONISATION_NS; + + if (rc == 0 && + out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN) + ptp->capabilities = MCDI_DWORD(outbuf, + PTP_OUT_GET_ATTRIBUTES_CAPABILITIES); + else + ptp->capabilities = 0; + + /* Set up the shift for conversion between frequency + * adjustments in parts-per-billion and the fixed-point + * fractional ns format that the adapter uses. + */ + if (ptp->capabilities & (1 << MC_CMD_PTP_OUT_GET_ATTRIBUTES_FP44_FREQ_ADJ_LBN)) + ptp->adjfreq_ppb_shift = PPB_SHIFT_FP44; + else + ptp->adjfreq_ppb_shift = PPB_SHIFT_FP40; + + return 0; +} + +/* Get PTP timestamp corrections */ +static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_GET_TIMESTAMP_CORRECTIONS_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN); + int rc; + size_t out_len; + + /* Get the timestamp corrections from the NIC. If this operation is + * not supported (older NICs) then no correction is required. + */ + MCDI_SET_DWORD(inbuf, PTP_IN_OP, + MC_CMD_PTP_OP_GET_TIMESTAMP_CORRECTIONS); + MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &out_len); + if (rc == 0) { + efx->ptp_data->ts_corrections.ptp_tx = MCDI_DWORD(outbuf, + PTP_OUT_GET_TIMESTAMP_CORRECTIONS_TRANSMIT); + efx->ptp_data->ts_corrections.ptp_rx = MCDI_DWORD(outbuf, + PTP_OUT_GET_TIMESTAMP_CORRECTIONS_RECEIVE); + efx->ptp_data->ts_corrections.pps_out = MCDI_DWORD(outbuf, + PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_OUT); + efx->ptp_data->ts_corrections.pps_in = MCDI_DWORD(outbuf, + PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_IN); + + if (out_len >= MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN) { + efx->ptp_data->ts_corrections.general_tx = MCDI_DWORD( + outbuf, + PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_TX); + efx->ptp_data->ts_corrections.general_rx = MCDI_DWORD( + outbuf, + PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_RX); + } else { + efx->ptp_data->ts_corrections.general_tx = + efx->ptp_data->ts_corrections.ptp_tx; + efx->ptp_data->ts_corrections.general_rx = + efx->ptp_data->ts_corrections.ptp_rx; + } + } else if (rc == -EINVAL) { + efx->ptp_data->ts_corrections.ptp_tx = 0; + efx->ptp_data->ts_corrections.ptp_rx = 0; + efx->ptp_data->ts_corrections.pps_out = 0; + efx->ptp_data->ts_corrections.pps_in = 0; + efx->ptp_data->ts_corrections.general_tx = 0; + efx->ptp_data->ts_corrections.general_rx = 0; + } else { + efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), outbuf, + sizeof(outbuf), rc); + return rc; + } + + return 0; +} + +/* Enable MCDI PTP support. */ +static int efx_ptp_enable(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_ENABLE_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + int rc; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ENABLE); + MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); + MCDI_SET_DWORD(inbuf, PTP_IN_ENABLE_QUEUE, + efx->ptp_data->channel ? + efx->ptp_data->channel->channel : 0); + MCDI_SET_DWORD(inbuf, PTP_IN_ENABLE_MODE, efx->ptp_data->mode); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), NULL); + rc = (rc == -EALREADY) ? 0 : rc; + if (rc) + efx_mcdi_display_error(efx, MC_CMD_PTP, + MC_CMD_PTP_IN_ENABLE_LEN, + outbuf, sizeof(outbuf), rc); + return rc; +} + +/* Disable MCDI PTP support. + * + * Note that this function should never rely on the presence of ptp_data - + * may be called before that exists. + */ +static int efx_ptp_disable(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_DISABLE_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + int rc; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_DISABLE); + MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), NULL); + rc = (rc == -EALREADY) ? 0 : rc; + /* If we get ENOSYS, the NIC doesn't support PTP, and thus this function + * should only have been called during probe. + */ + if (rc == -ENOSYS || rc == -EPERM) + pci_info(efx->pci_dev, "no PTP support\n"); + else if (rc) + efx_mcdi_display_error(efx, MC_CMD_PTP, + MC_CMD_PTP_IN_DISABLE_LEN, + outbuf, sizeof(outbuf), rc); + return rc; +} + +static void efx_ptp_deliver_rx_queue(struct sk_buff_head *q) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(q))) { + local_bh_disable(); + netif_receive_skb(skb); + local_bh_enable(); + } +} + +static void efx_ptp_handle_no_channel(struct efx_nic *efx) +{ + netif_err(efx, drv, efx->net_dev, + "ERROR: PTP requires MSI-X and 1 additional interrupt" + "vector. PTP disabled\n"); +} + +/* Repeatedly send the host time to the MC which will capture the hardware + * time. + */ +static void efx_ptp_send_times(struct efx_nic *efx, + struct pps_event_time *last_time) +{ + struct pps_event_time now; + struct timespec64 limit; + struct efx_ptp_data *ptp = efx->ptp_data; + int *mc_running = ptp->start.addr; + + pps_get_ts(&now); + limit = now.ts_real; + timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS); + + /* Write host time for specified period or until MC is done */ + while ((timespec64_compare(&now.ts_real, &limit) < 0) && + READ_ONCE(*mc_running)) { + struct timespec64 update_time; + unsigned int host_time; + + /* Don't update continuously to avoid saturating the PCIe bus */ + update_time = now.ts_real; + timespec64_add_ns(&update_time, SYNCHRONISATION_GRANULARITY_NS); + do { + pps_get_ts(&now); + } while ((timespec64_compare(&now.ts_real, &update_time) < 0) && + READ_ONCE(*mc_running)); + + /* Synchronise NIC with single word of time only */ + host_time = (now.ts_real.tv_sec << MC_NANOSECOND_BITS | + now.ts_real.tv_nsec); + /* Update host time in NIC memory */ + efx->type->ptp_write_host_time(efx, host_time); + } + *last_time = now; +} + +/* Read a timeset from the MC's results and partial process. */ +static void efx_ptp_read_timeset(MCDI_DECLARE_STRUCT_PTR(data), + struct efx_ptp_timeset *timeset) +{ + unsigned start_ns, end_ns; + + timeset->host_start = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTSTART); + timeset->major = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_MAJOR); + timeset->minor = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_MINOR); + timeset->host_end = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTEND), + timeset->wait = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_WAITNS); + + /* Ignore seconds */ + start_ns = timeset->host_start & MC_NANOSECOND_MASK; + end_ns = timeset->host_end & MC_NANOSECOND_MASK; + /* Allow for rollover */ + if (end_ns < start_ns) + end_ns += NSEC_PER_SEC; + /* Determine duration of operation */ + timeset->window = end_ns - start_ns; +} + +/* Process times received from MC. + * + * Extract times from returned results, and establish the minimum value + * seen. The minimum value represents the "best" possible time and events + * too much greater than this are rejected - the machine is, perhaps, too + * busy. A number of readings are taken so that, hopefully, at least one good + * synchronisation will be seen in the results. + */ +static int +efx_ptp_process_times(struct efx_nic *efx, MCDI_DECLARE_STRUCT_PTR(synch_buf), + size_t response_length, + const struct pps_event_time *last_time) +{ + unsigned number_readings = + MCDI_VAR_ARRAY_LEN(response_length, + PTP_OUT_SYNCHRONIZE_TIMESET); + unsigned i; + unsigned ngood = 0; + unsigned last_good = 0; + struct efx_ptp_data *ptp = efx->ptp_data; + u32 last_sec; + u32 start_sec; + struct timespec64 delta; + ktime_t mc_time; + + if (number_readings == 0) + return -EAGAIN; + + /* Read the set of results and find the last good host-MC + * synchronization result. The MC times when it finishes reading the + * host time so the corrected window time should be fairly constant + * for a given platform. Increment stats for any results that appear + * to be erroneous. + */ + for (i = 0; i < number_readings; i++) { + s32 window, corrected; + struct timespec64 wait; + + efx_ptp_read_timeset( + MCDI_ARRAY_STRUCT_PTR(synch_buf, + PTP_OUT_SYNCHRONIZE_TIMESET, i), + &ptp->timeset[i]); + + wait = ktime_to_timespec64( + ptp->nic_to_kernel_time(0, ptp->timeset[i].wait, 0)); + window = ptp->timeset[i].window; + corrected = window - wait.tv_nsec; + + /* We expect the uncorrected synchronization window to be at + * least as large as the interval between host start and end + * times. If it is smaller than this then this is mostly likely + * to be a consequence of the host's time being adjusted. + * Check that the corrected sync window is in a reasonable + * range. If it is out of range it is likely to be because an + * interrupt or other delay occurred between reading the system + * time and writing it to MC memory. + */ + if (window < SYNCHRONISATION_GRANULARITY_NS) { + ++ptp->invalid_sync_windows; + } else if (corrected >= MAX_SYNCHRONISATION_NS) { + ++ptp->oversize_sync_windows; + } else if (corrected < ptp->min_synchronisation_ns) { + ++ptp->undersize_sync_windows; + } else { + ngood++; + last_good = i; + } + } + + if (ngood == 0) { + netif_warn(efx, drv, efx->net_dev, + "PTP no suitable synchronisations\n"); + return -EAGAIN; + } + + /* Calculate delay from last good sync (host time) to last_time. + * It is possible that the seconds rolled over between taking + * the start reading and the last value written by the host. The + * timescales are such that a gap of more than one second is never + * expected. delta is *not* normalised. + */ + start_sec = ptp->timeset[last_good].host_start >> MC_NANOSECOND_BITS; + last_sec = last_time->ts_real.tv_sec & MC_SECOND_MASK; + if (start_sec != last_sec && + ((start_sec + 1) & MC_SECOND_MASK) != last_sec) { + netif_warn(efx, hw, efx->net_dev, + "PTP bad synchronisation seconds\n"); + return -EAGAIN; + } + delta.tv_sec = (last_sec - start_sec) & 1; + delta.tv_nsec = + last_time->ts_real.tv_nsec - + (ptp->timeset[last_good].host_start & MC_NANOSECOND_MASK); + + /* Convert the NIC time at last good sync into kernel time. + * No correction is required - this time is the output of a + * firmware process. + */ + mc_time = ptp->nic_to_kernel_time(ptp->timeset[last_good].major, + ptp->timeset[last_good].minor, 0); + + /* Calculate delay from NIC top of second to last_time */ + delta.tv_nsec += ktime_to_timespec64(mc_time).tv_nsec; + + /* Set PPS timestamp to match NIC top of second */ + ptp->host_time_pps = *last_time; + pps_sub_ts(&ptp->host_time_pps, delta); + + return 0; +} + +/* Synchronize times between the host and the MC */ +static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + MCDI_DECLARE_BUF(synch_buf, MC_CMD_PTP_OUT_SYNCHRONIZE_LENMAX); + size_t response_length; + int rc; + unsigned long timeout; + struct pps_event_time last_time = {}; + unsigned int loops = 0; + int *start = ptp->start.addr; + + MCDI_SET_DWORD(synch_buf, PTP_IN_OP, MC_CMD_PTP_OP_SYNCHRONIZE); + MCDI_SET_DWORD(synch_buf, PTP_IN_PERIPH_ID, 0); + MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_NUMTIMESETS, + num_readings); + MCDI_SET_QWORD(synch_buf, PTP_IN_SYNCHRONIZE_START_ADDR, + ptp->start.dma_addr); + + /* Clear flag that signals MC ready */ + WRITE_ONCE(*start, 0); + rc = efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf, + MC_CMD_PTP_IN_SYNCHRONIZE_LEN); + EFX_WARN_ON_ONCE_PARANOID(rc); + + /* Wait for start from MCDI (or timeout) */ + timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS); + while (!READ_ONCE(*start) && (time_before(jiffies, timeout))) { + udelay(20); /* Usually start MCDI execution quickly */ + loops++; + } + + if (loops <= 1) + ++ptp->fast_syncs; + if (!time_before(jiffies, timeout)) + ++ptp->sync_timeouts; + + if (READ_ONCE(*start)) + efx_ptp_send_times(efx, &last_time); + + /* Collect results */ + rc = efx_mcdi_rpc_finish(efx, MC_CMD_PTP, + MC_CMD_PTP_IN_SYNCHRONIZE_LEN, + synch_buf, sizeof(synch_buf), + &response_length); + if (rc == 0) { + rc = efx_ptp_process_times(efx, synch_buf, response_length, + &last_time); + if (rc == 0) + ++ptp->good_syncs; + else + ++ptp->no_time_syncs; + } + + /* Increment the bad syncs counter if the synchronize fails, whatever + * the reason. + */ + if (rc != 0) + ++ptp->bad_syncs; + + return rc; +} + +/* Transmit a PTP packet via the dedicated hardware timestamped queue. */ +static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) +{ + struct efx_ptp_data *ptp_data = efx->ptp_data; + u8 type = efx_tx_csum_type_skb(skb); + struct efx_tx_queue *tx_queue; + + tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type); + if (tx_queue && tx_queue->timestamping) { + efx_enqueue_skb(tx_queue, skb); + } else { + WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); + dev_kfree_skb_any(skb); + } +} + +/* Transmit a PTP packet, via the MCDI interface, to the wire. */ +static void efx_ptp_xmit_skb_mc(struct efx_nic *efx, struct sk_buff *skb) +{ + struct efx_ptp_data *ptp_data = efx->ptp_data; + struct skb_shared_hwtstamps timestamps; + int rc = -EIO; + MCDI_DECLARE_BUF(txtime, MC_CMD_PTP_OUT_TRANSMIT_LEN); + size_t len; + + MCDI_SET_DWORD(ptp_data->txbuf, PTP_IN_OP, MC_CMD_PTP_OP_TRANSMIT); + MCDI_SET_DWORD(ptp_data->txbuf, PTP_IN_PERIPH_ID, 0); + MCDI_SET_DWORD(ptp_data->txbuf, PTP_IN_TRANSMIT_LENGTH, skb->len); + if (skb_shinfo(skb)->nr_frags != 0) { + rc = skb_linearize(skb); + if (rc != 0) + goto fail; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + rc = skb_checksum_help(skb); + if (rc != 0) + goto fail; + } + skb_copy_from_linear_data(skb, + MCDI_PTR(ptp_data->txbuf, + PTP_IN_TRANSMIT_PACKET), + skb->len); + rc = efx_mcdi_rpc(efx, MC_CMD_PTP, + ptp_data->txbuf, MC_CMD_PTP_IN_TRANSMIT_LEN(skb->len), + txtime, sizeof(txtime), &len); + if (rc != 0) + goto fail; + + memset(×tamps, 0, sizeof(timestamps)); + timestamps.hwtstamp = ptp_data->nic_to_kernel_time( + MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MAJOR), + MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MINOR), + ptp_data->ts_corrections.ptp_tx); + + skb_tstamp_tx(skb, ×tamps); + + rc = 0; + +fail: + dev_kfree_skb_any(skb); + + return; +} + +static void efx_ptp_drop_time_expired_events(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + struct list_head *cursor; + struct list_head *next; + + if (ptp->rx_ts_inline) + return; + + /* Drop time-expired events */ + spin_lock_bh(&ptp->evt_lock); + list_for_each_safe(cursor, next, &ptp->evt_list) { + struct efx_ptp_event_rx *evt; + + evt = list_entry(cursor, struct efx_ptp_event_rx, + link); + if (time_after(jiffies, evt->expiry)) { + list_move(&evt->link, &ptp->evt_free_list); + netif_warn(efx, hw, efx->net_dev, + "PTP rx event dropped\n"); + } + } + spin_unlock_bh(&ptp->evt_lock); +} + +static enum ptp_packet_state efx_ptp_match_rx(struct efx_nic *efx, + struct sk_buff *skb) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + bool evts_waiting; + struct list_head *cursor; + struct list_head *next; + struct efx_ptp_match *match; + enum ptp_packet_state rc = PTP_PACKET_STATE_UNMATCHED; + + WARN_ON_ONCE(ptp->rx_ts_inline); + + spin_lock_bh(&ptp->evt_lock); + evts_waiting = !list_empty(&ptp->evt_list); + spin_unlock_bh(&ptp->evt_lock); + + if (!evts_waiting) + return PTP_PACKET_STATE_UNMATCHED; + + match = (struct efx_ptp_match *)skb->cb; + /* Look for a matching timestamp in the event queue */ + spin_lock_bh(&ptp->evt_lock); + list_for_each_safe(cursor, next, &ptp->evt_list) { + struct efx_ptp_event_rx *evt; + + evt = list_entry(cursor, struct efx_ptp_event_rx, link); + if ((evt->seq0 == match->words[0]) && + (evt->seq1 == match->words[1])) { + struct skb_shared_hwtstamps *timestamps; + + /* Match - add in hardware timestamp */ + timestamps = skb_hwtstamps(skb); + timestamps->hwtstamp = evt->hwtimestamp; + + match->state = PTP_PACKET_STATE_MATCHED; + rc = PTP_PACKET_STATE_MATCHED; + list_move(&evt->link, &ptp->evt_free_list); + break; + } + } + spin_unlock_bh(&ptp->evt_lock); + + return rc; +} + +/* Process any queued receive events and corresponding packets + * + * q is returned with all the packets that are ready for delivery. + */ +static void efx_ptp_process_events(struct efx_nic *efx, struct sk_buff_head *q) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&ptp->rxq))) { + struct efx_ptp_match *match; + + match = (struct efx_ptp_match *)skb->cb; + if (match->state == PTP_PACKET_STATE_MATCH_UNWANTED) { + __skb_queue_tail(q, skb); + } else if (efx_ptp_match_rx(efx, skb) == + PTP_PACKET_STATE_MATCHED) { + __skb_queue_tail(q, skb); + } else if (time_after(jiffies, match->expiry)) { + match->state = PTP_PACKET_STATE_TIMED_OUT; + ++ptp->rx_no_timestamp; + __skb_queue_tail(q, skb); + } else { + /* Replace unprocessed entry and stop */ + skb_queue_head(&ptp->rxq, skb); + break; + } + } +} + +/* Complete processing of a received packet */ +static inline void efx_ptp_process_rx(struct efx_nic *efx, struct sk_buff *skb) +{ + local_bh_disable(); + netif_receive_skb(skb); + local_bh_enable(); +} + +static void efx_ptp_remove_multicast_filters(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + + if (ptp->rxfilter_installed) { + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_general); + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_event); + ptp->rxfilter_installed = false; + } +} + +static int efx_ptp_insert_multicast_filters(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + struct efx_filter_spec rxfilter; + int rc; + + if (!ptp->channel || ptp->rxfilter_installed) + return 0; + + /* Must filter on both event and general ports to ensure + * that there is no packet re-ordering. + */ + efx_filter_init_rx(&rxfilter, EFX_FILTER_PRI_REQUIRED, 0, + efx_rx_queue_index( + efx_channel_get_rx_queue(ptp->channel))); + rc = efx_filter_set_ipv4_local(&rxfilter, IPPROTO_UDP, + htonl(PTP_ADDRESS), + htons(PTP_EVENT_PORT)); + if (rc != 0) + return rc; + + rc = efx_filter_insert_filter(efx, &rxfilter, true); + if (rc < 0) + return rc; + ptp->rxfilter_event = rc; + + efx_filter_init_rx(&rxfilter, EFX_FILTER_PRI_REQUIRED, 0, + efx_rx_queue_index( + efx_channel_get_rx_queue(ptp->channel))); + rc = efx_filter_set_ipv4_local(&rxfilter, IPPROTO_UDP, + htonl(PTP_ADDRESS), + htons(PTP_GENERAL_PORT)); + if (rc != 0) + goto fail; + + rc = efx_filter_insert_filter(efx, &rxfilter, true); + if (rc < 0) + goto fail; + ptp->rxfilter_general = rc; + + ptp->rxfilter_installed = true; + return 0; + +fail: + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_event); + return rc; +} + +static int efx_ptp_start(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + int rc; + + ptp->reset_required = false; + + rc = efx_ptp_insert_multicast_filters(efx); + if (rc) + return rc; + + rc = efx_ptp_enable(efx); + if (rc != 0) + goto fail; + + ptp->evt_frag_idx = 0; + ptp->current_adjfreq = 0; + + return 0; + +fail: + efx_ptp_remove_multicast_filters(efx); + return rc; +} + +static int efx_ptp_stop(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + struct list_head *cursor; + struct list_head *next; + int rc; + + if (ptp == NULL) + return 0; + + rc = efx_ptp_disable(efx); + + efx_ptp_remove_multicast_filters(efx); + + /* Make sure RX packets are really delivered */ + efx_ptp_deliver_rx_queue(&efx->ptp_data->rxq); + skb_queue_purge(&efx->ptp_data->txq); + + /* Drop any pending receive events */ + spin_lock_bh(&efx->ptp_data->evt_lock); + list_for_each_safe(cursor, next, &efx->ptp_data->evt_list) { + list_move(cursor, &efx->ptp_data->evt_free_list); + } + spin_unlock_bh(&efx->ptp_data->evt_lock); + + return rc; +} + +static int efx_ptp_restart(struct efx_nic *efx) +{ + if (efx->ptp_data && efx->ptp_data->enabled) + return efx_ptp_start(efx); + return 0; +} + +static void efx_ptp_pps_worker(struct work_struct *work) +{ + struct efx_ptp_data *ptp = + container_of(work, struct efx_ptp_data, pps_work); + struct efx_nic *efx = ptp->efx; + struct ptp_clock_event ptp_evt; + + if (efx_ptp_synchronize(efx, PTP_SYNC_ATTEMPTS)) + return; + + ptp_evt.type = PTP_CLOCK_PPSUSR; + ptp_evt.pps_times = ptp->host_time_pps; + ptp_clock_event(ptp->phc_clock, &ptp_evt); +} + +static void efx_ptp_worker(struct work_struct *work) +{ + struct efx_ptp_data *ptp_data = + container_of(work, struct efx_ptp_data, work); + struct efx_nic *efx = ptp_data->efx; + struct sk_buff *skb; + struct sk_buff_head tempq; + + if (ptp_data->reset_required) { + efx_ptp_stop(efx); + efx_ptp_start(efx); + return; + } + + efx_ptp_drop_time_expired_events(efx); + + __skb_queue_head_init(&tempq); + efx_ptp_process_events(efx, &tempq); + + while ((skb = skb_dequeue(&ptp_data->txq))) + ptp_data->xmit_skb(efx, skb); + + while ((skb = __skb_dequeue(&tempq))) + efx_ptp_process_rx(efx, skb); +} + +static const struct ptp_clock_info efx_phc_clock_info = { + .owner = THIS_MODULE, + .name = "sfc", + .max_adj = MAX_PPB, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .n_pins = 0, + .pps = 1, + .adjfreq = efx_phc_adjfreq, + .adjtime = efx_phc_adjtime, + .gettime64 = efx_phc_gettime, + .settime64 = efx_phc_settime, + .enable = efx_phc_enable, +}; + +/* Initialise PTP state. */ +int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) +{ + struct efx_ptp_data *ptp; + int rc = 0; + unsigned int pos; + + ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL); + efx->ptp_data = ptp; + if (!efx->ptp_data) + return -ENOMEM; + + ptp->efx = efx; + ptp->channel = channel; + ptp->rx_ts_inline = efx_nic_rev(efx) >= EFX_REV_HUNT_A0; + + rc = efx_nic_alloc_buffer(efx, &ptp->start, sizeof(int), GFP_KERNEL); + if (rc != 0) + goto fail1; + + skb_queue_head_init(&ptp->rxq); + skb_queue_head_init(&ptp->txq); + ptp->workwq = create_singlethread_workqueue("sfc_ptp"); + if (!ptp->workwq) { + rc = -ENOMEM; + goto fail2; + } + + if (efx_ptp_use_mac_tx_timestamps(efx)) { + ptp->xmit_skb = efx_ptp_xmit_skb_queue; + /* Request sync events on this channel. */ + channel->sync_events_state = SYNC_EVENTS_QUIESCENT; + } else { + ptp->xmit_skb = efx_ptp_xmit_skb_mc; + } + + INIT_WORK(&ptp->work, efx_ptp_worker); + ptp->config.flags = 0; + ptp->config.tx_type = HWTSTAMP_TX_OFF; + ptp->config.rx_filter = HWTSTAMP_FILTER_NONE; + INIT_LIST_HEAD(&ptp->evt_list); + INIT_LIST_HEAD(&ptp->evt_free_list); + spin_lock_init(&ptp->evt_lock); + for (pos = 0; pos < MAX_RECEIVE_EVENTS; pos++) + list_add(&ptp->rx_evts[pos].link, &ptp->evt_free_list); + + /* Get the NIC PTP attributes and set up time conversions */ + rc = efx_ptp_get_attributes(efx); + if (rc < 0) + goto fail3; + + /* Get the timestamp corrections */ + rc = efx_ptp_get_timestamp_corrections(efx); + if (rc < 0) + goto fail3; + + if (efx->mcdi->fn_flags & + (1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_PRIMARY)) { + ptp->phc_clock_info = efx_phc_clock_info; + ptp->phc_clock = ptp_clock_register(&ptp->phc_clock_info, + &efx->pci_dev->dev); + if (IS_ERR(ptp->phc_clock)) { + rc = PTR_ERR(ptp->phc_clock); + goto fail3; + } else if (ptp->phc_clock) { + INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); + ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); + if (!ptp->pps_workwq) { + rc = -ENOMEM; + goto fail4; + } + } + } + ptp->nic_ts_enabled = false; + + return 0; +fail4: + ptp_clock_unregister(efx->ptp_data->phc_clock); + +fail3: + destroy_workqueue(efx->ptp_data->workwq); + +fail2: + efx_nic_free_buffer(efx, &ptp->start); + +fail1: + kfree(efx->ptp_data); + efx->ptp_data = NULL; + + return rc; +} + +/* Initialise PTP channel. + * + * Setting core_index to zero causes the queue to be initialised and doesn't + * overlap with 'rxq0' because ptp.c doesn't use skb_record_rx_queue. + */ +static int efx_ptp_probe_channel(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + int rc; + + channel->irq_moderation_us = 0; + channel->rx_queue.core_index = 0; + + rc = efx_ptp_probe(efx, channel); + /* Failure to probe PTP is not fatal; this channel will just not be + * used for anything. + * In the case of EPERM, efx_ptp_probe will print its own message (in + * efx_ptp_get_attributes()), so we don't need to. + */ + if (rc && rc != -EPERM) + netif_warn(efx, drv, efx->net_dev, + "Failed to probe PTP, rc=%d\n", rc); + return 0; +} + +void efx_ptp_remove(struct efx_nic *efx) +{ + if (!efx->ptp_data) + return; + + (void)efx_ptp_disable(efx); + + cancel_work_sync(&efx->ptp_data->work); + if (efx->ptp_data->pps_workwq) + cancel_work_sync(&efx->ptp_data->pps_work); + + skb_queue_purge(&efx->ptp_data->rxq); + skb_queue_purge(&efx->ptp_data->txq); + + if (efx->ptp_data->phc_clock) { + destroy_workqueue(efx->ptp_data->pps_workwq); + ptp_clock_unregister(efx->ptp_data->phc_clock); + } + + destroy_workqueue(efx->ptp_data->workwq); + + efx_nic_free_buffer(efx, &efx->ptp_data->start); + kfree(efx->ptp_data); + efx->ptp_data = NULL; +} + +static void efx_ptp_remove_channel(struct efx_channel *channel) +{ + efx_ptp_remove(channel->efx); +} + +static void efx_ptp_get_channel_name(struct efx_channel *channel, + char *buf, size_t len) +{ + snprintf(buf, len, "%s-ptp", channel->efx->name); +} + +/* Determine whether this packet should be processed by the PTP module + * or transmitted conventionally. + */ +bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) +{ + return efx->ptp_data && + efx->ptp_data->enabled && + skb->len >= PTP_MIN_LENGTH && + skb->len <= MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM && + likely(skb->protocol == htons(ETH_P_IP)) && + skb_transport_header_was_set(skb) && + skb_network_header_len(skb) >= sizeof(struct iphdr) && + ip_hdr(skb)->protocol == IPPROTO_UDP && + skb_headlen(skb) >= + skb_transport_offset(skb) + sizeof(struct udphdr) && + udp_hdr(skb)->dest == htons(PTP_EVENT_PORT); +} + +/* Receive a PTP packet. Packets are queued until the arrival of + * the receive timestamp from the MC - this will probably occur after the + * packet arrival because of the processing in the MC. + */ +static bool efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) +{ + struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp = efx->ptp_data; + struct efx_ptp_match *match = (struct efx_ptp_match *)skb->cb; + u8 *match_data_012, *match_data_345; + unsigned int version; + u8 *data; + + match->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); + + /* Correct version? */ + if (ptp->mode == MC_CMD_PTP_MODE_V1) { + if (!pskb_may_pull(skb, PTP_V1_MIN_LENGTH)) { + return false; + } + data = skb->data; + version = ntohs(*(__be16 *)&data[PTP_V1_VERSION_OFFSET]); + if (version != PTP_VERSION_V1) { + return false; + } + + /* PTP V1 uses all six bytes of the UUID to match the packet + * to the timestamp + */ + match_data_012 = data + PTP_V1_UUID_OFFSET; + match_data_345 = data + PTP_V1_UUID_OFFSET + 3; + } else { + if (!pskb_may_pull(skb, PTP_V2_MIN_LENGTH)) { + return false; + } + data = skb->data; + version = data[PTP_V2_VERSION_OFFSET]; + if ((version & PTP_VERSION_V2_MASK) != PTP_VERSION_V2) { + return false; + } + + /* The original V2 implementation uses bytes 2-7 of + * the UUID to match the packet to the timestamp. This + * discards two of the bytes of the MAC address used + * to create the UUID (SF bug 33070). The PTP V2 + * enhanced mode fixes this issue and uses bytes 0-2 + * and byte 5-7 of the UUID. + */ + match_data_345 = data + PTP_V2_UUID_OFFSET + 5; + if (ptp->mode == MC_CMD_PTP_MODE_V2) { + match_data_012 = data + PTP_V2_UUID_OFFSET + 2; + } else { + match_data_012 = data + PTP_V2_UUID_OFFSET + 0; + BUG_ON(ptp->mode != MC_CMD_PTP_MODE_V2_ENHANCED); + } + } + + /* Does this packet require timestamping? */ + if (ntohs(*(__be16 *)&data[PTP_DPORT_OFFSET]) == PTP_EVENT_PORT) { + match->state = PTP_PACKET_STATE_UNMATCHED; + + /* We expect the sequence number to be in the same position in + * the packet for PTP V1 and V2 + */ + BUILD_BUG_ON(PTP_V1_SEQUENCE_OFFSET != PTP_V2_SEQUENCE_OFFSET); + BUILD_BUG_ON(PTP_V1_SEQUENCE_LENGTH != PTP_V2_SEQUENCE_LENGTH); + + /* Extract UUID/Sequence information */ + match->words[0] = (match_data_012[0] | + (match_data_012[1] << 8) | + (match_data_012[2] << 16) | + (match_data_345[0] << 24)); + match->words[1] = (match_data_345[1] | + (match_data_345[2] << 8) | + (data[PTP_V1_SEQUENCE_OFFSET + + PTP_V1_SEQUENCE_LENGTH - 1] << + 16)); + } else { + match->state = PTP_PACKET_STATE_MATCH_UNWANTED; + } + + skb_queue_tail(&ptp->rxq, skb); + queue_work(ptp->workwq, &ptp->work); + + return true; +} + +/* Transmit a PTP packet. This has to be transmitted by the MC + * itself, through an MCDI call. MCDI calls aren't permitted + * in the transmit path so defer the actual transmission to a suitable worker. + */ +int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + + skb_queue_tail(&ptp->txq, skb); + + if ((udp_hdr(skb)->dest == htons(PTP_EVENT_PORT)) && + (skb->len <= MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM)) + efx_xmit_hwtstamp_pending(skb); + queue_work(ptp->workwq, &ptp->work); + + return NETDEV_TX_OK; +} + +int efx_ptp_get_mode(struct efx_nic *efx) +{ + return efx->ptp_data->mode; +} + +int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted, + unsigned int new_mode) +{ + if ((enable_wanted != efx->ptp_data->enabled) || + (enable_wanted && (efx->ptp_data->mode != new_mode))) { + int rc = 0; + + if (enable_wanted) { + /* Change of mode requires disable */ + if (efx->ptp_data->enabled && + (efx->ptp_data->mode != new_mode)) { + efx->ptp_data->enabled = false; + rc = efx_ptp_stop(efx); + if (rc != 0) + return rc; + } + + /* Set new operating mode and establish + * baseline synchronisation, which must + * succeed. + */ + efx->ptp_data->mode = new_mode; + if (netif_running(efx->net_dev)) + rc = efx_ptp_start(efx); + if (rc == 0) { + rc = efx_ptp_synchronize(efx, + PTP_SYNC_ATTEMPTS * 2); + if (rc != 0) + efx_ptp_stop(efx); + } + } else { + rc = efx_ptp_stop(efx); + } + + if (rc != 0) + return rc; + + efx->ptp_data->enabled = enable_wanted; + } + + return 0; +} + +static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init) +{ + int rc; + + if ((init->tx_type != HWTSTAMP_TX_OFF) && + (init->tx_type != HWTSTAMP_TX_ON)) + return -ERANGE; + + rc = efx->type->ptp_set_ts_config(efx, init); + if (rc) + return rc; + + efx->ptp_data->config = *init; + return 0; +} + +void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + struct efx_nic *primary = efx->primary; + + ASSERT_RTNL(); + + if (!ptp) + return; + + ts_info->so_timestamping |= (SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE); + /* Check licensed features. If we don't have the license for TX + * timestamps, the NIC will not support them. + */ + if (efx_ptp_use_mac_tx_timestamps(efx)) { + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + if (!(nic_data->licensed_features & + (1 << LICENSED_V3_FEATURES_TX_TIMESTAMPS_LBN))) + ts_info->so_timestamping &= + ~SOF_TIMESTAMPING_TX_HARDWARE; + } + if (primary && primary->ptp_data && primary->ptp_data->phc_clock) + ts_info->phc_index = + ptp_clock_index(primary->ptp_data->phc_clock); + ts_info->tx_types = 1 << HWTSTAMP_TX_OFF | 1 << HWTSTAMP_TX_ON; + ts_info->rx_filters = ptp->efx->type->hwtstamp_filters; +} + +int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr) +{ + struct hwtstamp_config config; + int rc; + + /* Not a PTP enabled port */ + if (!efx->ptp_data) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + rc = efx_ptp_ts_init(efx, &config); + if (rc != 0) + return rc; + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) + ? -EFAULT : 0; +} + +int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr) +{ + if (!efx->ptp_data) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, &efx->ptp_data->config, + sizeof(efx->ptp_data->config)) ? -EFAULT : 0; +} + +static void ptp_event_failure(struct efx_nic *efx, int expected_frag_len) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + + netif_err(efx, hw, efx->net_dev, + "PTP unexpected event length: got %d expected %d\n", + ptp->evt_frag_idx, expected_frag_len); + ptp->reset_required = true; + queue_work(ptp->workwq, &ptp->work); +} + +/* Process a completed receive event. Put it on the event queue and + * start worker thread. This is required because event and their + * correspoding packets may come in either order. + */ +static void ptp_event_rx(struct efx_nic *efx, struct efx_ptp_data *ptp) +{ + struct efx_ptp_event_rx *evt = NULL; + + if (WARN_ON_ONCE(ptp->rx_ts_inline)) + return; + + if (ptp->evt_frag_idx != 3) { + ptp_event_failure(efx, 3); + return; + } + + spin_lock_bh(&ptp->evt_lock); + if (!list_empty(&ptp->evt_free_list)) { + evt = list_first_entry(&ptp->evt_free_list, + struct efx_ptp_event_rx, link); + list_del(&evt->link); + + evt->seq0 = EFX_QWORD_FIELD(ptp->evt_frags[2], MCDI_EVENT_DATA); + evt->seq1 = (EFX_QWORD_FIELD(ptp->evt_frags[2], + MCDI_EVENT_SRC) | + (EFX_QWORD_FIELD(ptp->evt_frags[1], + MCDI_EVENT_SRC) << 8) | + (EFX_QWORD_FIELD(ptp->evt_frags[0], + MCDI_EVENT_SRC) << 16)); + evt->hwtimestamp = efx->ptp_data->nic_to_kernel_time( + EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA), + EFX_QWORD_FIELD(ptp->evt_frags[1], MCDI_EVENT_DATA), + ptp->ts_corrections.ptp_rx); + evt->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); + list_add_tail(&evt->link, &ptp->evt_list); + + queue_work(ptp->workwq, &ptp->work); + } else if (net_ratelimit()) { + /* Log a rate-limited warning message. */ + netif_err(efx, rx_err, efx->net_dev, "PTP event queue overflow\n"); + } + spin_unlock_bh(&ptp->evt_lock); +} + +static void ptp_event_fault(struct efx_nic *efx, struct efx_ptp_data *ptp) +{ + int code = EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA); + if (ptp->evt_frag_idx != 1) { + ptp_event_failure(efx, 1); + return; + } + + netif_err(efx, hw, efx->net_dev, "PTP error %d\n", code); +} + +static void ptp_event_pps(struct efx_nic *efx, struct efx_ptp_data *ptp) +{ + if (ptp->nic_ts_enabled) + queue_work(ptp->pps_workwq, &ptp->pps_work); +} + +void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + int code = EFX_QWORD_FIELD(*ev, MCDI_EVENT_CODE); + + if (!ptp) { + if (!efx->ptp_warned) { + netif_warn(efx, drv, efx->net_dev, + "Received PTP event but PTP not set up\n"); + efx->ptp_warned = true; + } + return; + } + + if (!ptp->enabled) + return; + + if (ptp->evt_frag_idx == 0) { + ptp->evt_code = code; + } else if (ptp->evt_code != code) { + netif_err(efx, hw, efx->net_dev, + "PTP out of sequence event %d\n", code); + ptp->evt_frag_idx = 0; + } + + ptp->evt_frags[ptp->evt_frag_idx++] = *ev; + if (!MCDI_EVENT_FIELD(*ev, CONT)) { + /* Process resulting event */ + switch (code) { + case MCDI_EVENT_CODE_PTP_RX: + ptp_event_rx(efx, ptp); + break; + case MCDI_EVENT_CODE_PTP_FAULT: + ptp_event_fault(efx, ptp); + break; + case MCDI_EVENT_CODE_PTP_PPS: + ptp_event_pps(efx, ptp); + break; + default: + netif_err(efx, hw, efx->net_dev, + "PTP unknown event %d\n", code); + break; + } + ptp->evt_frag_idx = 0; + } else if (MAX_EVENT_FRAGS == ptp->evt_frag_idx) { + netif_err(efx, hw, efx->net_dev, + "PTP too many event fragments\n"); + ptp->evt_frag_idx = 0; + } +} + +void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev) +{ + struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp = efx->ptp_data; + + /* When extracting the sync timestamp minor value, we should discard + * the least significant two bits. These are not required in order + * to reconstruct full-range timestamps and they are optionally used + * to report status depending on the options supplied when subscribing + * for sync events. + */ + channel->sync_timestamp_major = MCDI_EVENT_FIELD(*ev, PTP_TIME_MAJOR); + channel->sync_timestamp_minor = + (MCDI_EVENT_FIELD(*ev, PTP_TIME_MINOR_MS_8BITS) & 0xFC) + << ptp->nic_time.sync_event_minor_shift; + + /* if sync events have been disabled then we want to silently ignore + * this event, so throw away result. + */ + (void) cmpxchg(&channel->sync_events_state, SYNC_EVENTS_REQUESTED, + SYNC_EVENTS_VALID); +} + +static inline u32 efx_rx_buf_timestamp_minor(struct efx_nic *efx, const u8 *eh) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_ts_offset)); +#else + const u8 *data = eh + efx->rx_packet_ts_offset; + return (u32)data[0] | + (u32)data[1] << 8 | + (u32)data[2] << 16 | + (u32)data[3] << 24; +#endif +} + +void __efx_rx_skb_attach_timestamp(struct efx_channel *channel, + struct sk_buff *skb) +{ + struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp = efx->ptp_data; + u32 pkt_timestamp_major, pkt_timestamp_minor; + u32 diff, carry; + struct skb_shared_hwtstamps *timestamps; + + if (channel->sync_events_state != SYNC_EVENTS_VALID) + return; + + pkt_timestamp_minor = efx_rx_buf_timestamp_minor(efx, skb_mac_header(skb)); + + /* get the difference between the packet and sync timestamps, + * modulo one second + */ + diff = pkt_timestamp_minor - channel->sync_timestamp_minor; + if (pkt_timestamp_minor < channel->sync_timestamp_minor) + diff += ptp->nic_time.minor_max; + + /* do we roll over a second boundary and need to carry the one? */ + carry = (channel->sync_timestamp_minor >= ptp->nic_time.minor_max - diff) ? + 1 : 0; + + if (diff <= ptp->nic_time.sync_event_diff_max) { + /* packet is ahead of the sync event by a quarter of a second or + * less (allowing for fuzz) + */ + pkt_timestamp_major = channel->sync_timestamp_major + carry; + } else if (diff >= ptp->nic_time.sync_event_diff_min) { + /* packet is behind the sync event but within the fuzz factor. + * This means the RX packet and sync event crossed as they were + * placed on the event queue, which can sometimes happen. + */ + pkt_timestamp_major = channel->sync_timestamp_major - 1 + carry; + } else { + /* it's outside tolerance in both directions. this might be + * indicative of us missing sync events for some reason, so + * we'll call it an error rather than risk giving a bogus + * timestamp. + */ + netif_vdbg(efx, drv, efx->net_dev, + "packet timestamp %x too far from sync event %x:%x\n", + pkt_timestamp_minor, channel->sync_timestamp_major, + channel->sync_timestamp_minor); + return; + } + + /* attach the timestamps to the skb */ + timestamps = skb_hwtstamps(skb); + timestamps->hwtstamp = + ptp->nic_to_kernel_time(pkt_timestamp_major, + pkt_timestamp_minor, + ptp->ts_corrections.general_rx); +} + +static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + struct efx_nic *efx = ptp_data->efx; + MCDI_DECLARE_BUF(inadj, MC_CMD_PTP_IN_ADJUST_LEN); + s64 adjustment_ns; + int rc; + + if (delta > MAX_PPB) + delta = MAX_PPB; + else if (delta < -MAX_PPB) + delta = -MAX_PPB; + + /* Convert ppb to fixed point ns taking care to round correctly. */ + adjustment_ns = ((s64)delta * PPB_SCALE_WORD + + (1 << (ptp_data->adjfreq_ppb_shift - 1))) >> + ptp_data->adjfreq_ppb_shift; + + MCDI_SET_DWORD(inadj, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST); + MCDI_SET_DWORD(inadj, PTP_IN_PERIPH_ID, 0); + MCDI_SET_QWORD(inadj, PTP_IN_ADJUST_FREQ, adjustment_ns); + MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_SECONDS, 0); + MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_NANOSECONDS, 0); + rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inadj, sizeof(inadj), + NULL, 0, NULL); + if (rc != 0) + return rc; + + ptp_data->current_adjfreq = adjustment_ns; + return 0; +} + +static int efx_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + u32 nic_major, nic_minor; + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + struct efx_nic *efx = ptp_data->efx; + MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_ADJUST_LEN); + + efx->ptp_data->ns_to_nic_time(delta, &nic_major, &nic_minor); + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST); + MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); + MCDI_SET_QWORD(inbuf, PTP_IN_ADJUST_FREQ, ptp_data->current_adjfreq); + MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_MAJOR, nic_major); + MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_MINOR, nic_minor); + return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_phc_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + struct efx_nic *efx = ptp_data->efx; + MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_READ_NIC_TIME_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_READ_NIC_TIME_LEN); + int rc; + ktime_t kt; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_READ_NIC_TIME); + MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), NULL); + if (rc != 0) + return rc; + + kt = ptp_data->nic_to_kernel_time( + MCDI_DWORD(outbuf, PTP_OUT_READ_NIC_TIME_MAJOR), + MCDI_DWORD(outbuf, PTP_OUT_READ_NIC_TIME_MINOR), 0); + *ts = ktime_to_timespec64(kt); + return 0; +} + +static int efx_phc_settime(struct ptp_clock_info *ptp, + const struct timespec64 *e_ts) +{ + /* Get the current NIC time, efx_phc_gettime. + * Subtract from the desired time to get the offset + * call efx_phc_adjtime with the offset + */ + int rc; + struct timespec64 time_now; + struct timespec64 delta; + + rc = efx_phc_gettime(ptp, &time_now); + if (rc != 0) + return rc; + + delta = timespec64_sub(*e_ts, time_now); + + rc = efx_phc_adjtime(ptp, timespec64_to_ns(&delta)); + if (rc != 0) + return rc; + + return 0; +} + +static int efx_phc_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *request, + int enable) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + if (request->type != PTP_CLK_REQ_PPS) + return -EOPNOTSUPP; + + ptp_data->nic_ts_enabled = !!enable; + return 0; +} + +static const struct efx_channel_type efx_ptp_channel_type = { + .handle_no_channel = efx_ptp_handle_no_channel, + .pre_probe = efx_ptp_probe_channel, + .post_remove = efx_ptp_remove_channel, + .get_name = efx_ptp_get_channel_name, + /* no copy operation; there is no need to reallocate this channel */ + .receive_skb = efx_ptp_rx, + .want_txqs = efx_ptp_want_txqs, + .keep_eventq = false, +}; + +void efx_ptp_defer_probe_with_channel(struct efx_nic *efx) +{ + /* Check whether PTP is implemented on this NIC. The DISABLE + * operation will succeed if and only if it is implemented. + */ + if (efx_ptp_disable(efx) == 0) + efx->extra_channel_type[EFX_EXTRA_CHANNEL_PTP] = + &efx_ptp_channel_type; +} + +void efx_ptp_start_datapath(struct efx_nic *efx) +{ + if (efx_ptp_restart(efx)) + netif_err(efx, drv, efx->net_dev, "Failed to restart PTP.\n"); + /* re-enable timestamping if it was previously enabled */ + if (efx->type->ptp_set_ts_sync_events) + efx->type->ptp_set_ts_sync_events(efx, true, true); +} + +void efx_ptp_stop_datapath(struct efx_nic *efx) +{ + /* temporarily disable timestamping */ + if (efx->type->ptp_set_ts_sync_events) + efx->type->ptp_set_ts_sync_events(efx, false, true); + efx_ptp_stop(efx); +} diff --git a/drivers/net/ethernet/sfc/siena/ptp.h b/drivers/net/ethernet/sfc/siena/ptp.h new file mode 100644 index 000000000000..9855e8c9e544 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/ptp.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2013 Solarflare Communications Inc. + * Copyright 2019-2020 Xilinx Inc. + */ + +#ifndef EFX_PTP_H +#define EFX_PTP_H + +#include <linux/net_tstamp.h> +#include "net_driver.h" + +struct ethtool_ts_info; +int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel); +void efx_ptp_defer_probe_with_channel(struct efx_nic *efx); +struct efx_channel *efx_ptp_channel(struct efx_nic *efx); +void efx_ptp_remove(struct efx_nic *efx); +int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr); +int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr); +void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info); +bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); +int efx_ptp_get_mode(struct efx_nic *efx); +int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted, + unsigned int new_mode); +int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); +void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev); +size_t efx_ptp_describe_stats(struct efx_nic *efx, u8 *strings); +size_t efx_ptp_update_stats(struct efx_nic *efx, u64 *stats); +void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev); +void __efx_rx_skb_attach_timestamp(struct efx_channel *channel, + struct sk_buff *skb); +static inline void efx_rx_skb_attach_timestamp(struct efx_channel *channel, + struct sk_buff *skb) +{ + if (channel->sync_events_state == SYNC_EVENTS_VALID) + __efx_rx_skb_attach_timestamp(channel, skb); +} +void efx_ptp_start_datapath(struct efx_nic *efx); +void efx_ptp_stop_datapath(struct efx_nic *efx); +bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx); +ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue); + +#endif /* EFX_PTP_H */ diff --git a/drivers/net/ethernet/sfc/siena/rx.c b/drivers/net/ethernet/sfc/siena/rx.c new file mode 100644 index 000000000000..2375cef577e4 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/rx.c @@ -0,0 +1,399 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2005-2013 Solarflare Communications Inc. + */ + +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/slab.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/prefetch.h> +#include <linux/moduleparam.h> +#include <linux/iommu.h> +#include <net/ip.h> +#include <net/checksum.h> +#include <net/xdp.h> +#include <linux/bpf_trace.h> +#include "net_driver.h" +#include "efx.h" +#include "rx_common.h" +#include "filter.h" +#include "nic.h" +#include "selftest.h" +#include "workarounds.h" + +/* Preferred number of descriptors to fill at once */ +#define EFX_RX_PREFERRED_BATCH 8U + +/* Maximum rx prefix used by any architecture. */ +#define EFX_MAX_RX_PREFIX_SIZE 16 + +/* Size of buffer allocated for skb header area. */ +#define EFX_SKB_HEADERS 128u + +/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ +#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ + EFX_RX_USR_BUF_SIZE) + +static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + int len) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding; + + if (likely(len <= max_len)) + return; + + /* The packet must be discarded, but this is only a fatal error + * if the caller indicated it was + */ + rx_buf->flags |= EFX_RX_PKT_DISCARD; + + if (net_ratelimit()) + netif_err(efx, rx_err, efx->net_dev, + "RX queue %d overlength RX event (%#x > %#x)\n", + efx_rx_queue_index(rx_queue), len, max_len); + + efx_rx_queue_channel(rx_queue)->n_rx_overlength++; +} + +/* Allocate and construct an SKB around page fragments */ +static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags, + u8 *eh, int hdr_len) +{ + struct efx_nic *efx = channel->efx; + struct sk_buff *skb; + + /* Allocate an SKB to store the headers */ + skb = netdev_alloc_skb(efx->net_dev, + efx->rx_ip_align + efx->rx_prefix_size + + hdr_len); + if (unlikely(skb == NULL)) { + atomic_inc(&efx->n_rx_noskb_drops); + return NULL; + } + + EFX_WARN_ON_ONCE_PARANOID(rx_buf->len < hdr_len); + + memcpy(skb->data + efx->rx_ip_align, eh - efx->rx_prefix_size, + efx->rx_prefix_size + hdr_len); + skb_reserve(skb, efx->rx_ip_align + efx->rx_prefix_size); + __skb_put(skb, hdr_len); + + /* Append the remaining page(s) onto the frag list */ + if (rx_buf->len > hdr_len) { + rx_buf->page_offset += hdr_len; + rx_buf->len -= hdr_len; + + for (;;) { + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + rx_buf->page, rx_buf->page_offset, + rx_buf->len, efx->rx_buffer_truesize); + rx_buf->page = NULL; + + if (skb_shinfo(skb)->nr_frags == n_frags) + break; + + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } + } else { + __free_pages(rx_buf->page, efx->rx_buffer_order); + rx_buf->page = NULL; + n_frags = 0; + } + + /* Move past the ethernet header */ + skb->protocol = eth_type_trans(skb, efx->net_dev); + + skb_mark_napi_id(skb, &channel->napi_str); + + return skb; +} + +void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, + unsigned int n_frags, unsigned int len, u16 flags) +{ + struct efx_nic *efx = rx_queue->efx; + struct efx_channel *channel = efx_rx_queue_channel(rx_queue); + struct efx_rx_buffer *rx_buf; + + rx_queue->rx_packets++; + + rx_buf = efx_rx_buffer(rx_queue, index); + rx_buf->flags |= flags; + + /* Validate the number of fragments and completed length */ + if (n_frags == 1) { + if (!(flags & EFX_RX_PKT_PREFIX_LEN)) + efx_rx_packet__check_len(rx_queue, rx_buf, len); + } else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) || + unlikely(len <= (n_frags - 1) * efx->rx_dma_len) || + unlikely(len > n_frags * efx->rx_dma_len) || + unlikely(!efx->rx_scatter)) { + /* If this isn't an explicit discard request, either + * the hardware or the driver is broken. + */ + WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD)); + rx_buf->flags |= EFX_RX_PKT_DISCARD; + } + + netif_vdbg(efx, rx_status, efx->net_dev, + "RX queue %d received ids %x-%x len %d %s%s\n", + efx_rx_queue_index(rx_queue), index, + (index + n_frags - 1) & rx_queue->ptr_mask, len, + (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "", + (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : ""); + + /* Discard packet, if instructed to do so. Process the + * previous receive first. + */ + if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { + efx_rx_flush_packet(channel); + efx_discard_rx_packet(channel, rx_buf, n_frags); + return; + } + + if (n_frags == 1 && !(flags & EFX_RX_PKT_PREFIX_LEN)) + rx_buf->len = len; + + /* Release and/or sync the DMA mapping - assumes all RX buffers + * consumed in-order per RX queue. + */ + efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); + + /* Prefetch nice and early so data will (hopefully) be in cache by + * the time we look at it. + */ + prefetch(efx_rx_buf_va(rx_buf)); + + rx_buf->page_offset += efx->rx_prefix_size; + rx_buf->len -= efx->rx_prefix_size; + + if (n_frags > 1) { + /* Release/sync DMA mapping for additional fragments. + * Fix length for last fragment. + */ + unsigned int tail_frags = n_frags - 1; + + for (;;) { + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + if (--tail_frags == 0) + break; + efx_sync_rx_buffer(efx, rx_buf, efx->rx_dma_len); + } + rx_buf->len = len - (n_frags - 1) * efx->rx_dma_len; + efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); + } + + /* All fragments have been DMA-synced, so recycle pages. */ + rx_buf = efx_rx_buffer(rx_queue, index); + efx_recycle_rx_pages(channel, rx_buf, n_frags); + + /* Pipeline receives so that we give time for packet headers to be + * prefetched into cache. + */ + efx_rx_flush_packet(channel); + channel->rx_pkt_n_frags = n_frags; + channel->rx_pkt_index = index; +} + +static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct sk_buff *skb; + u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS); + + skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len); + if (unlikely(skb == NULL)) { + struct efx_rx_queue *rx_queue; + + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); + return; + } + skb_record_rx_queue(skb, channel->rx_queue.core_index); + + /* Set the SKB flags */ + skb_checksum_none_assert(skb); + if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); + } + + efx_rx_skb_attach_timestamp(channel, skb); + + if (channel->type->receive_skb) + if (channel->type->receive_skb(channel, skb)) + return; + + /* Pass the packet up */ + if (channel->rx_list != NULL) + /* Add to list, will pass up later */ + list_add_tail(&skb->list, channel->rx_list); + else + /* No list, so pass it up now */ + netif_receive_skb(skb); +} + +/** efx_do_xdp: perform XDP processing on a received packet + * + * Returns true if packet should still be delivered. + */ +static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, u8 **ehp) +{ + u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE]; + struct efx_rx_queue *rx_queue; + struct bpf_prog *xdp_prog; + struct xdp_frame *xdpf; + struct xdp_buff xdp; + u32 xdp_act; + s16 offset; + int err; + + xdp_prog = rcu_dereference_bh(efx->xdp_prog); + if (!xdp_prog) + return true; + + rx_queue = efx_channel_get_rx_queue(channel); + + if (unlikely(channel->rx_pkt_n_frags > 1)) { + /* We can't do XDP on fragmented packets - drop. */ + efx_free_rx_buffers(rx_queue, rx_buf, + channel->rx_pkt_n_frags); + if (net_ratelimit()) + netif_err(efx, rx_err, efx->net_dev, + "XDP is not possible with multiple receive fragments (%d)\n", + channel->rx_pkt_n_frags); + channel->n_rx_xdp_bad_drops++; + return false; + } + + dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, + rx_buf->len, DMA_FROM_DEVICE); + + /* Save the rx prefix. */ + EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE); + memcpy(rx_prefix, *ehp - efx->rx_prefix_size, + efx->rx_prefix_size); + + xdp_init_buff(&xdp, efx->rx_page_buf_step, &rx_queue->xdp_rxq_info); + /* No support yet for XDP metadata */ + xdp_prepare_buff(&xdp, *ehp - EFX_XDP_HEADROOM, EFX_XDP_HEADROOM, + rx_buf->len, false); + + xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp); + + offset = (u8 *)xdp.data - *ehp; + + switch (xdp_act) { + case XDP_PASS: + /* Fix up rx prefix. */ + if (offset) { + *ehp += offset; + rx_buf->page_offset += offset; + rx_buf->len -= offset; + memcpy(*ehp - efx->rx_prefix_size, rx_prefix, + efx->rx_prefix_size); + } + break; + + case XDP_TX: + /* Buffer ownership passes to tx on success. */ + xdpf = xdp_convert_buff_to_frame(&xdp); + err = efx_xdp_tx_buffers(efx, 1, &xdpf, true); + if (unlikely(err != 1)) { + efx_free_rx_buffers(rx_queue, rx_buf, 1); + if (net_ratelimit()) + netif_err(efx, rx_err, efx->net_dev, + "XDP TX failed (%d)\n", err); + channel->n_rx_xdp_bad_drops++; + trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); + } else { + channel->n_rx_xdp_tx++; + } + break; + + case XDP_REDIRECT: + err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog); + if (unlikely(err)) { + efx_free_rx_buffers(rx_queue, rx_buf, 1); + if (net_ratelimit()) + netif_err(efx, rx_err, efx->net_dev, + "XDP redirect failed (%d)\n", err); + channel->n_rx_xdp_bad_drops++; + trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); + } else { + channel->n_rx_xdp_redirect++; + } + break; + + default: + bpf_warn_invalid_xdp_action(efx->net_dev, xdp_prog, xdp_act); + efx_free_rx_buffers(rx_queue, rx_buf, 1); + channel->n_rx_xdp_bad_drops++; + trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); + break; + + case XDP_ABORTED: + trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); + fallthrough; + case XDP_DROP: + efx_free_rx_buffers(rx_queue, rx_buf, 1); + channel->n_rx_xdp_drops++; + break; + } + + return xdp_act == XDP_PASS; +} + +/* Handle a received packet. Second half: Touches packet payload. */ +void __efx_rx_packet(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + struct efx_rx_buffer *rx_buf = + efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index); + u8 *eh = efx_rx_buf_va(rx_buf); + + /* Read length from the prefix if necessary. This already + * excludes the length of the prefix itself. + */ + if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN) + rx_buf->len = le16_to_cpup((__le16 *) + (eh + efx->rx_packet_len_offset)); + + /* If we're in loopback test, then pass the packet directly to the + * loopback layer, and free the rx_buf here + */ + if (unlikely(efx->loopback_selftest)) { + struct efx_rx_queue *rx_queue; + + efx_loopback_rx_packet(efx, eh, rx_buf->len); + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, + channel->rx_pkt_n_frags); + goto out; + } + + if (!efx_do_xdp(efx, channel, rx_buf, &eh)) + goto out; + + if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) + rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; + + if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb) + efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh, 0); + else + efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags); +out: + channel->rx_pkt_n_frags = 0; +} diff --git a/drivers/net/ethernet/sfc/siena/rx_common.c b/drivers/net/ethernet/sfc/siena/rx_common.c new file mode 100644 index 000000000000..fa8b9aacca11 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/rx_common.c @@ -0,0 +1,1086 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include <linux/module.h> +#include <linux/iommu.h> +#include "efx.h" +#include "nic.h" +#include "rx_common.h" + +/* This is the percentage fill level below which new RX descriptors + * will be added to the RX descriptor ring. + */ +static unsigned int rx_refill_threshold; +module_param(rx_refill_threshold, uint, 0444); +MODULE_PARM_DESC(rx_refill_threshold, + "RX descriptor ring refill threshold (%)"); + +/* RX maximum head room required. + * + * This must be at least 1 to prevent overflow, plus one packet-worth + * to allow pipelined receives. + */ +#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS) + +/* Check the RX page recycle ring for a page that can be reused. */ +static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + struct efx_rx_page_state *state; + unsigned int index; + struct page *page; + + if (unlikely(!rx_queue->page_ring)) + return NULL; + index = rx_queue->page_remove & rx_queue->page_ptr_mask; + page = rx_queue->page_ring[index]; + if (page == NULL) + return NULL; + + rx_queue->page_ring[index] = NULL; + /* page_remove cannot exceed page_add. */ + if (rx_queue->page_remove != rx_queue->page_add) + ++rx_queue->page_remove; + + /* If page_count is 1 then we hold the only reference to this page. */ + if (page_count(page) == 1) { + ++rx_queue->page_recycle_count; + return page; + } else { + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + ++rx_queue->page_recycle_failed; + } + + return NULL; +} + +/* Attempt to recycle the page if there is an RX recycle ring; the page can + * only be added if this is the final RX buffer, to prevent pages being used in + * the descriptor ring and appearing in the recycle ring simultaneously. + */ +static void efx_recycle_rx_page(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + struct efx_nic *efx = rx_queue->efx; + struct page *page = rx_buf->page; + unsigned int index; + + /* Only recycle the page after processing the final buffer. */ + if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) + return; + + index = rx_queue->page_add & rx_queue->page_ptr_mask; + if (rx_queue->page_ring[index] == NULL) { + unsigned int read_index = rx_queue->page_remove & + rx_queue->page_ptr_mask; + + /* The next slot in the recycle ring is available, but + * increment page_remove if the read pointer currently + * points here. + */ + if (read_index == index) + ++rx_queue->page_remove; + rx_queue->page_ring[index] = page; + ++rx_queue->page_add; + return; + } + ++rx_queue->page_recycle_full; + efx_unmap_rx_buffer(efx, rx_buf); + put_page(rx_buf->page); +} + +/* Recycle the pages that are used by buffers that have just been received. */ +void efx_recycle_rx_pages(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + + if (unlikely(!rx_queue->page_ring)) + return; + + do { + efx_recycle_rx_page(channel, rx_buf); + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--n_frags); +} + +void efx_discard_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + + efx_recycle_rx_pages(channel, rx_buf, n_frags); + + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); +} + +static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue) +{ + unsigned int bufs_in_recycle_ring, page_ring_size; + struct efx_nic *efx = rx_queue->efx; + + bufs_in_recycle_ring = efx_rx_recycle_ring_size(efx); + page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / + efx->rx_bufs_per_page); + rx_queue->page_ring = kcalloc(page_ring_size, + sizeof(*rx_queue->page_ring), GFP_KERNEL); + if (!rx_queue->page_ring) + rx_queue->page_ptr_mask = 0; + else + rx_queue->page_ptr_mask = page_ring_size - 1; +} + +static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + int i; + + if (unlikely(!rx_queue->page_ring)) + return; + + /* Unmap and release the pages in the recycle ring. Remove the ring. */ + for (i = 0; i <= rx_queue->page_ptr_mask; i++) { + struct page *page = rx_queue->page_ring[i]; + struct efx_rx_page_state *state; + + if (page == NULL) + continue; + + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + } + kfree(rx_queue->page_ring); + rx_queue->page_ring = NULL; +} + +static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf) +{ + /* Release the page reference we hold for the buffer. */ + if (rx_buf->page) + put_page(rx_buf->page); + + /* If this is the last buffer in a page, unmap and free it. */ + if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { + efx_unmap_rx_buffer(rx_queue->efx, rx_buf); + efx_free_rx_buffers(rx_queue, rx_buf, 1); + } + rx_buf->page = NULL; +} + +int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int entries; + int rc; + + /* Create the smallest power-of-two aligned ring */ + entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE); + EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); + rx_queue->ptr_mask = entries - 1; + + netif_dbg(efx, probe, efx->net_dev, + "creating RX queue %d size %#x mask %#x\n", + efx_rx_queue_index(rx_queue), efx->rxq_entries, + rx_queue->ptr_mask); + + /* Allocate RX buffers */ + rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer), + GFP_KERNEL); + if (!rx_queue->buffer) + return -ENOMEM; + + rc = efx_nic_probe_rx(rx_queue); + if (rc) { + kfree(rx_queue->buffer); + rx_queue->buffer = NULL; + } + + return rc; +} + +void efx_init_rx_queue(struct efx_rx_queue *rx_queue) +{ + unsigned int max_fill, trigger, max_trigger; + struct efx_nic *efx = rx_queue->efx; + int rc = 0; + + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); + + /* Initialise ptr fields */ + rx_queue->added_count = 0; + rx_queue->notified_count = 0; + rx_queue->removed_count = 0; + rx_queue->min_fill = -1U; + efx_init_rx_recycle_ring(rx_queue); + + rx_queue->page_remove = 0; + rx_queue->page_add = rx_queue->page_ptr_mask + 1; + rx_queue->page_recycle_count = 0; + rx_queue->page_recycle_failed = 0; + rx_queue->page_recycle_full = 0; + + /* Initialise limit fields */ + max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; + max_trigger = + max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page; + if (rx_refill_threshold != 0) { + trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; + if (trigger > max_trigger) + trigger = max_trigger; + } else { + trigger = max_trigger; + } + + rx_queue->max_fill = max_fill; + rx_queue->fast_fill_trigger = trigger; + rx_queue->refill_enabled = true; + + /* Initialise XDP queue information */ + rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev, + rx_queue->core_index, 0); + + if (rc) { + netif_err(efx, rx_err, efx->net_dev, + "Failure to initialise XDP queue information rc=%d\n", + rc); + efx->xdp_rxq_info_failed = true; + } else { + rx_queue->xdp_rxq_info_valid = true; + } + + /* Set up RX descriptor ring */ + efx_nic_init_rx(rx_queue); +} + +void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_rx_buffer *rx_buf; + int i; + + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); + + del_timer_sync(&rx_queue->slow_fill); + + /* Release RX buffers from the current read ptr to the write ptr */ + if (rx_queue->buffer) { + for (i = rx_queue->removed_count; i < rx_queue->added_count; + i++) { + unsigned int index = i & rx_queue->ptr_mask; + + rx_buf = efx_rx_buffer(rx_queue, index); + efx_fini_rx_buffer(rx_queue, rx_buf); + } + } + + efx_fini_rx_recycle_ring(rx_queue); + + if (rx_queue->xdp_rxq_info_valid) + xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info); + + rx_queue->xdp_rxq_info_valid = false; +} + +void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) +{ + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "destroying RX queue %d\n", efx_rx_queue_index(rx_queue)); + + efx_nic_remove_rx(rx_queue); + + kfree(rx_queue->buffer); + rx_queue->buffer = NULL; +} + +/* Unmap a DMA-mapped page. This function is only called for the final RX + * buffer in a page. + */ +void efx_unmap_rx_buffer(struct efx_nic *efx, + struct efx_rx_buffer *rx_buf) +{ + struct page *page = rx_buf->page; + + if (page) { + struct efx_rx_page_state *state = page_address(page); + + dma_unmap_page(&efx->pci_dev->dev, + state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + } +} + +void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + unsigned int num_bufs) +{ + do { + if (rx_buf->page) { + put_page(rx_buf->page); + rx_buf->page = NULL; + } + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--num_bufs); +} + +void efx_rx_slow_fill(struct timer_list *t) +{ + struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); + + /* Post an event to cause NAPI to run and refill the queue */ + efx_nic_generate_fill_event(rx_queue); + ++rx_queue->slow_fill_count; +} + +void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) +{ + mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10)); +} + +/* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers + * + * @rx_queue: Efx RX queue + * + * This allocates a batch of pages, maps them for DMA, and populates + * struct efx_rx_buffers for each one. Return a negative error code or + * 0 on success. If a single page can be used for multiple buffers, + * then the page will either be inserted fully, or not at all. + */ +static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) +{ + unsigned int page_offset, index, count; + struct efx_nic *efx = rx_queue->efx; + struct efx_rx_page_state *state; + struct efx_rx_buffer *rx_buf; + dma_addr_t dma_addr; + struct page *page; + + count = 0; + do { + page = efx_reuse_page(rx_queue); + if (page == NULL) { + page = alloc_pages(__GFP_COMP | + (atomic ? GFP_ATOMIC : GFP_KERNEL), + efx->rx_buffer_order); + if (unlikely(page == NULL)) + return -ENOMEM; + dma_addr = + dma_map_page(&efx->pci_dev->dev, page, 0, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&efx->pci_dev->dev, + dma_addr))) { + __free_pages(page, efx->rx_buffer_order); + return -EIO; + } + state = page_address(page); + state->dma_addr = dma_addr; + } else { + state = page_address(page); + dma_addr = state->dma_addr; + } + + dma_addr += sizeof(struct efx_rx_page_state); + page_offset = sizeof(struct efx_rx_page_state); + + do { + index = rx_queue->added_count & rx_queue->ptr_mask; + rx_buf = efx_rx_buffer(rx_queue, index); + rx_buf->dma_addr = dma_addr + efx->rx_ip_align + + EFX_XDP_HEADROOM; + rx_buf->page = page; + rx_buf->page_offset = page_offset + efx->rx_ip_align + + EFX_XDP_HEADROOM; + rx_buf->len = efx->rx_dma_len; + rx_buf->flags = 0; + ++rx_queue->added_count; + get_page(page); + dma_addr += efx->rx_page_buf_step; + page_offset += efx->rx_page_buf_step; + } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); + + rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; + } while (++count < efx->rx_pages_per_batch); + + return 0; +} + +void efx_rx_config_page_split(struct efx_nic *efx) +{ + efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align + + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM, + EFX_RX_BUF_ALIGNMENT); + efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : + ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / + efx->rx_page_buf_step); + efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / + efx->rx_bufs_per_page; + efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, + efx->rx_bufs_per_page); +} + +/* efx_fast_push_rx_descriptors - push new RX descriptors quickly + * @rx_queue: RX descriptor queue + * + * This will aim to fill the RX descriptor queue up to + * @rx_queue->@max_fill. If there is insufficient atomic + * memory to do so, a slow fill will be scheduled. + * + * The caller must provide serialisation (none is used here). In practise, + * this means this function must run from the NAPI handler, or be called + * when NAPI is disabled. + */ +void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int fill_level, batch_size; + int space, rc = 0; + + if (!rx_queue->refill_enabled) + return; + + /* Calculate current fill level, and exit if we don't need to fill */ + fill_level = (rx_queue->added_count - rx_queue->removed_count); + EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries); + if (fill_level >= rx_queue->fast_fill_trigger) + goto out; + + /* Record minimum fill level */ + if (unlikely(fill_level < rx_queue->min_fill)) { + if (fill_level) + rx_queue->min_fill = fill_level; + } + + batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; + space = rx_queue->max_fill - fill_level; + EFX_WARN_ON_ONCE_PARANOID(space < batch_size); + + netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, + "RX queue %d fast-filling descriptor ring from" + " level %d to level %d\n", + efx_rx_queue_index(rx_queue), fill_level, + rx_queue->max_fill); + + do { + rc = efx_init_rx_buffers(rx_queue, atomic); + if (unlikely(rc)) { + /* Ensure that we don't leave the rx queue empty */ + efx_schedule_slow_fill(rx_queue); + goto out; + } + } while ((space -= batch_size) >= batch_size); + + netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, + "RX queue %d fast-filled descriptor ring " + "to level %d\n", efx_rx_queue_index(rx_queue), + rx_queue->added_count - rx_queue->removed_count); + + out: + if (rx_queue->notified_count != rx_queue->added_count) + efx_nic_notify_rx_desc(rx_queue); +} + +/* Pass a received packet up through GRO. GRO can handle pages + * regardless of checksum state and skbs with a good checksum. + */ +void +efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh, __wsum csum) +{ + struct napi_struct *napi = &channel->napi_str; + struct efx_nic *efx = channel->efx; + struct sk_buff *skb; + + skb = napi_get_frags(napi); + if (unlikely(!skb)) { + struct efx_rx_queue *rx_queue; + + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); + return; + } + + if (efx->net_dev->features & NETIF_F_RXHASH && + efx_rx_buf_hash_valid(efx, eh)) + skb_set_hash(skb, efx_rx_buf_hash(efx, eh), + PKT_HASH_TYPE_L3); + if (csum) { + skb->csum = csum; + skb->ip_summed = CHECKSUM_COMPLETE; + } else { + skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? + CHECKSUM_UNNECESSARY : CHECKSUM_NONE); + } + skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); + + for (;;) { + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buf->page, rx_buf->page_offset, + rx_buf->len); + rx_buf->page = NULL; + skb->len += rx_buf->len; + if (skb_shinfo(skb)->nr_frags == n_frags) + break; + + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } + + skb->data_len = skb->len; + skb->truesize += n_frags * efx->rx_buffer_truesize; + + skb_record_rx_queue(skb, channel->rx_queue.core_index); + + napi_gro_frags(napi); +} + +/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because + * (a) this is an infrequent control-plane operation and (b) n is small (max 64) + */ +struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) +{ + struct list_head *head = &efx->rss_context.list; + struct efx_rss_context *ctx, *new; + u32 id = 1; /* Don't use zero, that refers to the master RSS context */ + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + /* Search for first gap in the numbering */ + list_for_each_entry(ctx, head, list) { + if (ctx->user_id != id) + break; + id++; + /* Check for wrap. If this happens, we have nearly 2^32 + * allocated RSS contexts, which seems unlikely. + */ + if (WARN_ON_ONCE(!id)) + return NULL; + } + + /* Create the new entry */ + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + new->rx_hash_udp_4tuple = false; + + /* Insert the new entry into the gap */ + new->user_id = id; + list_add_tail(&new->list, &ctx->list); + return new; +} + +struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) +{ + struct list_head *head = &efx->rss_context.list; + struct efx_rss_context *ctx; + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + list_for_each_entry(ctx, head, list) + if (ctx->user_id == id) + return ctx; + return NULL; +} + +void efx_free_rss_context_entry(struct efx_rss_context *ctx) +{ + list_del(&ctx->list); + kfree(ctx); +} + +void efx_set_default_rx_indir_table(struct efx_nic *efx, + struct efx_rss_context *ctx) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) + ctx->rx_indir_table[i] = + ethtool_rxfh_indir_default(i, efx->rss_spread); +} + +/** + * efx_filter_is_mc_recipient - test whether spec is a multicast recipient + * @spec: Specification to test + * + * Return: %true if the specification is a non-drop RX filter that + * matches a local MAC address I/G bit value of 1 or matches a local + * IPv4 or IPv6 address value in the respective multicast address + * range. Otherwise %false. + */ +bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec) +{ + if (!(spec->flags & EFX_FILTER_FLAG_RX) || + spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP) + return false; + + if (spec->match_flags & + (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) && + is_multicast_ether_addr(spec->loc_mac)) + return true; + + if ((spec->match_flags & + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { + if (spec->ether_type == htons(ETH_P_IP) && + ipv4_is_multicast(spec->loc_host[0])) + return true; + if (spec->ether_type == htons(ETH_P_IPV6) && + ((const u8 *)spec->loc_host)[0] == 0xff) + return true; + } + + return false; +} + +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right) +{ + if ((left->match_flags ^ right->match_flags) | + ((left->flags ^ right->flags) & + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) + return false; + + return memcmp(&left->outer_vid, &right->outer_vid, + sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) == 0; +} + +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) +{ + BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); + return jhash2((const u32 *)&spec->outer_vid, + (sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) / 4, + 0); +} + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force) +{ + if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { + /* ARFS is currently updating this entry, leave it */ + return false; + } + if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { + /* ARFS tried and failed to update this, so it's probably out + * of date. Remove the filter and the ARFS rule entry. + */ + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; + *force = true; + return true; + } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ + /* ARFS has moved on, so old filter is not needed. Since we did + * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will + * not be removed by efx_rps_hash_del() subsequently. + */ + *force = true; + return true; + } + /* Remove it iff ARFS wants to. */ + return true; +} + +static +struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + u32 hash = efx_filter_spec_hash(spec); + + lockdep_assert_held(&efx->rps_hash_lock); + if (!efx->rps_hash_table) + return NULL; + return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; +} + +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) + return rule; + } + return NULL; +} + +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + *new = false; + return rule; + } + } + rule = kmalloc(sizeof(*rule), GFP_ATOMIC); + *new = true; + if (rule) { + memcpy(&rule->spec, spec, sizeof(rule->spec)); + hlist_add_head(&rule->node, head); + } + return rule; +} + +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (WARN_ON(!head)) + return; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + /* Someone already reused the entry. We know that if + * this check doesn't fire (i.e. filter_id == REMOVING) + * then the REMOVING mark was put there by our caller, + * because caller is holding a lock on filter table and + * only holders of that lock set REMOVING. + */ + if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) + return; + hlist_del(node); + kfree(rule); + return; + } + } + /* We didn't find it. */ + WARN_ON(1); +} +#endif + +int efx_probe_filters(struct efx_nic *efx) +{ + int rc; + + mutex_lock(&efx->mac_lock); + down_write(&efx->filter_sem); + rc = efx->type->filter_table_probe(efx); + if (rc) + goto out_unlock; + +#ifdef CONFIG_RFS_ACCEL + if (efx->type->offload_features & NETIF_F_NTUPLE) { + struct efx_channel *channel; + int i, success = 1; + + efx_for_each_channel(channel, efx) { + channel->rps_flow_id = + kcalloc(efx->type->max_rx_ip_filters, + sizeof(*channel->rps_flow_id), + GFP_KERNEL); + if (!channel->rps_flow_id) + success = 0; + else + for (i = 0; + i < efx->type->max_rx_ip_filters; + ++i) + channel->rps_flow_id[i] = + RPS_FLOW_ID_INVALID; + channel->rfs_expire_index = 0; + channel->rfs_filter_count = 0; + } + + if (!success) { + efx_for_each_channel(channel, efx) + kfree(channel->rps_flow_id); + efx->type->filter_table_remove(efx); + rc = -ENOMEM; + goto out_unlock; + } + } +#endif +out_unlock: + up_write(&efx->filter_sem); + mutex_unlock(&efx->mac_lock); + return rc; +} + +void efx_remove_filters(struct efx_nic *efx) +{ +#ifdef CONFIG_RFS_ACCEL + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) { + cancel_delayed_work_sync(&channel->filter_work); + kfree(channel->rps_flow_id); + channel->rps_flow_id = NULL; + } +#endif + down_write(&efx->filter_sem); + efx->type->filter_table_remove(efx); + up_write(&efx->filter_sem); +} + +#ifdef CONFIG_RFS_ACCEL + +static void efx_filter_rfs_work(struct work_struct *data) +{ + struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion, + work); + struct efx_nic *efx = netdev_priv(req->net_dev); + struct efx_channel *channel = efx_get_channel(efx, req->rxq_index); + int slot_idx = req - efx->rps_slot; + struct efx_arfs_rule *rule; + u16 arfs_id = 0; + int rc; + + rc = efx->type->filter_insert(efx, &req->spec, true); + if (rc >= 0) + /* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */ + rc %= efx->type->max_rx_ip_filters; + if (efx->rps_hash_table) { + spin_lock_bh(&efx->rps_hash_lock); + rule = efx_rps_hash_find(efx, &req->spec); + /* The rule might have already gone, if someone else's request + * for the same spec was already worked and then expired before + * we got around to our work. In that case we have nothing + * tying us to an arfs_id, meaning that as soon as the filter + * is considered for expiry it will be removed. + */ + if (rule) { + if (rc < 0) + rule->filter_id = EFX_ARFS_FILTER_ID_ERROR; + else + rule->filter_id = rc; + arfs_id = rule->arfs_id; + } + spin_unlock_bh(&efx->rps_hash_lock); + } + if (rc >= 0) { + /* Remember this so we can check whether to expire the filter + * later. + */ + mutex_lock(&efx->rps_mutex); + if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID) + channel->rfs_filter_count++; + channel->rps_flow_id[rc] = req->flow_id; + mutex_unlock(&efx->rps_mutex); + + if (req->spec.ether_type == htons(ETH_P_IP)) + netif_info(efx, rx_status, efx->net_dev, + "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n", + (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + req->spec.rem_host, ntohs(req->spec.rem_port), + req->spec.loc_host, ntohs(req->spec.loc_port), + req->rxq_index, req->flow_id, rc, arfs_id); + else + netif_info(efx, rx_status, efx->net_dev, + "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n", + (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + req->spec.rem_host, ntohs(req->spec.rem_port), + req->spec.loc_host, ntohs(req->spec.loc_port), + req->rxq_index, req->flow_id, rc, arfs_id); + channel->n_rfs_succeeded++; + } else { + if (req->spec.ether_type == htons(ETH_P_IP)) + netif_dbg(efx, rx_status, efx->net_dev, + "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n", + (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + req->spec.rem_host, ntohs(req->spec.rem_port), + req->spec.loc_host, ntohs(req->spec.loc_port), + req->rxq_index, req->flow_id, rc, arfs_id); + else + netif_dbg(efx, rx_status, efx->net_dev, + "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n", + (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + req->spec.rem_host, ntohs(req->spec.rem_port), + req->spec.loc_host, ntohs(req->spec.loc_port), + req->rxq_index, req->flow_id, rc, arfs_id); + channel->n_rfs_failed++; + /* We're overloading the NIC's filter tables, so let's do a + * chunk of extra expiry work. + */ + __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, + 100u)); + } + + /* Release references */ + clear_bit(slot_idx, &efx->rps_slot_map); + dev_put(req->net_dev); +} + +int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct efx_nic *efx = netdev_priv(net_dev); + struct efx_async_filter_insertion *req; + struct efx_arfs_rule *rule; + struct flow_keys fk; + int slot_idx; + bool new; + int rc; + + /* find a free slot */ + for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++) + if (!test_and_set_bit(slot_idx, &efx->rps_slot_map)) + break; + if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT) + return -EBUSY; + + if (flow_id == RPS_FLOW_ID_INVALID) { + rc = -EINVAL; + goto out_clear; + } + + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) { + rc = -EPROTONOSUPPORT; + goto out_clear; + } + + if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) { + rc = -EPROTONOSUPPORT; + goto out_clear; + } + if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) { + rc = -EPROTONOSUPPORT; + goto out_clear; + } + + req = efx->rps_slot + slot_idx; + efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT, + efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0, + rxq_index); + req->spec.match_flags = + EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO | + EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT | + EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT; + req->spec.ether_type = fk.basic.n_proto; + req->spec.ip_proto = fk.basic.ip_proto; + + if (fk.basic.n_proto == htons(ETH_P_IP)) { + req->spec.rem_host[0] = fk.addrs.v4addrs.src; + req->spec.loc_host[0] = fk.addrs.v4addrs.dst; + } else { + memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src, + sizeof(struct in6_addr)); + memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst, + sizeof(struct in6_addr)); + } + + req->spec.rem_port = fk.ports.src; + req->spec.loc_port = fk.ports.dst; + + if (efx->rps_hash_table) { + /* Add it to ARFS hash table */ + spin_lock(&efx->rps_hash_lock); + rule = efx_rps_hash_add(efx, &req->spec, &new); + if (!rule) { + rc = -ENOMEM; + goto out_unlock; + } + if (new) + rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER; + rc = rule->arfs_id; + /* Skip if existing or pending filter already does the right thing */ + if (!new && rule->rxq_index == rxq_index && + rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING) + goto out_unlock; + rule->rxq_index = rxq_index; + rule->filter_id = EFX_ARFS_FILTER_ID_PENDING; + spin_unlock(&efx->rps_hash_lock); + } else { + /* Without an ARFS hash table, we just use arfs_id 0 for all + * filters. This means if multiple flows hash to the same + * flow_id, all but the most recently touched will be eligible + * for expiry. + */ + rc = 0; + } + + /* Queue the request */ + dev_hold(req->net_dev = net_dev); + INIT_WORK(&req->work, efx_filter_rfs_work); + req->rxq_index = rxq_index; + req->flow_id = flow_id; + schedule_work(&req->work); + return rc; +out_unlock: + spin_unlock(&efx->rps_hash_lock); +out_clear: + clear_bit(slot_idx, &efx->rps_slot_map); + return rc; +} + +bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota) +{ + bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index); + struct efx_nic *efx = channel->efx; + unsigned int index, size, start; + u32 flow_id; + + if (!mutex_trylock(&efx->rps_mutex)) + return false; + expire_one = efx->type->filter_rfs_expire_one; + index = channel->rfs_expire_index; + start = index; + size = efx->type->max_rx_ip_filters; + while (quota) { + flow_id = channel->rps_flow_id[index]; + + if (flow_id != RPS_FLOW_ID_INVALID) { + quota--; + if (expire_one(efx, flow_id, index)) { + netif_info(efx, rx_status, efx->net_dev, + "expired filter %d [channel %u flow %u]\n", + index, channel->channel, flow_id); + channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID; + channel->rfs_filter_count--; + } + } + if (++index == size) + index = 0; + /* If we were called with a quota that exceeds the total number + * of filters in the table (which shouldn't happen, but could + * if two callers race), ensure that we don't loop forever - + * stop when we've examined every row of the table. + */ + if (index == start) + break; + } + + channel->rfs_expire_index = index; + mutex_unlock(&efx->rps_mutex); + return true; +} + +#endif /* CONFIG_RFS_ACCEL */ diff --git a/drivers/net/ethernet/sfc/siena/rx_common.h b/drivers/net/ethernet/sfc/siena/rx_common.h new file mode 100644 index 000000000000..fbd2769307f9 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/rx_common.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_RX_COMMON_H +#define EFX_RX_COMMON_H + +/* Preferred number of descriptors to fill at once */ +#define EFX_RX_PREFERRED_BATCH 8U + +/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ +#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ + EFX_RX_USR_BUF_SIZE) + +/* Number of RX buffers to recycle pages for. When creating the RX page recycle + * ring, this number is divided by the number of buffers per page to calculate + * the number of pages to store in the RX page recycle ring. + */ +#define EFX_RECYCLE_RING_SIZE_10G 256 + +static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) +{ + return page_address(buf->page) + buf->page_offset; +} + +static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset)); +#else + const u8 *data = eh + efx->rx_packet_hash_offset; + + return (u32)data[0] | + (u32)data[1] << 8 | + (u32)data[2] << 16 | + (u32)data[3] << 24; +#endif +} + +void efx_rx_slow_fill(struct timer_list *t); + +void efx_recycle_rx_pages(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags); +void efx_discard_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags); + +int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); +void efx_init_rx_queue(struct efx_rx_queue *rx_queue); +void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); +void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); +void efx_destroy_rx_queue(struct efx_rx_queue *rx_queue); + +void efx_init_rx_buffer(struct efx_rx_queue *rx_queue, + struct page *page, + unsigned int page_offset, + u16 flags); +void efx_unmap_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf); + +static inline void efx_sync_rx_buffer(struct efx_nic *efx, + struct efx_rx_buffer *rx_buf, + unsigned int len) +{ + dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len, + DMA_FROM_DEVICE); +} + +void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + unsigned int num_bufs); + +void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); +void efx_rx_config_page_split(struct efx_nic *efx); +void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic); + +void +efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh, __wsum csum); + +struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); +struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); +void efx_free_rss_context_entry(struct efx_rss_context *ctx); +void efx_set_default_rx_indir_table(struct efx_nic *efx, + struct efx_rss_context *ctx); + +bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right); +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec); + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force); +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec); +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new); +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec); + +int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id); +bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota); +#endif + +int efx_probe_filters(struct efx_nic *efx); +void efx_remove_filters(struct efx_nic *efx); + +#endif diff --git a/drivers/net/ethernet/sfc/siena/selftest.c b/drivers/net/ethernet/sfc/siena/selftest.c new file mode 100644 index 000000000000..3c5227afd497 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/selftest.c @@ -0,0 +1,807 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2012 Solarflare Communications Inc. + */ + +#include <linux/netdevice.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/kernel_stat.h> +#include <linux/pci.h> +#include <linux/ethtool.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include "net_driver.h" +#include "efx.h" +#include "efx_common.h" +#include "efx_channels.h" +#include "nic.h" +#include "mcdi_port_common.h" +#include "selftest.h" +#include "workarounds.h" + +/* IRQ latency can be enormous because: + * - All IRQs may be disabled on a CPU for a *long* time by e.g. a + * slow serial console or an old IDE driver doing error recovery + * - The PREEMPT_RT patches mostly deal with this, but also allow a + * tasklet or normal task to be given higher priority than our IRQ + * threads + * Try to avoid blaming the hardware for this. + */ +#define IRQ_TIMEOUT HZ + +/* + * Loopback test packet structure + * + * The self-test should stress every RSS vector, and unfortunately + * Falcon only performs RSS on TCP/UDP packets. + */ +struct efx_loopback_payload { + struct ethhdr header; + struct iphdr ip; + struct udphdr udp; + __be16 iteration; + char msg[64]; +} __packed; + +/* Loopback test source MAC address */ +static const u8 payload_source[ETH_ALEN] __aligned(2) = { + 0x00, 0x0f, 0x53, 0x1b, 0x1b, 0x1b, +}; + +static const char payload_msg[] = + "Hello world! This is an Efx loopback test in progress!"; + +/* Interrupt mode names */ +static const unsigned int efx_interrupt_mode_max = EFX_INT_MODE_MAX; +static const char *const efx_interrupt_mode_names[] = { + [EFX_INT_MODE_MSIX] = "MSI-X", + [EFX_INT_MODE_MSI] = "MSI", + [EFX_INT_MODE_LEGACY] = "legacy", +}; +#define INT_MODE(efx) \ + STRING_TABLE_LOOKUP(efx->interrupt_mode, efx_interrupt_mode) + +/** + * struct efx_loopback_state - persistent state during a loopback selftest + * @flush: Drop all packets in efx_loopback_rx_packet + * @packet_count: Number of packets being used in this test + * @skbs: An array of skbs transmitted + * @offload_csum: Checksums are being offloaded + * @rx_good: RX good packet count + * @rx_bad: RX bad packet count + * @payload: Payload used in tests + */ +struct efx_loopback_state { + bool flush; + int packet_count; + struct sk_buff **skbs; + bool offload_csum; + atomic_t rx_good; + atomic_t rx_bad; + struct efx_loopback_payload payload; +}; + +/* How long to wait for all the packets to arrive (in ms) */ +#define LOOPBACK_TIMEOUT_MS 1000 + +/************************************************************************** + * + * MII, NVRAM and register tests + * + **************************************************************************/ + +static int efx_test_phy_alive(struct efx_nic *efx, struct efx_self_tests *tests) +{ + int rc = 0; + + rc = efx_mcdi_phy_test_alive(efx); + tests->phy_alive = rc ? -1 : 1; + + return rc; +} + +static int efx_test_nvram(struct efx_nic *efx, struct efx_self_tests *tests) +{ + int rc = 0; + + if (efx->type->test_nvram) { + rc = efx->type->test_nvram(efx); + if (rc == -EPERM) + rc = 0; + else + tests->nvram = rc ? -1 : 1; + } + + return rc; +} + +/************************************************************************** + * + * Interrupt and event queue testing + * + **************************************************************************/ + +/* Test generation and receipt of interrupts */ +static int efx_test_interrupts(struct efx_nic *efx, + struct efx_self_tests *tests) +{ + unsigned long timeout, wait; + int cpu; + int rc; + + netif_dbg(efx, drv, efx->net_dev, "testing interrupts\n"); + tests->interrupt = -1; + + rc = efx_nic_irq_test_start(efx); + if (rc == -ENOTSUPP) { + netif_dbg(efx, drv, efx->net_dev, + "direct interrupt testing not supported\n"); + tests->interrupt = 0; + return 0; + } + + timeout = jiffies + IRQ_TIMEOUT; + wait = 1; + + /* Wait for arrival of test interrupt. */ + netif_dbg(efx, drv, efx->net_dev, "waiting for test interrupt\n"); + do { + schedule_timeout_uninterruptible(wait); + cpu = efx_nic_irq_test_irq_cpu(efx); + if (cpu >= 0) + goto success; + wait *= 2; + } while (time_before(jiffies, timeout)); + + netif_err(efx, drv, efx->net_dev, "timed out waiting for interrupt\n"); + return -ETIMEDOUT; + + success: + netif_dbg(efx, drv, efx->net_dev, "%s test interrupt seen on CPU%d\n", + INT_MODE(efx), cpu); + tests->interrupt = 1; + return 0; +} + +/* Test generation and receipt of interrupting events */ +static int efx_test_eventq_irq(struct efx_nic *efx, + struct efx_self_tests *tests) +{ + struct efx_channel *channel; + unsigned int read_ptr[EFX_MAX_CHANNELS]; + unsigned long napi_ran = 0, dma_pend = 0, int_pend = 0; + unsigned long timeout, wait; + + BUILD_BUG_ON(EFX_MAX_CHANNELS > BITS_PER_LONG); + + efx_for_each_channel(channel, efx) { + read_ptr[channel->channel] = channel->eventq_read_ptr; + set_bit(channel->channel, &dma_pend); + set_bit(channel->channel, &int_pend); + efx_nic_event_test_start(channel); + } + + timeout = jiffies + IRQ_TIMEOUT; + wait = 1; + + /* Wait for arrival of interrupts. NAPI processing may or may + * not complete in time, but we can cope in any case. + */ + do { + schedule_timeout_uninterruptible(wait); + + efx_for_each_channel(channel, efx) { + efx_stop_eventq(channel); + if (channel->eventq_read_ptr != + read_ptr[channel->channel]) { + set_bit(channel->channel, &napi_ran); + clear_bit(channel->channel, &dma_pend); + clear_bit(channel->channel, &int_pend); + } else { + if (efx_nic_event_present(channel)) + clear_bit(channel->channel, &dma_pend); + if (efx_nic_event_test_irq_cpu(channel) >= 0) + clear_bit(channel->channel, &int_pend); + } + efx_start_eventq(channel); + } + + wait *= 2; + } while ((dma_pend || int_pend) && time_before(jiffies, timeout)); + + efx_for_each_channel(channel, efx) { + bool dma_seen = !test_bit(channel->channel, &dma_pend); + bool int_seen = !test_bit(channel->channel, &int_pend); + + tests->eventq_dma[channel->channel] = dma_seen ? 1 : -1; + tests->eventq_int[channel->channel] = int_seen ? 1 : -1; + + if (dma_seen && int_seen) { + netif_dbg(efx, drv, efx->net_dev, + "channel %d event queue passed (with%s NAPI)\n", + channel->channel, + test_bit(channel->channel, &napi_ran) ? + "" : "out"); + } else { + /* Report failure and whether either interrupt or DMA + * worked + */ + netif_err(efx, drv, efx->net_dev, + "channel %d timed out waiting for event queue\n", + channel->channel); + if (int_seen) + netif_err(efx, drv, efx->net_dev, + "channel %d saw interrupt " + "during event queue test\n", + channel->channel); + if (dma_seen) + netif_err(efx, drv, efx->net_dev, + "channel %d event was generated, but " + "failed to trigger an interrupt\n", + channel->channel); + } + } + + return (dma_pend || int_pend) ? -ETIMEDOUT : 0; +} + +static int efx_test_phy(struct efx_nic *efx, struct efx_self_tests *tests, + unsigned flags) +{ + int rc; + + mutex_lock(&efx->mac_lock); + rc = efx_mcdi_phy_run_tests(efx, tests->phy_ext, flags); + mutex_unlock(&efx->mac_lock); + if (rc == -EPERM) + rc = 0; + else + netif_info(efx, drv, efx->net_dev, + "%s phy selftest\n", rc ? "Failed" : "Passed"); + + return rc; +} + +/************************************************************************** + * + * Loopback testing + * NB Only one loopback test can be executing concurrently. + * + **************************************************************************/ + +/* Loopback test RX callback + * This is called for each received packet during loopback testing. + */ +void efx_loopback_rx_packet(struct efx_nic *efx, + const char *buf_ptr, int pkt_len) +{ + struct efx_loopback_state *state = efx->loopback_selftest; + struct efx_loopback_payload *received; + struct efx_loopback_payload *payload; + + BUG_ON(!buf_ptr); + + /* If we are just flushing, then drop the packet */ + if ((state == NULL) || state->flush) + return; + + payload = &state->payload; + + received = (struct efx_loopback_payload *) buf_ptr; + received->ip.saddr = payload->ip.saddr; + if (state->offload_csum) + received->ip.check = payload->ip.check; + + /* Check that header exists */ + if (pkt_len < sizeof(received->header)) { + netif_err(efx, drv, efx->net_dev, + "saw runt RX packet (length %d) in %s loopback " + "test\n", pkt_len, LOOPBACK_MODE(efx)); + goto err; + } + + /* Check that the ethernet header exists */ + if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) { + netif_err(efx, drv, efx->net_dev, + "saw non-loopback RX packet in %s loopback test\n", + LOOPBACK_MODE(efx)); + goto err; + } + + /* Check packet length */ + if (pkt_len != sizeof(*payload)) { + netif_err(efx, drv, efx->net_dev, + "saw incorrect RX packet length %d (wanted %d) in " + "%s loopback test\n", pkt_len, (int)sizeof(*payload), + LOOPBACK_MODE(efx)); + goto err; + } + + /* Check that IP header matches */ + if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) { + netif_err(efx, drv, efx->net_dev, + "saw corrupted IP header in %s loopback test\n", + LOOPBACK_MODE(efx)); + goto err; + } + + /* Check that msg and padding matches */ + if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) { + netif_err(efx, drv, efx->net_dev, + "saw corrupted RX packet in %s loopback test\n", + LOOPBACK_MODE(efx)); + goto err; + } + + /* Check that iteration matches */ + if (received->iteration != payload->iteration) { + netif_err(efx, drv, efx->net_dev, + "saw RX packet from iteration %d (wanted %d) in " + "%s loopback test\n", ntohs(received->iteration), + ntohs(payload->iteration), LOOPBACK_MODE(efx)); + goto err; + } + + /* Increase correct RX count */ + netif_vdbg(efx, drv, efx->net_dev, + "got loopback RX in %s loopback test\n", LOOPBACK_MODE(efx)); + + atomic_inc(&state->rx_good); + return; + + err: +#ifdef DEBUG + if (atomic_read(&state->rx_bad) == 0) { + netif_err(efx, drv, efx->net_dev, "received packet:\n"); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1, + buf_ptr, pkt_len, 0); + netif_err(efx, drv, efx->net_dev, "expected packet:\n"); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1, + &state->payload, sizeof(state->payload), 0); + } +#endif + atomic_inc(&state->rx_bad); +} + +/* Initialise an efx_selftest_state for a new iteration */ +static void efx_iterate_state(struct efx_nic *efx) +{ + struct efx_loopback_state *state = efx->loopback_selftest; + struct net_device *net_dev = efx->net_dev; + struct efx_loopback_payload *payload = &state->payload; + + /* Initialise the layerII header */ + ether_addr_copy((u8 *)&payload->header.h_dest, net_dev->dev_addr); + ether_addr_copy((u8 *)&payload->header.h_source, payload_source); + payload->header.h_proto = htons(ETH_P_IP); + + /* saddr set later and used as incrementing count */ + payload->ip.daddr = htonl(INADDR_LOOPBACK); + payload->ip.ihl = 5; + payload->ip.check = (__force __sum16) htons(0xdead); + payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr)); + payload->ip.version = IPVERSION; + payload->ip.protocol = IPPROTO_UDP; + + /* Initialise udp header */ + payload->udp.source = 0; + payload->udp.len = htons(sizeof(*payload) - sizeof(struct ethhdr) - + sizeof(struct iphdr)); + payload->udp.check = 0; /* checksum ignored */ + + /* Fill out payload */ + payload->iteration = htons(ntohs(payload->iteration) + 1); + memcpy(&payload->msg, payload_msg, sizeof(payload_msg)); + + /* Fill out remaining state members */ + atomic_set(&state->rx_good, 0); + atomic_set(&state->rx_bad, 0); + smp_wmb(); +} + +static int efx_begin_loopback(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + struct efx_loopback_state *state = efx->loopback_selftest; + struct efx_loopback_payload *payload; + struct sk_buff *skb; + int i; + netdev_tx_t rc; + + /* Transmit N copies of buffer */ + for (i = 0; i < state->packet_count; i++) { + /* Allocate an skb, holding an extra reference for + * transmit completion counting */ + skb = alloc_skb(sizeof(state->payload), GFP_KERNEL); + if (!skb) + return -ENOMEM; + state->skbs[i] = skb; + skb_get(skb); + + /* Copy the payload in, incrementing the source address to + * exercise the rss vectors */ + payload = skb_put(skb, sizeof(state->payload)); + memcpy(payload, &state->payload, sizeof(state->payload)); + payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2)); + + /* Ensure everything we've written is visible to the + * interrupt handler. */ + smp_wmb(); + + netif_tx_lock_bh(efx->net_dev); + rc = efx_enqueue_skb(tx_queue, skb); + netif_tx_unlock_bh(efx->net_dev); + + if (rc != NETDEV_TX_OK) { + netif_err(efx, drv, efx->net_dev, + "TX queue %d could not transmit packet %d of " + "%d in %s loopback test\n", tx_queue->label, + i + 1, state->packet_count, + LOOPBACK_MODE(efx)); + + /* Defer cleaning up the other skbs for the caller */ + kfree_skb(skb); + return -EPIPE; + } + } + + return 0; +} + +static int efx_poll_loopback(struct efx_nic *efx) +{ + struct efx_loopback_state *state = efx->loopback_selftest; + + return atomic_read(&state->rx_good) == state->packet_count; +} + +static int efx_end_loopback(struct efx_tx_queue *tx_queue, + struct efx_loopback_self_tests *lb_tests) +{ + struct efx_nic *efx = tx_queue->efx; + struct efx_loopback_state *state = efx->loopback_selftest; + struct sk_buff *skb; + int tx_done = 0, rx_good, rx_bad; + int i, rc = 0; + + netif_tx_lock_bh(efx->net_dev); + + /* Count the number of tx completions, and decrement the refcnt. Any + * skbs not already completed will be free'd when the queue is flushed */ + for (i = 0; i < state->packet_count; i++) { + skb = state->skbs[i]; + if (skb && !skb_shared(skb)) + ++tx_done; + dev_kfree_skb(skb); + } + + netif_tx_unlock_bh(efx->net_dev); + + /* Check TX completion and received packet counts */ + rx_good = atomic_read(&state->rx_good); + rx_bad = atomic_read(&state->rx_bad); + if (tx_done != state->packet_count) { + /* Don't free the skbs; they will be picked up on TX + * overflow or channel teardown. + */ + netif_err(efx, drv, efx->net_dev, + "TX queue %d saw only %d out of an expected %d " + "TX completion events in %s loopback test\n", + tx_queue->label, tx_done, state->packet_count, + LOOPBACK_MODE(efx)); + rc = -ETIMEDOUT; + /* Allow to fall through so we see the RX errors as well */ + } + + /* We may always be up to a flush away from our desired packet total */ + if (rx_good != state->packet_count) { + netif_dbg(efx, drv, efx->net_dev, + "TX queue %d saw only %d out of an expected %d " + "received packets in %s loopback test\n", + tx_queue->label, rx_good, state->packet_count, + LOOPBACK_MODE(efx)); + rc = -ETIMEDOUT; + /* Fall through */ + } + + /* Update loopback test structure */ + lb_tests->tx_sent[tx_queue->label] += state->packet_count; + lb_tests->tx_done[tx_queue->label] += tx_done; + lb_tests->rx_good += rx_good; + lb_tests->rx_bad += rx_bad; + + return rc; +} + +static int +efx_test_loopback(struct efx_tx_queue *tx_queue, + struct efx_loopback_self_tests *lb_tests) +{ + struct efx_nic *efx = tx_queue->efx; + struct efx_loopback_state *state = efx->loopback_selftest; + int i, begin_rc, end_rc; + + for (i = 0; i < 3; i++) { + /* Determine how many packets to send */ + state->packet_count = efx->txq_entries / 3; + state->packet_count = min(1 << (i << 2), state->packet_count); + state->skbs = kcalloc(state->packet_count, + sizeof(state->skbs[0]), GFP_KERNEL); + if (!state->skbs) + return -ENOMEM; + state->flush = false; + + netif_dbg(efx, drv, efx->net_dev, + "TX queue %d (hw %d) testing %s loopback with %d packets\n", + tx_queue->label, tx_queue->queue, LOOPBACK_MODE(efx), + state->packet_count); + + efx_iterate_state(efx); + begin_rc = efx_begin_loopback(tx_queue); + + /* This will normally complete very quickly, but be + * prepared to wait much longer. */ + msleep(1); + if (!efx_poll_loopback(efx)) { + msleep(LOOPBACK_TIMEOUT_MS); + efx_poll_loopback(efx); + } + + end_rc = efx_end_loopback(tx_queue, lb_tests); + kfree(state->skbs); + + if (begin_rc || end_rc) { + /* Wait a while to ensure there are no packets + * floating around after a failure. */ + schedule_timeout_uninterruptible(HZ / 10); + return begin_rc ? begin_rc : end_rc; + } + } + + netif_dbg(efx, drv, efx->net_dev, + "TX queue %d passed %s loopback test with a burst length " + "of %d packets\n", tx_queue->label, LOOPBACK_MODE(efx), + state->packet_count); + + return 0; +} + +/* Wait for link up. On Falcon, we would prefer to rely on efx_monitor, but + * any contention on the mac lock (via e.g. efx_mac_mcast_work) causes it + * to delay and retry. Therefore, it's safer to just poll directly. Wait + * for link up and any faults to dissipate. */ +static int efx_wait_for_link(struct efx_nic *efx) +{ + struct efx_link_state *link_state = &efx->link_state; + int count, link_up_count = 0; + bool link_up; + + for (count = 0; count < 40; count++) { + schedule_timeout_uninterruptible(HZ / 10); + + if (efx->type->monitor != NULL) { + mutex_lock(&efx->mac_lock); + efx->type->monitor(efx); + mutex_unlock(&efx->mac_lock); + } + + mutex_lock(&efx->mac_lock); + link_up = link_state->up; + if (link_up) + link_up = !efx->type->check_mac_fault(efx); + mutex_unlock(&efx->mac_lock); + + if (link_up) { + if (++link_up_count == 2) + return 0; + } else { + link_up_count = 0; + } + } + + return -ETIMEDOUT; +} + +static int efx_test_loopbacks(struct efx_nic *efx, struct efx_self_tests *tests, + unsigned int loopback_modes) +{ + enum efx_loopback_mode mode; + struct efx_loopback_state *state; + struct efx_channel *channel = + efx_get_channel(efx, efx->tx_channel_offset); + struct efx_tx_queue *tx_queue; + int rc = 0; + + /* Set the port loopback_selftest member. From this point on + * all received packets will be dropped. Mark the state as + * "flushing" so all inflight packets are dropped */ + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (state == NULL) + return -ENOMEM; + BUG_ON(efx->loopback_selftest); + state->flush = true; + efx->loopback_selftest = state; + + /* Test all supported loopback modes */ + for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) { + if (!(loopback_modes & (1 << mode))) + continue; + + /* Move the port into the specified loopback mode. */ + state->flush = true; + mutex_lock(&efx->mac_lock); + efx->loopback_mode = mode; + rc = __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); + if (rc) { + netif_err(efx, drv, efx->net_dev, + "unable to move into %s loopback\n", + LOOPBACK_MODE(efx)); + goto out; + } + + rc = efx_wait_for_link(efx); + if (rc) { + netif_err(efx, drv, efx->net_dev, + "loopback %s never came up\n", + LOOPBACK_MODE(efx)); + goto out; + } + + /* Test all enabled types of TX queue */ + efx_for_each_channel_tx_queue(tx_queue, channel) { + state->offload_csum = (tx_queue->type & + EFX_TXQ_TYPE_OUTER_CSUM); + rc = efx_test_loopback(tx_queue, + &tests->loopback[mode]); + if (rc) + goto out; + } + } + + out: + /* Remove the flush. The caller will remove the loopback setting */ + state->flush = true; + efx->loopback_selftest = NULL; + wmb(); + kfree(state); + + if (rc == -EPERM) + rc = 0; + + return rc; +} + +/************************************************************************** + * + * Entry point + * + *************************************************************************/ + +int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests, + unsigned flags) +{ + enum efx_loopback_mode loopback_mode = efx->loopback_mode; + int phy_mode = efx->phy_mode; + int rc_test = 0, rc_reset, rc; + + efx_selftest_async_cancel(efx); + + /* Online (i.e. non-disruptive) testing + * This checks interrupt generation, event delivery and PHY presence. */ + + rc = efx_test_phy_alive(efx, tests); + if (rc && !rc_test) + rc_test = rc; + + rc = efx_test_nvram(efx, tests); + if (rc && !rc_test) + rc_test = rc; + + rc = efx_test_interrupts(efx, tests); + if (rc && !rc_test) + rc_test = rc; + + rc = efx_test_eventq_irq(efx, tests); + if (rc && !rc_test) + rc_test = rc; + + if (rc_test) + return rc_test; + + if (!(flags & ETH_TEST_FL_OFFLINE)) + return efx_test_phy(efx, tests, flags); + + /* Offline (i.e. disruptive) testing + * This checks MAC and PHY loopback on the specified port. */ + + /* Detach the device so the kernel doesn't transmit during the + * loopback test and the watchdog timeout doesn't fire. + */ + efx_device_detach_sync(efx); + + if (efx->type->test_chip) { + rc_reset = efx->type->test_chip(efx, tests); + if (rc_reset) { + netif_err(efx, hw, efx->net_dev, + "Unable to recover from chip test\n"); + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + return rc_reset; + } + + if ((tests->memory < 0 || tests->registers < 0) && !rc_test) + rc_test = -EIO; + } + + /* Ensure that the phy is powered and out of loopback + * for the bist and loopback tests */ + mutex_lock(&efx->mac_lock); + efx->phy_mode &= ~PHY_MODE_LOW_POWER; + efx->loopback_mode = LOOPBACK_NONE; + __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); + + rc = efx_test_phy(efx, tests, flags); + if (rc && !rc_test) + rc_test = rc; + + rc = efx_test_loopbacks(efx, tests, efx->loopback_modes); + if (rc && !rc_test) + rc_test = rc; + + /* restore the PHY to the previous state */ + mutex_lock(&efx->mac_lock); + efx->phy_mode = phy_mode; + efx->loopback_mode = loopback_mode; + __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); + + efx_device_attach_if_not_resetting(efx); + + return rc_test; +} + +void efx_selftest_async_start(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_nic_event_test_start(channel); + schedule_delayed_work(&efx->selftest_work, IRQ_TIMEOUT); +} + +void efx_selftest_async_cancel(struct efx_nic *efx) +{ + cancel_delayed_work_sync(&efx->selftest_work); +} + +static void efx_selftest_async_work(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, + selftest_work.work); + struct efx_channel *channel; + int cpu; + + efx_for_each_channel(channel, efx) { + cpu = efx_nic_event_test_irq_cpu(channel); + if (cpu < 0) + netif_err(efx, ifup, efx->net_dev, + "channel %d failed to trigger an interrupt\n", + channel->channel); + else + netif_dbg(efx, ifup, efx->net_dev, + "channel %d triggered interrupt on CPU %d\n", + channel->channel, cpu); + } +} + +void efx_selftest_async_init(struct efx_nic *efx) +{ + INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); +} diff --git a/drivers/net/ethernet/sfc/siena/selftest.h b/drivers/net/ethernet/sfc/siena/selftest.h new file mode 100644 index 000000000000..a23f085bf298 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/selftest.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2012 Solarflare Communications Inc. + */ + +#ifndef EFX_SELFTEST_H +#define EFX_SELFTEST_H + +#include "net_driver.h" + +/* + * Self tests + */ + +struct efx_loopback_self_tests { + int tx_sent[EFX_MAX_TXQ_PER_CHANNEL]; + int tx_done[EFX_MAX_TXQ_PER_CHANNEL]; + int rx_good; + int rx_bad; +}; + +#define EFX_MAX_PHY_TESTS 20 + +/* Efx self test results + * For fields which are not counters, 1 indicates success and -1 + * indicates failure; 0 indicates test could not be run. + */ +struct efx_self_tests { + /* online tests */ + int phy_alive; + int nvram; + int interrupt; + int eventq_dma[EFX_MAX_CHANNELS]; + int eventq_int[EFX_MAX_CHANNELS]; + /* offline tests */ + int memory; + int registers; + int phy_ext[EFX_MAX_PHY_TESTS]; + struct efx_loopback_self_tests loopback[LOOPBACK_TEST_MAX + 1]; +}; + +void efx_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr, + int pkt_len); +int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests, + unsigned flags); +void efx_selftest_async_init(struct efx_nic *efx); +void efx_selftest_async_start(struct efx_nic *efx); +void efx_selftest_async_cancel(struct efx_nic *efx); + +#endif /* EFX_SELFTEST_H */ diff --git a/drivers/net/ethernet/sfc/siena/sriov.c b/drivers/net/ethernet/sfc/siena/sriov.c new file mode 100644 index 000000000000..3f241e6c881a --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/sriov.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2014-2015 Solarflare Communications Inc. + */ +#include <linux/module.h> +#include "net_driver.h" +#include "nic.h" +#include "sriov.h" + +int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_mac) + return efx->type->sriov_set_vf_mac(efx, vf_i, mac); + else + return -EOPNOTSUPP; +} + +int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, + u8 qos, __be16 vlan_proto) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_vlan) { + if ((vlan & ~VLAN_VID_MASK) || + (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT))) + return -EINVAL; + + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + + return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos); + } else { + return -EOPNOTSUPP; + } +} + +int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, + bool spoofchk) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_spoofchk) + return efx->type->sriov_set_vf_spoofchk(efx, vf_i, spoofchk); + else + return -EOPNOTSUPP; +} + +int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, + struct ifla_vf_info *ivi) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_get_vf_config) + return efx->type->sriov_get_vf_config(efx, vf_i, ivi); + else + return -EOPNOTSUPP; +} + +int efx_sriov_set_vf_link_state(struct net_device *net_dev, int vf_i, + int link_state) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->sriov_set_vf_link_state) + return efx->type->sriov_set_vf_link_state(efx, vf_i, + link_state); + else + return -EOPNOTSUPP; +} diff --git a/drivers/net/ethernet/sfc/siena/sriov.h b/drivers/net/ethernet/sfc/siena/sriov.h new file mode 100644 index 000000000000..747707bee483 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/sriov.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2014-2015 Solarflare Communications Inc. + */ + +#ifndef EFX_SRIOV_H +#define EFX_SRIOV_H + +#include "net_driver.h" + +#ifdef CONFIG_SFC_SRIOV + +int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac); +int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, + u8 qos, __be16 vlan_proto); +int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, + bool spoofchk); +int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, + struct ifla_vf_info *ivi); +int efx_sriov_set_vf_link_state(struct net_device *net_dev, int vf_i, + int link_state); +#endif /* CONFIG_SFC_SRIOV */ + +#endif /* EFX_SRIOV_H */ diff --git a/drivers/net/ethernet/sfc/siena/tx.c b/drivers/net/ethernet/sfc/siena/tx.c new file mode 100644 index 000000000000..138bca611341 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/tx.c @@ -0,0 +1,643 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2005-2013 Solarflare Communications Inc. + */ + +#include <linux/pci.h> +#include <linux/tcp.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/ipv6.h> +#include <linux/slab.h> +#include <net/ipv6.h> +#include <linux/if_ether.h> +#include <linux/highmem.h> +#include <linux/cache.h> +#include "net_driver.h" +#include "efx.h" +#include "io.h" +#include "nic.h" +#include "tx.h" +#include "tx_common.h" +#include "workarounds.h" +#include "ef10_regs.h" + +#ifdef EFX_USE_PIO + +#define EFX_PIOBUF_SIZE_DEF ALIGN(256, L1_CACHE_BYTES) +unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF; + +#endif /* EFX_USE_PIO */ + +static inline u8 *efx_tx_get_copy_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer) +{ + unsigned int index = efx_tx_queue_get_insert_index(tx_queue); + struct efx_buffer *page_buf = + &tx_queue->cb_page[index >> (PAGE_SHIFT - EFX_TX_CB_ORDER)]; + unsigned int offset = + ((index << EFX_TX_CB_ORDER) + NET_IP_ALIGN) & (PAGE_SIZE - 1); + + if (unlikely(!page_buf->addr) && + efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE, + GFP_ATOMIC)) + return NULL; + buffer->dma_addr = page_buf->dma_addr + offset; + buffer->unmap_len = 0; + return (u8 *)page_buf->addr + offset; +} + +u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, size_t len) +{ + if (len > EFX_TX_CB_SIZE) + return NULL; + return efx_tx_get_copy_buffer(tx_queue, buffer); +} + +static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) +{ + /* We need to consider all queues that the net core sees as one */ + struct efx_nic *efx = txq1->efx; + struct efx_tx_queue *txq2; + unsigned int fill_level; + + fill_level = efx_channel_tx_old_fill_level(txq1->channel); + if (likely(fill_level < efx->txq_stop_thresh)) + return; + + /* We used the stale old_read_count above, which gives us a + * pessimistic estimate of the fill level (which may even + * validly be >= efx->txq_entries). Now try again using + * read_count (more likely to be a cache miss). + * + * If we read read_count and then conditionally stop the + * queue, it is possible for the completion path to race with + * us and complete all outstanding descriptors in the middle, + * after which there will be no more completions to wake it. + * Therefore we stop the queue first, then read read_count + * (with a memory barrier to ensure the ordering), then + * restart the queue if the fill level turns out to be low + * enough. + */ + netif_tx_stop_queue(txq1->core_txq); + smp_mb(); + efx_for_each_channel_tx_queue(txq2, txq1->channel) + txq2->old_read_count = READ_ONCE(txq2->read_count); + + fill_level = efx_channel_tx_old_fill_level(txq1->channel); + EFX_WARN_ON_ONCE_PARANOID(fill_level >= efx->txq_entries); + if (likely(fill_level < efx->txq_stop_thresh)) { + smp_mb(); + if (likely(!efx->loopback_selftest)) + netif_tx_start_queue(txq1->core_txq); + } +} + +static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue, + struct sk_buff *skb) +{ + unsigned int copy_len = skb->len; + struct efx_tx_buffer *buffer; + u8 *copy_buffer; + int rc; + + EFX_WARN_ON_ONCE_PARANOID(copy_len > EFX_TX_CB_SIZE); + + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + + copy_buffer = efx_tx_get_copy_buffer(tx_queue, buffer); + if (unlikely(!copy_buffer)) + return -ENOMEM; + + rc = skb_copy_bits(skb, 0, copy_buffer, copy_len); + EFX_WARN_ON_PARANOID(rc); + buffer->len = copy_len; + + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB; + + ++tx_queue->insert_count; + return rc; +} + +#ifdef EFX_USE_PIO + +struct efx_short_copy_buffer { + int used; + u8 buf[L1_CACHE_BYTES]; +}; + +/* Copy to PIO, respecting that writes to PIO buffers must be dword aligned. + * Advances piobuf pointer. Leaves additional data in the copy buffer. + */ +static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf, + u8 *data, int len, + struct efx_short_copy_buffer *copy_buf) +{ + int block_len = len & ~(sizeof(copy_buf->buf) - 1); + + __iowrite64_copy(*piobuf, data, block_len >> 3); + *piobuf += block_len; + len -= block_len; + + if (len) { + data += block_len; + BUG_ON(copy_buf->used); + BUG_ON(len > sizeof(copy_buf->buf)); + memcpy(copy_buf->buf, data, len); + copy_buf->used = len; + } +} + +/* Copy to PIO, respecting dword alignment, popping data from copy buffer first. + * Advances piobuf pointer. Leaves additional data in the copy buffer. + */ +static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf, + u8 *data, int len, + struct efx_short_copy_buffer *copy_buf) +{ + if (copy_buf->used) { + /* if the copy buffer is partially full, fill it up and write */ + int copy_to_buf = + min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len); + + memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf); + copy_buf->used += copy_to_buf; + + /* if we didn't fill it up then we're done for now */ + if (copy_buf->used < sizeof(copy_buf->buf)) + return; + + __iowrite64_copy(*piobuf, copy_buf->buf, + sizeof(copy_buf->buf) >> 3); + *piobuf += sizeof(copy_buf->buf); + data += copy_to_buf; + len -= copy_to_buf; + copy_buf->used = 0; + } + + efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf); +} + +static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf, + struct efx_short_copy_buffer *copy_buf) +{ + /* if there's anything in it, write the whole buffer, including junk */ + if (copy_buf->used) + __iowrite64_copy(piobuf, copy_buf->buf, + sizeof(copy_buf->buf) >> 3); +} + +/* Traverse skb structure and copy fragments in to PIO buffer. + * Advances piobuf pointer. + */ +static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb, + u8 __iomem **piobuf, + struct efx_short_copy_buffer *copy_buf) +{ + int i; + + efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb), + copy_buf); + + for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + u8 *vaddr; + + vaddr = kmap_atomic(skb_frag_page(f)); + + efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + skb_frag_off(f), + skb_frag_size(f), copy_buf); + kunmap_atomic(vaddr); + } + + EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->frag_list); +} + +static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, + struct sk_buff *skb) +{ + struct efx_tx_buffer *buffer = + efx_tx_queue_get_insert_buffer(tx_queue); + u8 __iomem *piobuf = tx_queue->piobuf; + + /* Copy to PIO buffer. Ensure the writes are padded to the end + * of a cache line, as this is required for write-combining to be + * effective on at least x86. + */ + + if (skb_shinfo(skb)->nr_frags) { + /* The size of the copy buffer will ensure all writes + * are the size of a cache line. + */ + struct efx_short_copy_buffer copy_buf; + + copy_buf.used = 0; + + efx_skb_copy_bits_to_pio(tx_queue->efx, skb, + &piobuf, ©_buf); + efx_flush_copy_buffer(tx_queue->efx, piobuf, ©_buf); + } else { + /* Pad the write to the size of a cache line. + * We can do this because we know the skb_shared_info struct is + * after the source, and the destination buffer is big enough. + */ + BUILD_BUG_ON(L1_CACHE_BYTES > + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + __iowrite64_copy(tx_queue->piobuf, skb->data, + ALIGN(skb->len, L1_CACHE_BYTES) >> 3); + } + + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB | EFX_TX_BUF_OPTION; + + EFX_POPULATE_QWORD_5(buffer->option, + ESF_DZ_TX_DESC_IS_OPT, 1, + ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO, + ESF_DZ_TX_PIO_CONT, 0, + ESF_DZ_TX_PIO_BYTE_CNT, skb->len, + ESF_DZ_TX_PIO_BUF_ADDR, + tx_queue->piobuf_offset); + ++tx_queue->insert_count; + return 0; +} + +/* Decide whether we can use TX PIO, ie. write packet data directly into + * a buffer on the device. This can reduce latency at the expense of + * throughput, so we only do this if both hardware and software TX rings + * are empty, including all queues for the channel. This also ensures that + * only one packet at a time can be using the PIO buffer. If the xmit_more + * flag is set then we don't use this - there'll be another packet along + * shortly and we want to hold off the doorbell. + */ +static bool efx_tx_may_pio(struct efx_tx_queue *tx_queue) +{ + struct efx_channel *channel = tx_queue->channel; + + if (!tx_queue->piobuf) + return false; + + EFX_WARN_ON_ONCE_PARANOID(!channel->efx->type->option_descriptors); + + efx_for_each_channel_tx_queue(tx_queue, channel) + if (!efx_nic_tx_is_empty(tx_queue, tx_queue->packet_write_count)) + return false; + + return true; +} +#endif /* EFX_USE_PIO */ + +/* Send any pending traffic for a channel. xmit_more is shared across all + * queues for a channel, so we must check all of them. + */ +static void efx_tx_send_pending(struct efx_channel *channel) +{ + struct efx_tx_queue *q; + + efx_for_each_channel_tx_queue(q, channel) { + if (q->xmit_pending) + efx_nic_push_buffers(q); + } +} + +/* + * Add a socket buffer to a TX queue + * + * This maps all fragments of a socket buffer for DMA and adds them to + * the TX queue. The queue's insert pointer will be incremented by + * the number of fragments in the socket buffer. + * + * If any DMA mapping fails, any mapped fragments will be unmapped, + * the queue's insert pointer will be restored to its original value. + * + * This function is split out from efx_hard_start_xmit to allow the + * loopback test to direct packets via specific TX queues. + * + * Returns NETDEV_TX_OK. + * You must hold netif_tx_lock() to call this function. + */ +netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) +{ + unsigned int old_insert_count = tx_queue->insert_count; + bool xmit_more = netdev_xmit_more(); + bool data_mapped = false; + unsigned int segments; + unsigned int skb_len; + int rc; + + skb_len = skb->len; + segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0; + if (segments == 1) + segments = 0; /* Don't use TSO for a single segment. */ + + /* Handle TSO first - it's *possible* (although unlikely) that we might + * be passed a packet to segment that's smaller than the copybreak/PIO + * size limit. + */ + if (segments) { + switch (tx_queue->tso_version) { + case 1: + rc = efx_enqueue_skb_tso(tx_queue, skb, &data_mapped); + break; + case 2: + rc = efx_ef10_tx_tso_desc(tx_queue, skb, &data_mapped); + break; + case 0: /* No TSO on this queue, SW fallback needed */ + default: + rc = -EINVAL; + break; + } + if (rc == -EINVAL) { + rc = efx_tx_tso_fallback(tx_queue, skb); + tx_queue->tso_fallbacks++; + if (rc == 0) + return 0; + } + if (rc) + goto err; +#ifdef EFX_USE_PIO + } else if (skb_len <= efx_piobuf_size && !xmit_more && + efx_tx_may_pio(tx_queue)) { + /* Use PIO for short packets with an empty queue. */ + if (efx_enqueue_skb_pio(tx_queue, skb)) + goto err; + tx_queue->pio_packets++; + data_mapped = true; +#endif + } else if (skb->data_len && skb_len <= EFX_TX_CB_SIZE) { + /* Pad short packets or coalesce short fragmented packets. */ + if (efx_enqueue_skb_copy(tx_queue, skb)) + goto err; + tx_queue->cb_packets++; + data_mapped = true; + } + + /* Map for DMA and create descriptors if we haven't done so already. */ + if (!data_mapped && (efx_tx_map_data(tx_queue, skb, segments))) + goto err; + + efx_tx_maybe_stop_queue(tx_queue); + + tx_queue->xmit_pending = true; + + /* Pass off to hardware */ + if (__netdev_tx_sent_queue(tx_queue->core_txq, skb_len, xmit_more)) + efx_tx_send_pending(tx_queue->channel); + + if (segments) { + tx_queue->tso_bursts++; + tx_queue->tso_packets += segments; + tx_queue->tx_packets += segments; + } else { + tx_queue->tx_packets++; + } + + return NETDEV_TX_OK; + + +err: + efx_enqueue_unwind(tx_queue, old_insert_count); + dev_kfree_skb_any(skb); + + /* If we're not expecting another transmit and we had something to push + * on this queue or a partner queue then we need to push here to get the + * previous packets out. + */ + if (!xmit_more) + efx_tx_send_pending(tx_queue->channel); + + return NETDEV_TX_OK; +} + +/* Transmit a packet from an XDP buffer + * + * Returns number of packets sent on success, error code otherwise. + * Runs in NAPI context, either in our poll (for XDP TX) or a different NIC + * (for XDP redirect). + */ +int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, + bool flush) +{ + struct efx_tx_buffer *tx_buffer; + struct efx_tx_queue *tx_queue; + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + unsigned int len; + int space; + int cpu; + int i = 0; + + if (unlikely(n && !xdpfs)) + return -EINVAL; + if (unlikely(!n)) + return 0; + + cpu = raw_smp_processor_id(); + if (unlikely(cpu >= efx->xdp_tx_queue_count)) + return -EINVAL; + + tx_queue = efx->xdp_tx_queues[cpu]; + if (unlikely(!tx_queue)) + return -EINVAL; + + if (!tx_queue->initialised) + return -EINVAL; + + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED) + HARD_TX_LOCK(efx->net_dev, tx_queue->core_txq, cpu); + + /* If we're borrowing net stack queues we have to handle stop-restart + * or we might block the queue and it will be considered as frozen + */ + if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) { + if (netif_tx_queue_stopped(tx_queue->core_txq)) + goto unlock; + efx_tx_maybe_stop_queue(tx_queue); + } + + /* Check for available space. We should never need multiple + * descriptors per frame. + */ + space = efx->txq_entries + + tx_queue->read_count - tx_queue->insert_count; + + for (i = 0; i < n; i++) { + xdpf = xdpfs[i]; + + if (i >= space) + break; + + /* We'll want a descriptor for this tx. */ + prefetchw(__efx_tx_queue_get_insert_buffer(tx_queue)); + + len = xdpf->len; + + /* Map for DMA. */ + dma_addr = dma_map_single(&efx->pci_dev->dev, + xdpf->data, len, + DMA_TO_DEVICE); + if (dma_mapping_error(&efx->pci_dev->dev, dma_addr)) + break; + + /* Create descriptor and set up for unmapping DMA. */ + tx_buffer = efx_tx_map_chunk(tx_queue, dma_addr, len); + tx_buffer->xdpf = xdpf; + tx_buffer->flags = EFX_TX_BUF_XDP | + EFX_TX_BUF_MAP_SINGLE; + tx_buffer->dma_offset = 0; + tx_buffer->unmap_len = len; + tx_queue->tx_packets++; + } + + /* Pass mapped frames to hardware. */ + if (flush && i > 0) + efx_nic_push_buffers(tx_queue); + +unlock: + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED) + HARD_TX_UNLOCK(efx->net_dev, tx_queue->core_txq); + + return i == 0 ? -EIO : i; +} + +/* Initiate a packet transmission. We use one channel per CPU + * (sharing when we have more CPUs than channels). + * + * Context: non-blocking. + * Should always return NETDEV_TX_OK and consume the skb. + */ +netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, + struct net_device *net_dev) +{ + struct efx_nic *efx = netdev_priv(net_dev); + struct efx_tx_queue *tx_queue; + unsigned index, type; + + EFX_WARN_ON_PARANOID(!netif_device_present(net_dev)); + + index = skb_get_queue_mapping(skb); + type = efx_tx_csum_type_skb(skb); + if (index >= efx->n_tx_channels) { + index -= efx->n_tx_channels; + type |= EFX_TXQ_TYPE_HIGHPRI; + } + + /* PTP "event" packet */ + if (unlikely(efx_xmit_with_hwtstamp(skb)) && + ((efx_ptp_use_mac_tx_timestamps(efx) && efx->ptp_data) || + unlikely(efx_ptp_is_ptp_tx(efx, skb)))) { + /* There may be existing transmits on the channel that are + * waiting for this packet to trigger the doorbell write. + * We need to send the packets at this point. + */ + efx_tx_send_pending(efx_get_tx_channel(efx, index)); + return efx_ptp_tx(efx, skb); + } + + tx_queue = efx_get_tx_queue(efx, index, type); + if (WARN_ON_ONCE(!tx_queue)) { + /* We don't have a TXQ of the right type. + * This should never happen, as we don't advertise offload + * features unless we can support them. + */ + dev_kfree_skb_any(skb); + /* If we're not expecting another transmit and we had something to push + * on this queue or a partner queue then we need to push here to get the + * previous packets out. + */ + if (!netdev_xmit_more()) + efx_tx_send_pending(tx_queue->channel); + return NETDEV_TX_OK; + } + + return __efx_enqueue_skb(tx_queue, skb); +} + +void efx_xmit_done_single(struct efx_tx_queue *tx_queue) +{ + unsigned int pkts_compl = 0, bytes_compl = 0; + unsigned int read_ptr; + bool finished = false; + + read_ptr = tx_queue->read_count & tx_queue->ptr_mask; + + while (!finished) { + struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; + + if (!efx_tx_buffer_in_use(buffer)) { + struct efx_nic *efx = tx_queue->efx; + + netif_err(efx, hw, efx->net_dev, + "TX queue %d spurious single TX completion\n", + tx_queue->queue); + efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); + return; + } + + /* Need to check the flag before dequeueing. */ + if (buffer->flags & EFX_TX_BUF_SKB) + finished = true; + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + + ++tx_queue->read_count; + read_ptr = tx_queue->read_count & tx_queue->ptr_mask; + } + + tx_queue->pkts_compl += pkts_compl; + tx_queue->bytes_compl += bytes_compl; + + EFX_WARN_ON_PARANOID(pkts_compl != 1); + + efx_xmit_done_check_empty(tx_queue); +} + +void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + + /* Must be inverse of queue lookup in efx_hard_start_xmit() */ + tx_queue->core_txq = + netdev_get_tx_queue(efx->net_dev, + tx_queue->channel->channel + + ((tx_queue->type & EFX_TXQ_TYPE_HIGHPRI) ? + efx->n_tx_channels : 0)); +} + +int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, + void *type_data) +{ + struct efx_nic *efx = netdev_priv(net_dev); + struct tc_mqprio_qopt *mqprio = type_data; + unsigned tc, num_tc; + + if (type != TC_SETUP_QDISC_MQPRIO) + return -EOPNOTSUPP; + + /* Only Siena supported highpri queues */ + if (efx_nic_rev(efx) > EFX_REV_SIENA_A0) + return -EOPNOTSUPP; + + num_tc = mqprio->num_tc; + + if (num_tc > EFX_MAX_TX_TC) + return -EINVAL; + + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + + if (num_tc == net_dev->num_tc) + return 0; + + for (tc = 0; tc < num_tc; tc++) { + net_dev->tc_to_txq[tc].offset = tc * efx->n_tx_channels; + net_dev->tc_to_txq[tc].count = efx->n_tx_channels; + } + + net_dev->num_tc = num_tc; + + return netif_set_real_num_tx_queues(net_dev, + max_t(int, num_tc, 1) * + efx->n_tx_channels); +} diff --git a/drivers/net/ethernet/sfc/siena/tx.h b/drivers/net/ethernet/sfc/siena/tx.h new file mode 100644 index 000000000000..f2c4d2f89919 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/tx.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2015 Solarflare Communications Inc. + */ + +#ifndef EFX_TX_H +#define EFX_TX_H + +#include <linux/types.h> + +/* Driver internal tx-path related declarations. */ + +unsigned int efx_tx_limit_len(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned int len); + +u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, size_t len); + +/* What TXQ type will satisfy the checksum offloads required for this skb? */ +static inline unsigned int efx_tx_csum_type_skb(struct sk_buff *skb) +{ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; /* no checksum offload */ + + if (skb->encapsulation && + skb_checksum_start_offset(skb) == skb_inner_transport_offset(skb)) { + /* we only advertise features for IPv4 and IPv6 checksums on + * encapsulated packets, so if the checksum is for the inner + * packet, it must be one of them; no further checking required. + */ + + /* Do we also need to offload the outer header checksum? */ + if (skb_shinfo(skb)->gso_segs > 1 && + !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + return EFX_TXQ_TYPE_OUTER_CSUM | EFX_TXQ_TYPE_INNER_CSUM; + return EFX_TXQ_TYPE_INNER_CSUM; + } + + /* similarly, we only advertise features for IPv4 and IPv6 checksums, + * so it must be one of them. No need for further checks. + */ + return EFX_TXQ_TYPE_OUTER_CSUM; +} +#endif /* EFX_TX_H */ diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c new file mode 100644 index 000000000000..9bc8281b7f5b --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/tx_common.c @@ -0,0 +1,449 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include "efx.h" +#include "nic_common.h" +#include "tx_common.h" + +static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) +{ + return DIV_ROUND_UP(tx_queue->ptr_mask + 1, + PAGE_SIZE >> EFX_TX_CB_ORDER); +} + +int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + unsigned int entries; + int rc; + + /* Create the smallest power-of-two aligned ring */ + entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); + EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); + tx_queue->ptr_mask = entries - 1; + + netif_dbg(efx, probe, efx->net_dev, + "creating TX queue %d size %#x mask %#x\n", + tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask); + + /* Allocate software ring */ + tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer), + GFP_KERNEL); + if (!tx_queue->buffer) + return -ENOMEM; + + tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue), + sizeof(tx_queue->cb_page[0]), GFP_KERNEL); + if (!tx_queue->cb_page) { + rc = -ENOMEM; + goto fail1; + } + + /* Allocate hardware ring, determine TXQ type */ + rc = efx_nic_probe_tx(tx_queue); + if (rc) + goto fail2; + + tx_queue->channel->tx_queue_by_type[tx_queue->type] = tx_queue; + return 0; + +fail2: + kfree(tx_queue->cb_page); + tx_queue->cb_page = NULL; +fail1: + kfree(tx_queue->buffer); + tx_queue->buffer = NULL; + return rc; +} + +void efx_init_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + + netif_dbg(efx, drv, efx->net_dev, + "initialising TX queue %d\n", tx_queue->queue); + + tx_queue->insert_count = 0; + tx_queue->notify_count = 0; + tx_queue->write_count = 0; + tx_queue->packet_write_count = 0; + tx_queue->old_write_count = 0; + tx_queue->read_count = 0; + tx_queue->old_read_count = 0; + tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; + tx_queue->xmit_pending = false; + tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) && + tx_queue->channel == efx_ptp_channel(efx)); + tx_queue->completed_timestamp_major = 0; + tx_queue->completed_timestamp_minor = 0; + + tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel); + tx_queue->tso_version = 0; + + /* Set up TX descriptor ring */ + efx_nic_init_tx(tx_queue); + + tx_queue->initialised = true; +} + +void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_tx_buffer *buffer; + + netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, + "shutting down TX queue %d\n", tx_queue->queue); + + tx_queue->initialised = false; + + if (!tx_queue->buffer) + return; + + /* Free any buffers left in the ring */ + while (tx_queue->read_count != tx_queue->write_count) { + unsigned int pkts_compl = 0, bytes_compl = 0; + + buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + + ++tx_queue->read_count; + } + tx_queue->xmit_pending = false; + netdev_tx_reset_queue(tx_queue->core_txq); +} + +void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) +{ + int i; + + if (!tx_queue->buffer) + return; + + netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, + "destroying TX queue %d\n", tx_queue->queue); + efx_nic_remove_tx(tx_queue); + + if (tx_queue->cb_page) { + for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++) + efx_nic_free_buffer(tx_queue->efx, + &tx_queue->cb_page[i]); + kfree(tx_queue->cb_page); + tx_queue->cb_page = NULL; + } + + kfree(tx_queue->buffer); + tx_queue->buffer = NULL; + tx_queue->channel->tx_queue_by_type[tx_queue->type] = NULL; +} + +void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, + unsigned int *pkts_compl, + unsigned int *bytes_compl) +{ + if (buffer->unmap_len) { + struct device *dma_dev = &tx_queue->efx->pci_dev->dev; + dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset; + + if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) + dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, + DMA_TO_DEVICE); + else + dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, + DMA_TO_DEVICE); + buffer->unmap_len = 0; + } + + if (buffer->flags & EFX_TX_BUF_SKB) { + struct sk_buff *skb = (struct sk_buff *)buffer->skb; + + EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl); + (*pkts_compl)++; + (*bytes_compl) += skb->len; + if (tx_queue->timestamping && + (tx_queue->completed_timestamp_major || + tx_queue->completed_timestamp_minor)) { + struct skb_shared_hwtstamps hwtstamp; + + hwtstamp.hwtstamp = + efx_ptp_nic_to_kernel_time(tx_queue); + skb_tstamp_tx(skb, &hwtstamp); + + tx_queue->completed_timestamp_major = 0; + tx_queue->completed_timestamp_minor = 0; + } + dev_consume_skb_any((struct sk_buff *)buffer->skb); + netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, + "TX queue %d transmission id %x complete\n", + tx_queue->queue, tx_queue->read_count); + } else if (buffer->flags & EFX_TX_BUF_XDP) { + xdp_return_frame_rx_napi(buffer->xdpf); + } + + buffer->len = 0; + buffer->flags = 0; +} + +/* Remove packets from the TX queue + * + * This removes packets from the TX queue, up to and including the + * specified index. + */ +static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, + unsigned int index, + unsigned int *pkts_compl, + unsigned int *bytes_compl) +{ + struct efx_nic *efx = tx_queue->efx; + unsigned int stop_index, read_ptr; + + stop_index = (index + 1) & tx_queue->ptr_mask; + read_ptr = tx_queue->read_count & tx_queue->ptr_mask; + + while (read_ptr != stop_index) { + struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; + + if (!efx_tx_buffer_in_use(buffer)) { + netif_err(efx, tx_err, efx->net_dev, + "TX queue %d spurious TX completion id %d\n", + tx_queue->queue, read_ptr); + efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); + return; + } + + efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); + + ++tx_queue->read_count; + read_ptr = tx_queue->read_count & tx_queue->ptr_mask; + } +} + +void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue) +{ + if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { + tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); + if (tx_queue->read_count == tx_queue->old_write_count) { + /* Ensure that read_count is flushed. */ + smp_mb(); + tx_queue->empty_read_count = + tx_queue->read_count | EFX_EMPTY_COUNT_VALID; + } + } +} + +void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) +{ + unsigned int fill_level, pkts_compl = 0, bytes_compl = 0; + struct efx_nic *efx = tx_queue->efx; + + EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask); + + efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); + tx_queue->pkts_compl += pkts_compl; + tx_queue->bytes_compl += bytes_compl; + + if (pkts_compl > 1) + ++tx_queue->merge_events; + + /* See if we need to restart the netif queue. This memory + * barrier ensures that we write read_count (inside + * efx_dequeue_buffers()) before reading the queue status. + */ + smp_mb(); + if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && + likely(efx->port_enabled) && + likely(netif_device_present(efx->net_dev))) { + fill_level = efx_channel_tx_fill_level(tx_queue->channel); + if (fill_level <= efx->txq_wake_thresh) + netif_tx_wake_queue(tx_queue->core_txq); + } + + efx_xmit_done_check_empty(tx_queue); +} + +/* Remove buffers put into a tx_queue for the current packet. + * None of the buffers must have an skb attached. + */ +void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, + unsigned int insert_count) +{ + struct efx_tx_buffer *buffer; + unsigned int bytes_compl = 0; + unsigned int pkts_compl = 0; + + /* Work backwards until we hit the original insert pointer value */ + while (tx_queue->insert_count != insert_count) { + --tx_queue->insert_count; + buffer = __efx_tx_queue_get_insert_buffer(tx_queue); + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + } +} + +struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, size_t len) +{ + const struct efx_nic_type *nic_type = tx_queue->efx->type; + struct efx_tx_buffer *buffer; + unsigned int dma_len; + + /* Map the fragment taking account of NIC-dependent DMA limits. */ + do { + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + + if (nic_type->tx_limit_len) + dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len); + else + dma_len = len; + + buffer->len = dma_len; + buffer->dma_addr = dma_addr; + buffer->flags = EFX_TX_BUF_CONT; + len -= dma_len; + dma_addr += dma_len; + ++tx_queue->insert_count; + } while (len); + + return buffer; +} + +int efx_tx_tso_header_length(struct sk_buff *skb) +{ + size_t header_len; + + if (skb->encapsulation) + header_len = skb_inner_transport_header(skb) - + skb->data + + (inner_tcp_hdr(skb)->doff << 2u); + else + header_len = skb_transport_header(skb) - skb->data + + (tcp_hdr(skb)->doff << 2u); + return header_len; +} + +/* Map all data from an SKB for DMA and create descriptors on the queue. */ +int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + unsigned int segment_count) +{ + struct efx_nic *efx = tx_queue->efx; + struct device *dma_dev = &efx->pci_dev->dev; + unsigned int frag_index, nr_frags; + dma_addr_t dma_addr, unmap_addr; + unsigned short dma_flags; + size_t len, unmap_len; + + nr_frags = skb_shinfo(skb)->nr_frags; + frag_index = 0; + + /* Map header data. */ + len = skb_headlen(skb); + dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE); + dma_flags = EFX_TX_BUF_MAP_SINGLE; + unmap_len = len; + unmap_addr = dma_addr; + + if (unlikely(dma_mapping_error(dma_dev, dma_addr))) + return -EIO; + + if (segment_count) { + /* For TSO we need to put the header in to a separate + * descriptor. Map this separately if necessary. + */ + size_t header_len = efx_tx_tso_header_length(skb); + + if (header_len != len) { + tx_queue->tso_long_headers++; + efx_tx_map_chunk(tx_queue, dma_addr, header_len); + len -= header_len; + dma_addr += header_len; + } + } + + /* Add descriptors for each fragment. */ + do { + struct efx_tx_buffer *buffer; + skb_frag_t *fragment; + + buffer = efx_tx_map_chunk(tx_queue, dma_addr, len); + + /* The final descriptor for a fragment is responsible for + * unmapping the whole fragment. + */ + buffer->flags = EFX_TX_BUF_CONT | dma_flags; + buffer->unmap_len = unmap_len; + buffer->dma_offset = buffer->dma_addr - unmap_addr; + + if (frag_index >= nr_frags) { + /* Store SKB details with the final buffer for + * the completion. + */ + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB | dma_flags; + return 0; + } + + /* Move on to the next fragment. */ + fragment = &skb_shinfo(skb)->frags[frag_index++]; + len = skb_frag_size(fragment); + dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len, + DMA_TO_DEVICE); + dma_flags = 0; + unmap_len = len; + unmap_addr = dma_addr; + + if (unlikely(dma_mapping_error(dma_dev, dma_addr))) + return -EIO; + } while (1); +} + +unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) +{ + /* Header and payload descriptor for each output segment, plus + * one for every input fragment boundary within a segment + */ + unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS; + + /* Possibly one more per segment for option descriptors */ + if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) + max_descs += EFX_TSO_MAX_SEGS; + + /* Possibly more for PCIe page boundaries within input fragments */ + if (PAGE_SIZE > EFX_PAGE_SIZE) + max_descs += max_t(unsigned int, MAX_SKB_FRAGS, + DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE)); + + return max_descs; +} + +/* + * Fallback to software TSO. + * + * This is used if we are unable to send a GSO packet through hardware TSO. + * This should only ever happen due to per-queue restrictions - unsupported + * packets should first be filtered by the feature flags. + * + * Returns 0 on success, error code otherwise. + */ +int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, struct sk_buff *skb) +{ + struct sk_buff *segments, *next; + + segments = skb_gso_segment(skb, 0); + if (IS_ERR(segments)) + return PTR_ERR(segments); + + dev_consume_skb_any(skb); + + skb_list_walk_safe(segments, skb, next) { + skb_mark_not_on_list(skb); + efx_enqueue_skb(tx_queue, skb); + } + + return 0; +} diff --git a/drivers/net/ethernet/sfc/siena/tx_common.h b/drivers/net/ethernet/sfc/siena/tx_common.h new file mode 100644 index 000000000000..bbab7f248250 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/tx_common.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_TX_COMMON_H +#define EFX_TX_COMMON_H + +int efx_probe_tx_queue(struct efx_tx_queue *tx_queue); +void efx_init_tx_queue(struct efx_tx_queue *tx_queue); +void efx_fini_tx_queue(struct efx_tx_queue *tx_queue); +void efx_remove_tx_queue(struct efx_tx_queue *tx_queue); + +void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, + unsigned int *pkts_compl, + unsigned int *bytes_compl); + +static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer) +{ + return buffer->len || (buffer->flags & EFX_TX_BUF_OPTION); +} + +void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue); +void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); + +void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, + unsigned int insert_count); + +struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, size_t len); +int efx_tx_tso_header_length(struct sk_buff *skb); +int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + unsigned int segment_count); + +unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); +int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, struct sk_buff *skb); + +extern bool efx_separate_tx_channels; +#endif diff --git a/drivers/net/ethernet/sfc/siena/vfdi.h b/drivers/net/ethernet/sfc/siena/vfdi.h new file mode 100644 index 000000000000..480b872eb4d1 --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/vfdi.h @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2010-2012 Solarflare Communications Inc. + */ +#ifndef _VFDI_H +#define _VFDI_H + +/** + * DOC: Virtual Function Driver Interface + * + * This file contains software structures used to form a two way + * communication channel between the VF driver and the PF driver, + * named Virtual Function Driver Interface (VFDI). + * + * For the purposes of VFDI, a page is a memory region with size and + * alignment of 4K. All addresses are DMA addresses to be used within + * the domain of the relevant VF. + * + * The only hardware-defined channels for a VF driver to communicate + * with the PF driver are the event mailboxes (%FR_CZ_USR_EV + * registers). Writing to these registers generates an event with + * EV_CODE = EV_CODE_USR_EV, USER_QID set to the index of the mailbox + * and USER_EV_REG_VALUE set to the value written. The PF driver may + * direct or disable delivery of these events by setting + * %FR_CZ_USR_EV_CFG. + * + * The PF driver can send arbitrary events to arbitrary event queues. + * However, for consistency, VFDI events from the PF are defined to + * follow the same form and be sent to the first event queue assigned + * to the VF while that queue is enabled by the VF driver. + * + * The general form of the variable bits of VFDI events is: + * + * 0 16 24 31 + * | DATA | TYPE | SEQ | + * + * SEQ is a sequence number which should be incremented by 1 (modulo + * 256) for each event. The sequence numbers used in each direction + * are independent. + * + * The VF submits requests of type &struct vfdi_req by sending the + * address of the request (ADDR) in a series of 4 events: + * + * 0 16 24 31 + * | ADDR[0:15] | VFDI_EV_TYPE_REQ_WORD0 | SEQ | + * | ADDR[16:31] | VFDI_EV_TYPE_REQ_WORD1 | SEQ+1 | + * | ADDR[32:47] | VFDI_EV_TYPE_REQ_WORD2 | SEQ+2 | + * | ADDR[48:63] | VFDI_EV_TYPE_REQ_WORD3 | SEQ+3 | + * + * The address must be page-aligned. After receiving such a valid + * series of events, the PF driver will attempt to read the request + * and write a response to the same address. In case of an invalid + * sequence of events or a DMA error, there will be no response. + * + * The VF driver may request that the PF driver writes status + * information into its domain asynchronously. After writing the + * status, the PF driver will send an event of the form: + * + * 0 16 24 31 + * | reserved | VFDI_EV_TYPE_STATUS | SEQ | + * + * In case the VF must be reset for any reason, the PF driver will + * send an event of the form: + * + * 0 16 24 31 + * | reserved | VFDI_EV_TYPE_RESET | SEQ | + * + * It is then the responsibility of the VF driver to request + * reinitialisation of its queues. + */ +#define VFDI_EV_SEQ_LBN 24 +#define VFDI_EV_SEQ_WIDTH 8 +#define VFDI_EV_TYPE_LBN 16 +#define VFDI_EV_TYPE_WIDTH 8 +#define VFDI_EV_TYPE_REQ_WORD0 0 +#define VFDI_EV_TYPE_REQ_WORD1 1 +#define VFDI_EV_TYPE_REQ_WORD2 2 +#define VFDI_EV_TYPE_REQ_WORD3 3 +#define VFDI_EV_TYPE_STATUS 4 +#define VFDI_EV_TYPE_RESET 5 +#define VFDI_EV_DATA_LBN 0 +#define VFDI_EV_DATA_WIDTH 16 + +struct vfdi_endpoint { + u8 mac_addr[ETH_ALEN]; + __be16 tci; +}; + +/** + * enum vfdi_op - VFDI operation enumeration + * @VFDI_OP_RESPONSE: Indicates a response to the request. + * @VFDI_OP_INIT_EVQ: Initialize SRAM entries and initialize an EVQ. + * @VFDI_OP_INIT_RXQ: Initialize SRAM entries and initialize an RXQ. + * @VFDI_OP_INIT_TXQ: Initialize SRAM entries and initialize a TXQ. + * @VFDI_OP_FINI_ALL_QUEUES: Flush all queues, finalize all queues, then + * finalize the SRAM entries. + * @VFDI_OP_INSERT_FILTER: Insert a MAC filter targeting the given RXQ. + * @VFDI_OP_REMOVE_ALL_FILTERS: Remove all filters. + * @VFDI_OP_SET_STATUS_PAGE: Set the DMA page(s) used for status updates + * from PF and write the initial status. + * @VFDI_OP_CLEAR_STATUS_PAGE: Clear the DMA page(s) used for status + * updates from PF. + */ +enum vfdi_op { + VFDI_OP_RESPONSE = 0, + VFDI_OP_INIT_EVQ = 1, + VFDI_OP_INIT_RXQ = 2, + VFDI_OP_INIT_TXQ = 3, + VFDI_OP_FINI_ALL_QUEUES = 4, + VFDI_OP_INSERT_FILTER = 5, + VFDI_OP_REMOVE_ALL_FILTERS = 6, + VFDI_OP_SET_STATUS_PAGE = 7, + VFDI_OP_CLEAR_STATUS_PAGE = 8, + VFDI_OP_LIMIT, +}; + +/* Response codes for VFDI operations. Other values may be used in future. */ +#define VFDI_RC_SUCCESS 0 +#define VFDI_RC_ENOMEM (-12) +#define VFDI_RC_EINVAL (-22) +#define VFDI_RC_EOPNOTSUPP (-95) +#define VFDI_RC_ETIMEDOUT (-110) + +/** + * struct vfdi_req - Request from VF driver to PF driver + * @op: Operation code or response indicator, taken from &enum vfdi_op. + * @rc: Response code. Set to 0 on success or a negative error code on failure. + * @u.init_evq.index: Index of event queue to create. + * @u.init_evq.buf_count: Number of 4k buffers backing event queue. + * @u.init_evq.addr: Array of length %u.init_evq.buf_count containing DMA + * address of each page backing the event queue. + * @u.init_rxq.index: Index of receive queue to create. + * @u.init_rxq.buf_count: Number of 4k buffers backing receive queue. + * @u.init_rxq.evq: Instance of event queue to target receive events at. + * @u.init_rxq.label: Label used in receive events. + * @u.init_rxq.flags: Unused. + * @u.init_rxq.addr: Array of length %u.init_rxq.buf_count containing DMA + * address of each page backing the receive queue. + * @u.init_txq.index: Index of transmit queue to create. + * @u.init_txq.buf_count: Number of 4k buffers backing transmit queue. + * @u.init_txq.evq: Instance of event queue to target transmit completion + * events at. + * @u.init_txq.label: Label used in transmit completion events. + * @u.init_txq.flags: Checksum offload flags. + * @u.init_txq.addr: Array of length %u.init_txq.buf_count containing DMA + * address of each page backing the transmit queue. + * @u.mac_filter.rxq: Insert MAC filter at VF local address/VLAN targeting + * all traffic at this receive queue. + * @u.mac_filter.flags: MAC filter flags. + * @u.set_status_page.dma_addr: Base address for the &struct vfdi_status. + * This address must be page-aligned and the PF may write up to a + * whole page (allowing for extension of the structure). + * @u.set_status_page.peer_page_count: Number of additional pages the VF + * has provided into which peer addresses may be DMAd. + * @u.set_status_page.peer_page_addr: Array of DMA addresses of pages. + * If the number of peers exceeds 256, then the VF must provide + * additional pages in this array. The PF will then DMA up to + * 512 vfdi_endpoint structures into each page. These addresses + * must be page-aligned. + */ +struct vfdi_req { + u32 op; + u32 reserved1; + s32 rc; + u32 reserved2; + union { + struct { + u32 index; + u32 buf_count; + u64 addr[]; + } init_evq; + struct { + u32 index; + u32 buf_count; + u32 evq; + u32 label; + u32 flags; +#define VFDI_RXQ_FLAG_SCATTER_EN 1 + u32 reserved; + u64 addr[]; + } init_rxq; + struct { + u32 index; + u32 buf_count; + u32 evq; + u32 label; + u32 flags; +#define VFDI_TXQ_FLAG_IP_CSUM_DIS 1 +#define VFDI_TXQ_FLAG_TCPUDP_CSUM_DIS 2 + u32 reserved; + u64 addr[]; + } init_txq; + struct { + u32 rxq; + u32 flags; +#define VFDI_MAC_FILTER_FLAG_RSS 1 +#define VFDI_MAC_FILTER_FLAG_SCATTER 2 + } mac_filter; + struct { + u64 dma_addr; + u64 peer_page_count; + u64 peer_page_addr[]; + } set_status_page; + } u; +}; + +/** + * struct vfdi_status - Status provided by PF driver to VF driver + * @generation_start: A generation count DMA'd to VF *before* the + * rest of the structure. + * @generation_end: A generation count DMA'd to VF *after* the + * rest of the structure. + * @version: Version of this structure; currently set to 1. Later + * versions must either be layout-compatible or only be sent to VFs + * that specifically request them. + * @length: Total length of this structure including embedded tables + * @vi_scale: log2 the number of VIs available on this VF. This quantity + * is used by the hardware for register decoding. + * @max_tx_channels: The maximum number of transmit queues the VF can use. + * @rss_rxq_count: The number of receive queues present in the shared RSS + * indirection table. + * @peer_count: Total number of peers in the complete peer list. If larger + * than ARRAY_SIZE(%peers), then the VF must provide sufficient + * additional pages each of which is filled with vfdi_endpoint structures. + * @local: The MAC address and outer VLAN tag of *this* VF + * @peers: Table of peer addresses. The @tci fields in these structures + * are currently unused and must be ignored. Additional peers are + * written into any additional pages provided by the VF. + * @timer_quantum_ns: Timer quantum (nominal period between timer ticks) + * for interrupt moderation timers, in nanoseconds. This member is only + * present if @length is sufficiently large. + */ +struct vfdi_status { + u32 generation_start; + u32 generation_end; + u32 version; + u32 length; + u8 vi_scale; + u8 max_tx_channels; + u8 rss_rxq_count; + u8 reserved1; + u16 peer_count; + u16 reserved2; + struct vfdi_endpoint local; + struct vfdi_endpoint peers[256]; + + /* Members below here extend version 1 of this structure */ + u32 timer_quantum_ns; +}; + +#endif diff --git a/drivers/net/ethernet/sfc/siena/workarounds.h b/drivers/net/ethernet/sfc/siena/workarounds.h new file mode 100644 index 000000000000..815be2d20c4b --- /dev/null +++ b/drivers/net/ethernet/sfc/siena/workarounds.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2006-2013 Solarflare Communications Inc. + */ + +#ifndef EFX_WORKAROUNDS_H +#define EFX_WORKAROUNDS_H + +/* + * Hardware workarounds. + * Bug numbers are from Solarflare's Bugzilla. + */ + +#define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0) +#define EFX_WORKAROUND_EF10(efx) (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) +#define EFX_WORKAROUND_10G(efx) 1 + +/* Bit-bashed I2C reads cause performance drop */ +#define EFX_WORKAROUND_7884 EFX_WORKAROUND_10G +/* Legacy interrupt storm when interrupt fifo fills */ +#define EFX_WORKAROUND_17213 EFX_WORKAROUND_SIENA + +/* Lockup when writing event block registers at gen2/gen3 */ +#define EFX_EF10_WORKAROUND_35388(efx) \ + (((struct efx_ef10_nic_data *)efx->nic_data)->workaround_35388) +#define EFX_WORKAROUND_35388(efx) \ + (efx_nic_rev(efx) == EFX_REV_HUNT_A0 && EFX_EF10_WORKAROUND_35388(efx)) + +/* Moderation timer access must go through MCDI */ +#define EFX_EF10_WORKAROUND_61265(efx) \ + (((struct efx_ef10_nic_data *)efx->nic_data)->workaround_61265) + +#endif /* EFX_WORKAROUNDS_H */ |