summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-03-22 15:29:05 -0400
committerDavid S. Miller <davem@davemloft.net>2018-03-22 15:29:05 -0400
commit6686c459e1449a3ee5f3fd313b0a559ace7a700e (patch)
treed8e84a2ba3f2621b5d67a1d96d6c3a2637ca3d44
parent1a2e10a24039147047cc21759ec57314c333c1ac (diff)
parent2bfbd35d8ecd97a4a7f1db1754908b54542fa7aa (diff)
Merge branch 'hns3-VF-reset'
Salil Mehta says: ==================== Add support of VF Reset to HNS3 VF driver This patch-set adds the support of VF reset to the existing VF driver. VF Reset can be triggered due to TX watchdog firing as a result of TX data-path not working. VF reset could also be a result of some internal configuration changes if that requires reset, or as a result of the PF/Core/Global/IMP(Integrated Management Processor) reset happened in the PF. Summary of Patches: * Watchdog timer trigger chnages are present in Patch 1. * Reset Service Task and related Event handling is present in Patches {2,3} * Changes to send reset request to PF, reset stack and re-initialization of the hclge device is present in Patches {4,5,6} * Changes related to ARQ (Asynchronous Receive Queue) and its event handling are present in Patches {7,8} * Changes required in PF to handle the VF Reset request and actually perform hardware VF reset is there in Patch 9. NOTE: This patch depends upon "[PATCH net-next 00/11] fix some bugs for HNS3 driver" Link: https://lkml.org/lkml/2018/3/21/72 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h16
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c30
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c38
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c42
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c336
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h31
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c95
11 files changed, 534 insertions, 71 deletions
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index e6e1d221b5c8..519e2bd6aa60 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -11,6 +11,7 @@
enum HCLGE_MBX_OPCODE {
HCLGE_MBX_RESET = 0x01, /* (VF -> PF) assert reset */
+ HCLGE_MBX_ASSERTING_RESET, /* (PF -> VF) PF is asserting reset*/
HCLGE_MBX_SET_UNICAST, /* (VF -> PF) set UC addr */
HCLGE_MBX_SET_MULTICAST, /* (VF -> PF) set MC addr */
HCLGE_MBX_SET_VLAN, /* (VF -> PF) set VLAN */
@@ -85,6 +86,21 @@ struct hclge_mbx_pf_to_vf_cmd {
u16 msg[8];
};
+/* used by VF to store the received Async responses from PF */
+struct hclgevf_mbx_arq_ring {
+#define HCLGE_MBX_MAX_ARQ_MSG_SIZE 8
+#define HCLGE_MBX_MAX_ARQ_MSG_NUM 1024
+ struct hclgevf_dev *hdev;
+ u32 head;
+ u32 tail;
+ u32 count;
+ u16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE];
+};
+
#define hclge_mbx_ring_ptr_move_crq(crq) \
(crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num)
+#define hclge_mbx_tail_ptr_move_arq(arq) \
+ (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE)
+#define hclge_mbx_head_ptr_move_arq(arq) \
+ (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE)
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 9daa88d9041e..37ec1b3286c6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -118,6 +118,8 @@ enum hnae3_reset_notify_type {
};
enum hnae3_reset_type {
+ HNAE3_VF_RESET,
+ HNAE3_VF_FULL_RESET,
HNAE3_FUNC_RESET,
HNAE3_CORE_RESET,
HNAE3_GLOBAL_RESET,
@@ -400,8 +402,7 @@ struct hnae3_ae_ops {
int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
u16 vlan, u8 qos, __be16 proto);
int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
- void (*reset_event)(struct hnae3_handle *handle,
- enum hnae3_reset_type reset);
+ void (*reset_event)(struct hnae3_handle *handle);
void (*get_channels)(struct hnae3_handle *handle,
struct ethtool_channels *ch);
void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
@@ -495,6 +496,9 @@ struct hnae3_handle {
struct hnae3_ae_algo *ae_algo; /* the class who provides this handle */
u64 flags; /* Indicate the capabilities for this handle*/
+ unsigned long last_reset_time;
+ enum hnae3_reset_type reset_level;
+
union {
struct net_device *netdev; /* first member */
struct hnae3_knic_private_info kinfo;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 0b4a676999ca..40a3eb70629e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -320,7 +320,7 @@ static int hns3_nic_net_open(struct net_device *netdev)
return ret;
}
- priv->last_reset_time = jiffies;
+ priv->ae_handle->last_reset_time = jiffies;
return 0;
}
@@ -1543,7 +1543,6 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
static void hns3_nic_net_timeout(struct net_device *ndev)
{
struct hns3_nic_priv *priv = netdev_priv(ndev);
- unsigned long last_reset_time = priv->last_reset_time;
struct hnae3_handle *h = priv->ae_handle;
if (!hns3_get_tx_timeo_queue_info(ndev))
@@ -1551,24 +1550,12 @@ static void hns3_nic_net_timeout(struct net_device *ndev)
priv->tx_timeout_count++;
- /* This timeout is far away enough from last timeout,
- * if timeout again,set the reset type to PF reset
- */
- if (time_after(jiffies, (last_reset_time + 20 * HZ)))
- priv->reset_level = HNAE3_FUNC_RESET;
-
- /* Don't do any new action before the next timeout */
- else if (time_before(jiffies, (last_reset_time + ndev->watchdog_timeo)))
+ if (time_before(jiffies, (h->last_reset_time + ndev->watchdog_timeo)))
return;
- priv->last_reset_time = jiffies;
-
+ /* request the reset */
if (h->ae_algo->ops->reset_event)
- h->ae_algo->ops->reset_event(h, priv->reset_level);
-
- priv->reset_level++;
- if (priv->reset_level > HNAE3_GLOBAL_RESET)
- priv->reset_level = HNAE3_GLOBAL_RESET;
+ h->ae_algo->ops->reset_event(h);
}
static const struct net_device_ops hns3_nic_netdev_ops = {
@@ -3122,8 +3109,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
priv->dev = &pdev->dev;
priv->netdev = netdev;
priv->ae_handle = handle;
- priv->last_reset_time = jiffies;
- priv->reset_level = HNAE3_FUNC_RESET;
+ priv->ae_handle->reset_level = HNAE3_NONE_RESET;
+ priv->ae_handle->last_reset_time = jiffies;
priv->tx_timeout_count = 0;
handle->kinfo.netdev = netdev;
@@ -3355,7 +3342,6 @@ static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
{
struct hnae3_knic_private_info *kinfo = &handle->kinfo;
- struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
int ret = 0;
if (netif_running(kinfo->netdev)) {
@@ -3365,8 +3351,7 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
"hns net up fail, ret=%d!\n", ret);
return ret;
}
-
- priv->last_reset_time = jiffies;
+ handle->last_reset_time = jiffies;
}
return ret;
@@ -3378,7 +3363,6 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
struct hns3_nic_priv *priv = netdev_priv(netdev);
int ret;
- priv->reset_level = 1;
hns3_init_mac_addr(netdev);
hns3_nic_set_rx_mode(netdev);
hns3_recover_hw_addr(netdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 39daa01f08d5..9e4cfbbf8dcd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -532,8 +532,6 @@ struct hns3_nic_priv {
/* The most recently read link state */
int link;
u64 tx_timeout_count;
- enum hnae3_reset_type reset_level;
- unsigned long last_reset_time;
unsigned long state;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 31e90b588e92..bede4117bad9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2749,7 +2749,7 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
return 0;
}
-static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
{
struct hclge_desc desc;
struct hclge_reset_cmd *req = (struct hclge_reset_cmd *)desc.data;
@@ -2845,27 +2845,31 @@ static void hclge_reset(struct hclge_dev *hdev)
hclge_notify_client(hdev, HNAE3_UP_CLIENT);
}
-static void hclge_reset_event(struct hnae3_handle *handle,
- enum hnae3_reset_type reset)
+static void hclge_reset_event(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- dev_info(&hdev->pdev->dev,
- "Receive reset event , reset_type is %d", reset);
+ /* check if this is a new reset request and we are not here just because
+ * last reset attempt did not succeed and watchdog hit us again. We will
+ * know this if last reset request did not occur very recently (watchdog
+ * timer = 5*HZ, let us check after sufficiently large time, say 4*5*Hz)
+ * In case of new request we reset the "reset level" to PF reset.
+ */
+ if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
+ handle->reset_level = HNAE3_FUNC_RESET;
- switch (reset) {
- case HNAE3_FUNC_RESET:
- case HNAE3_CORE_RESET:
- case HNAE3_GLOBAL_RESET:
- /* request reset & schedule reset task */
- set_bit(reset, &hdev->reset_request);
- hclge_reset_task_schedule(hdev);
- break;
- default:
- dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset);
- break;
- }
+ dev_info(&hdev->pdev->dev, "received reset event , reset type is %d",
+ handle->reset_level);
+
+ /* request reset & schedule reset task */
+ set_bit(handle->reset_level, &hdev->reset_request);
+ hclge_reset_task_schedule(hdev);
+
+ if (handle->reset_level < HNAE3_GLOBAL_RESET)
+ handle->reset_level++;
+
+ handle->last_reset_time = jiffies;
}
static void hclge_reset_subtask(struct hclge_dev *hdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 8c14d10c88cb..0f4157e71282 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -657,4 +657,5 @@ void hclge_mbx_handler(struct hclge_dev *hdev);
void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
int hclge_cfg_flowctrl(struct hclge_dev *hdev);
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 949da0c993cf..39013334a613 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -79,6 +79,18 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
return status;
}
+int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
+{
+ u8 msg_data[2];
+ u8 dest_vfid;
+
+ dest_vfid = (u8)vport->vport_id;
+
+ /* send this requested info to VF */
+ return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+ HCLGE_MBX_ASSERTING_RESET, dest_vfid);
+}
+
static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head)
{
struct hnae3_ring_chain_node *chain_tmp, *chain;
@@ -339,6 +351,33 @@ static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport,
hclge_gen_resp_to_vf(vport, mbx_req, 0, NULL, 0);
}
+static void hclge_reset_vf(struct hclge_vport *vport,
+ struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %d!",
+ mbx_req->mbx_src_vfid);
+
+ /* Acknowledge VF that PF is now about to assert the reset for the VF.
+ * On receiving this message VF will get into pending state and will
+ * start polling for the hardware reset completion status.
+ */
+ ret = hclge_inform_reset_assert_to_vf(vport);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "PF fail(%d) to inform VF(%d)of reset, reset failed!\n",
+ ret, vport->vport_id);
+ return;
+ }
+
+ dev_warn(&hdev->pdev->dev, "PF is now resetting VF %d.\n",
+ mbx_req->mbx_src_vfid);
+ /* reset this virtual function */
+ hclge_func_reset_cmd(hdev, mbx_req->mbx_src_vfid);
+}
+
void hclge_mbx_handler(struct hclge_dev *hdev)
{
struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq;
@@ -416,6 +455,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
case HCLGE_MBX_QUEUE_RESET:
hclge_mbx_reset_vf_queue(vport, req);
break;
+ case HCLGE_MBX_RESET:
+ hclge_reset_vf(vport, req);
+ break;
default:
dev_err(&hdev->pdev->dev,
"un-supported mailbox message, code = %d\n",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index 85985e731311..1bbfe131b596 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -315,6 +315,12 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev)
goto err_csq;
}
+ /* initialize the pointers of async rx queue of mailbox */
+ hdev->arq.hdev = hdev;
+ hdev->arq.head = 0;
+ hdev->arq.tail = 0;
+ hdev->arq.count = 0;
+
/* get firmware version */
ret = hclgevf_cmd_query_firmware_version(&hdev->hw, &version);
if (ret) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 906dfa3d0fc6..2b8426412cc9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2,6 +2,7 @@
// Copyright (c) 2016-2017 Hisilicon Limited.
#include <linux/etherdevice.h>
+#include <net/rtnetlink.h>
#include "hclgevf_cmd.h"
#include "hclgevf_main.h"
#include "hclge_mbx.h"
@@ -9,6 +10,8 @@
#define HCLGEVF_NAME "hclgevf"
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev);
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev);
static struct hnae3_ae_algo ae_algovf;
static const struct pci_device_id ae_algovf_pci_tbl[] = {
@@ -208,6 +211,12 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev)
struct hclgevf_tqp *tqp;
int i;
+ /* if this is on going reset then we need to re-allocate the TPQs
+ * since we cannot assume we would get same number of TPQs back from PF
+ */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ devm_kfree(&hdev->pdev->dev, hdev->htqp);
+
hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
sizeof(struct hclgevf_tqp), GFP_KERNEL);
if (!hdev->htqp)
@@ -251,6 +260,12 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev)
new_tqps = kinfo->rss_size * kinfo->num_tc;
kinfo->num_tqps = min(new_tqps, hdev->num_tqps);
+ /* if this is on going reset then we need to re-allocate the hnae queues
+ * as well since number of TPQs from PF might have changed.
+ */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ devm_kfree(&hdev->pdev->dev, kinfo->tqp);
+
kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
sizeof(struct hnae3_queue *), GFP_KERNEL);
if (!kinfo->tqp)
@@ -832,6 +847,138 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
2, true, NULL, 0);
}
+static int hclgevf_notify_client(struct hclgevf_dev *hdev,
+ enum hnae3_reset_notify_type type)
+{
+ struct hnae3_client *client = hdev->nic_client;
+ struct hnae3_handle *handle = &hdev->nic;
+
+ if (!client->ops->reset_notify)
+ return -EOPNOTSUPP;
+
+ return client->ops->reset_notify(handle, type);
+}
+
+static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_RESET_WAIT_MS 500
+#define HCLGEVF_RESET_WAIT_CNT 20
+ u32 val, cnt = 0;
+
+ /* wait to check the hardware reset completion status */
+ val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+ while (hnae_get_bit(val, HCLGEVF_FUN_RST_ING_B) &&
+ (cnt < HCLGEVF_RESET_WAIT_CNT)) {
+ msleep(HCLGEVF_RESET_WAIT_MS);
+ val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+ cnt++;
+ }
+
+ /* hardware completion status should be available by this time */
+ if (cnt >= HCLGEVF_RESET_WAIT_CNT) {
+ dev_warn(&hdev->pdev->dev,
+ "could'nt get reset done status from h/w, timeout!\n");
+ return -EBUSY;
+ }
+
+ /* we will wait a bit more to let reset of the stack to complete. This
+ * might happen in case reset assertion was made by PF. Yes, this also
+ * means we might end up waiting bit more even for VF reset.
+ */
+ msleep(5000);
+
+ return 0;
+}
+
+static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
+{
+ int ret;
+
+ /* uninitialize the nic client */
+ hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+ /* re-initialize the hclge device */
+ ret = hclgevf_init_hdev(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "hclge device re-init failed, VF is disabled!\n");
+ return ret;
+ }
+
+ /* bring up the nic client again */
+ hclgevf_notify_client(hdev, HNAE3_INIT_CLIENT);
+
+ return 0;
+}
+
+static int hclgevf_reset(struct hclgevf_dev *hdev)
+{
+ int ret;
+
+ rtnl_lock();
+
+ /* bring down the nic to stop any ongoing TX/RX */
+ hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+
+ /* check if VF could successfully fetch the hardware reset completion
+ * status from the hardware
+ */
+ ret = hclgevf_reset_wait(hdev);
+ if (ret) {
+ /* can't do much in this situation, will disable VF */
+ dev_err(&hdev->pdev->dev,
+ "VF failed(=%d) to fetch H/W reset completion status\n",
+ ret);
+
+ dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n");
+ hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+ rtnl_unlock();
+ return ret;
+ }
+
+ /* now, re-initialize the nic client and ae device*/
+ ret = hclgevf_reset_stack(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev, "failed to reset VF stack\n");
+
+ /* bring up the nic to enable TX/RX again */
+ hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+
+ rtnl_unlock();
+
+ return ret;
+}
+
+static int hclgevf_do_reset(struct hclgevf_dev *hdev)
+{
+ int status;
+ u8 respmsg;
+
+ status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL,
+ 0, false, &respmsg, sizeof(u8));
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "VF reset request to PF failed(=%d)\n", status);
+
+ return status;
+}
+
+static void hclgevf_reset_event(struct hnae3_handle *handle)
+{
+ struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+ dev_info(&hdev->pdev->dev, "received reset request from VF enet\n");
+
+ handle->reset_level = HNAE3_VF_RESET;
+
+ /* reset of this VF requested */
+ set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state);
+ hclgevf_reset_task_schedule(hdev);
+
+ handle->last_reset_time = jiffies;
+}
+
static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
@@ -854,10 +1001,22 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
hdev->num_msi_used += 1;
}
-static void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
{
- if (!test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state))
+ if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
+ !test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+ set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+ schedule_work(&hdev->rst_service_task);
+ }
+}
+
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
+{
+ if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) &&
+ !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) {
+ set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
schedule_work(&hdev->mbx_service_task);
+ }
}
static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
@@ -867,6 +1026,16 @@ static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
schedule_work(&hdev->service_task);
}
+static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev)
+{
+ /* if we have any pending mailbox event then schedule the mbx task */
+ if (hdev->mbx_event_pending)
+ hclgevf_mbx_task_schedule(hdev);
+
+ if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state))
+ hclgevf_reset_task_schedule(hdev);
+}
+
static void hclgevf_service_timer(struct timer_list *t)
{
struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer);
@@ -876,6 +1045,75 @@ static void hclgevf_service_timer(struct timer_list *t)
hclgevf_task_schedule(hdev);
}
+static void hclgevf_reset_service_task(struct work_struct *work)
+{
+ struct hclgevf_dev *hdev =
+ container_of(work, struct hclgevf_dev, rst_service_task);
+ int ret;
+
+ if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
+ return;
+
+ clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+
+ if (test_and_clear_bit(HCLGEVF_RESET_PENDING,
+ &hdev->reset_state)) {
+ /* PF has initmated that it is about to reset the hardware.
+ * We now have to poll & check if harware has actually completed
+ * the reset sequence. On hardware reset completion, VF needs to
+ * reset the client and ae device.
+ */
+ hdev->reset_attempts = 0;
+
+ ret = hclgevf_reset(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev, "VF stack reset failed.\n");
+ } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED,
+ &hdev->reset_state)) {
+ /* we could be here when either of below happens:
+ * 1. reset was initiated due to watchdog timeout due to
+ * a. IMP was earlier reset and our TX got choked down and
+ * which resulted in watchdog reacting and inducing VF
+ * reset. This also means our cmdq would be unreliable.
+ * b. problem in TX due to other lower layer(example link
+ * layer not functioning properly etc.)
+ * 2. VF reset might have been initiated due to some config
+ * change.
+ *
+ * NOTE: Theres no clear way to detect above cases than to react
+ * to the response of PF for this reset request. PF will ack the
+ * 1b and 2. cases but we will not get any intimation about 1a
+ * from PF as cmdq would be in unreliable state i.e. mailbox
+ * communication between PF and VF would be broken.
+ */
+
+ /* if we are never geting into pending state it means either:
+ * 1. PF is not receiving our request which could be due to IMP
+ * reset
+ * 2. PF is screwed
+ * We cannot do much for 2. but to check first we can try reset
+ * our PCIe + stack and see if it alleviates the problem.
+ */
+ if (hdev->reset_attempts > 3) {
+ /* prepare for full reset of stack + pcie interface */
+ hdev->nic.reset_level = HNAE3_VF_FULL_RESET;
+
+ /* "defer" schedule the reset task again */
+ set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+ } else {
+ hdev->reset_attempts++;
+
+ /* request PF for resetting this VF via mailbox */
+ ret = hclgevf_do_reset(hdev);
+ if (ret)
+ dev_warn(&hdev->pdev->dev,
+ "VF rst fail, stack will call\n");
+ }
+ }
+
+ clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+}
+
static void hclgevf_mailbox_service_task(struct work_struct *work)
{
struct hclgevf_dev *hdev;
@@ -887,7 +1125,7 @@ static void hclgevf_mailbox_service_task(struct work_struct *work)
clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
- hclgevf_mbx_handler(hdev);
+ hclgevf_mbx_async_handler(hdev);
clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
}
@@ -903,6 +1141,8 @@ static void hclgevf_service_task(struct work_struct *work)
*/
hclgevf_request_link_info(hdev);
+ hclgevf_deferred_task_schedule(hdev);
+
clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
}
@@ -945,8 +1185,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
if (!hclgevf_check_event_cause(hdev, &clearval))
goto skip_sched;
- /* schedule the VF mailbox service task, if not already scheduled */
- hclgevf_mbx_task_schedule(hdev);
+ hclgevf_mbx_handler(hdev);
hclgevf_clear_event_cause(hdev, clearval);
@@ -968,6 +1207,22 @@ static int hclgevf_configure(struct hclgevf_dev *hdev)
return hclgevf_get_tc_info(hdev);
}
+static int hclgevf_alloc_hdev(struct hnae3_ae_dev *ae_dev)
+{
+ struct pci_dev *pdev = ae_dev->pdev;
+ struct hclgevf_dev *hdev = ae_dev->priv;
+
+ hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
+ if (!hdev)
+ return -ENOMEM;
+
+ hdev->pdev = pdev;
+ hdev->ae_dev = ae_dev;
+ ae_dev->priv = hdev;
+
+ return 0;
+}
+
static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev)
{
struct hnae3_handle *roce = &hdev->roce;
@@ -1073,6 +1328,10 @@ static void hclgevf_ae_stop(struct hnae3_handle *handle)
static void hclgevf_state_init(struct hclgevf_dev *hdev)
{
+ /* if this is on going reset then skip this initialization */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return;
+
/* setup tasks for the MBX */
INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task);
clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
@@ -1084,6 +1343,8 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev)
INIT_WORK(&hdev->service_task, hclgevf_service_task);
clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
+ INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task);
+
mutex_init(&hdev->mbx_resp.mbx_mutex);
/* bring the device down */
@@ -1100,6 +1361,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
cancel_work_sync(&hdev->service_task);
if (hdev->mbx_service_task.func)
cancel_work_sync(&hdev->mbx_service_task);
+ if (hdev->rst_service_task.func)
+ cancel_work_sync(&hdev->rst_service_task);
mutex_destroy(&hdev->mbx_resp.mbx_mutex);
}
@@ -1110,6 +1373,10 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
int vectors;
int i;
+ /* if this is on going reset then skip this initialization */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return 0;
+
hdev->num_msi = HCLGEVF_MAX_VF_VECTOR_NUM;
vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
@@ -1160,6 +1427,10 @@ static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev)
{
int ret = 0;
+ /* if this is on going reset then skip this initialization */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return 0;
+
hclgevf_get_misc_vector(hdev);
ret = request_irq(hdev->misc_vector.vector_irq, hclgevf_misc_irq_handle,
@@ -1270,6 +1541,14 @@ static int hclgevf_pci_init(struct hclgevf_dev *hdev)
struct hclgevf_hw *hw;
int ret;
+ /* check if we need to skip initialization of pci. This will happen if
+ * device is undergoing VF reset. Otherwise, we would need to
+ * re-initialize pci interface again i.e. when device is not going
+ * through *any* reset or actually undergoing full reset.
+ */
+ if (hclgevf_dev_ongoing_reset(hdev))
+ return 0;
+
ret = pci_enable_device(pdev);
if (ret) {
dev_err(&pdev->dev, "failed to enable PCI device\n");
@@ -1321,19 +1600,16 @@ static void hclgevf_pci_uninit(struct hclgevf_dev *hdev)
pci_set_drvdata(pdev, NULL);
}
-static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
{
- struct pci_dev *pdev = ae_dev->pdev;
- struct hclgevf_dev *hdev;
+ struct pci_dev *pdev = hdev->pdev;
int ret;
- hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
- if (!hdev)
- return -ENOMEM;
-
- hdev->pdev = pdev;
- hdev->ae_dev = ae_dev;
- ae_dev->priv = hdev;
+ /* check if device is on-going full reset(i.e. pcie as well) */
+ if (hclgevf_dev_ongoing_full_reset(hdev)) {
+ dev_warn(&pdev->dev, "device is going full reset\n");
+ hclgevf_uninit_hdev(hdev);
+ }
ret = hclgevf_pci_init(hdev);
if (ret) {
@@ -1418,15 +1694,38 @@ err_irq_init:
return ret;
}
-static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
{
- struct hclgevf_dev *hdev = ae_dev->priv;
-
hclgevf_cmd_uninit(hdev);
hclgevf_misc_irq_uninit(hdev);
hclgevf_state_uninit(hdev);
hclgevf_uninit_msi(hdev);
hclgevf_pci_uninit(hdev);
+}
+
+static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ struct pci_dev *pdev = ae_dev->pdev;
+ int ret;
+
+ ret = hclgevf_alloc_hdev(ae_dev);
+ if (ret) {
+ dev_err(&pdev->dev, "hclge device allocation failed\n");
+ return ret;
+ }
+
+ ret = hclgevf_init_hdev(ae_dev->priv);
+ if (ret)
+ dev_err(&pdev->dev, "hclge device initialization failed\n");
+
+ return ret;
+}
+
+static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ struct hclgevf_dev *hdev = ae_dev->priv;
+
+ hclgevf_uninit_hdev(hdev);
ae_dev->priv = NULL;
}
@@ -1526,6 +1825,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
.get_tc_size = hclgevf_get_tc_size,
.get_fw_version = hclgevf_get_fw_version,
.set_vlan_filter = hclgevf_set_vlan_filter,
+ .reset_event = hclgevf_reset_event,
.get_channels = hclgevf_get_channels,
.get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
.get_status = hclgevf_get_status,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 0eaea063e7c5..a477a7c36bbd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -34,6 +34,9 @@
#define HCLGEVF_VECTOR0_RX_CMDQ_INT_B 1
#define HCLGEVF_TQP_RESET_TRY_TIMES 10
+/* Reset related Registers */
+#define HCLGEVF_FUN_RST_ING 0x20C00
+#define HCLGEVF_FUN_RST_ING_B 0
#define HCLGEVF_RSS_IND_TBL_SIZE 512
#define HCLGEVF_RSS_SET_BITMAP_MSK 0xffff
@@ -52,6 +55,8 @@ enum hclgevf_states {
HCLGEVF_STATE_DISABLED,
/* task states */
HCLGEVF_STATE_SERVICE_SCHED,
+ HCLGEVF_STATE_RST_SERVICE_SCHED,
+ HCLGEVF_STATE_RST_HANDLING,
HCLGEVF_STATE_MBX_SERVICE_SCHED,
HCLGEVF_STATE_MBX_HANDLING,
};
@@ -122,6 +127,11 @@ struct hclgevf_dev {
struct hclgevf_rss_cfg rss_cfg;
unsigned long state;
+#define HCLGEVF_RESET_REQUESTED 0
+#define HCLGEVF_RESET_PENDING 1
+ unsigned long reset_state; /* requested, pending */
+ u32 reset_attempts;
+
u32 fw_version;
u16 num_tqps; /* num task queue pairs of this PF */
@@ -142,10 +152,13 @@ struct hclgevf_dev {
int *vector_irq;
bool accept_mta_mc; /* whether to accept mta filter multicast */
+ bool mbx_event_pending;
struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
+ struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
struct timer_list service_timer;
struct work_struct service_task;
+ struct work_struct rst_service_task;
struct work_struct mbx_service_task;
struct hclgevf_tqp *htqp;
@@ -158,11 +171,29 @@ struct hclgevf_dev {
u32 flag;
};
+static inline bool hclgevf_dev_ongoing_reset(struct hclgevf_dev *hdev)
+{
+ return (hdev &&
+ (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+ (hdev->nic.reset_level == HNAE3_VF_RESET));
+}
+
+static inline bool hclgevf_dev_ongoing_full_reset(struct hclgevf_dev *hdev)
+{
+ return (hdev &&
+ (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+ (hdev->nic.reset_level == HNAE3_VF_FULL_RESET));
+}
+
int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
const u8 *msg_data, u8 msg_len, bool need_resp,
u8 *resp_data, u16 resp_len);
void hclgevf_mbx_handler(struct hclgevf_dev *hdev);
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev);
+
void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state);
void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
u8 duplex);
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev);
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index a63ed3aa2c00..a28618428338 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -132,9 +132,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
struct hclge_mbx_pf_to_vf_cmd *req;
struct hclgevf_cmq_ring *crq;
struct hclgevf_desc *desc;
- u16 link_status, flag;
- u32 speed;
- u8 duplex;
+ u16 *msg_q;
+ u16 flag;
u8 *temp;
int i;
@@ -146,6 +145,12 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
desc = &crq->desc[crq->next_to_use];
req = (struct hclge_mbx_pf_to_vf_cmd *)desc->data;
+ /* synchronous messages are time critical and need preferential
+ * treatment. Therefore, we need to acknowledge all the sync
+ * responses as quickly as possible so that waiting tasks do not
+ * timeout and simultaneously queue the async messages for later
+ * prcessing in context of mailbox task i.e. the slow path.
+ */
switch (req->msg[0]) {
case HCLGE_MBX_PF_VF_RESP:
if (resp->received_resp)
@@ -165,13 +170,31 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
}
break;
case HCLGE_MBX_LINK_STAT_CHANGE:
- link_status = le16_to_cpu(req->msg[1]);
- memcpy(&speed, &req->msg[2], sizeof(speed));
- duplex = (u8)le16_to_cpu(req->msg[4]);
+ case HCLGE_MBX_ASSERTING_RESET:
+ /* set this mbx event as pending. This is required as we
+ * might loose interrupt event when mbx task is busy
+ * handling. This shall be cleared when mbx task just
+ * enters handling state.
+ */
+ hdev->mbx_event_pending = true;
- /* update upper layer with new link link status */
- hclgevf_update_link_status(hdev, link_status);
- hclgevf_update_speed_duplex(hdev, speed, duplex);
+ /* we will drop the async msg if we find ARQ as full
+ * and continue with next message
+ */
+ if (hdev->arq.count >= HCLGE_MBX_MAX_ARQ_MSG_NUM) {
+ dev_warn(&hdev->pdev->dev,
+ "Async Q full, dropping msg(%d)\n",
+ req->msg[1]);
+ break;
+ }
+
+ /* tail the async message in arq */
+ msg_q = hdev->arq.msg_q[hdev->arq.tail];
+ memcpy(&msg_q[0], req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE);
+ hclge_mbx_tail_ptr_move_arq(hdev->arq);
+ hdev->arq.count++;
+
+ hclgevf_mbx_task_schedule(hdev);
break;
default:
@@ -189,3 +212,57 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CRQ_HEAD_REG,
crq->next_to_use);
}
+
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
+{
+ u16 link_status;
+ u16 *msg_q;
+ u8 duplex;
+ u32 speed;
+ u32 tail;
+
+ /* we can safely clear it now as we are at start of the async message
+ * processing
+ */
+ hdev->mbx_event_pending = false;
+
+ tail = hdev->arq.tail;
+
+ /* process all the async queue messages */
+ while (tail != hdev->arq.head) {
+ msg_q = hdev->arq.msg_q[hdev->arq.head];
+
+ switch (msg_q[0]) {
+ case HCLGE_MBX_LINK_STAT_CHANGE:
+ link_status = le16_to_cpu(msg_q[1]);
+ memcpy(&speed, &msg_q[2], sizeof(speed));
+ duplex = (u8)le16_to_cpu(msg_q[4]);
+
+ /* update upper layer with new link link status */
+ hclgevf_update_link_status(hdev, link_status);
+ hclgevf_update_speed_duplex(hdev, speed, duplex);
+
+ break;
+ case HCLGE_MBX_ASSERTING_RESET:
+ /* PF has asserted reset hence VF should go in pending
+ * state and poll for the hardware reset status till it
+ * has been completely reset. After this stack should
+ * eventually be re-initialized.
+ */
+ hdev->nic.reset_level = HNAE3_VF_RESET;
+ set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+ hclgevf_reset_task_schedule(hdev);
+
+ break;
+ default:
+ dev_err(&hdev->pdev->dev,
+ "fetched unsupported(%d) message from arq\n",
+ msg_q[0]);
+ break;
+ }
+
+ hclge_mbx_head_ptr_move_arq(hdev->arq);
+ hdev->arq.count--;
+ msg_q = hdev->arq.msg_q[hdev->arq.head];
+ }
+}