summaryrefslogtreecommitdiff
path: root/drivers/infiniband/ulp
diff options
context:
space:
mode:
authorJack Wang <jinpu.wang@cloud.ionos.com>2020-10-23 09:43:44 +0200
committerJason Gunthorpe <jgg@nvidia.com>2020-10-28 13:17:39 -0300
commitfcf2959da6a74e71a85ab666e732fa1ed4da2c9a (patch)
treed5cab1815b66691f84d91c54931c4ed6d9a933d8 /drivers/infiniband/ulp
parent73385fdbc43df2e9ba07d4a459d6e0e2110ad2d8 (diff)
RDMA/rtrs-clt: Avoid run destroy_con_cq_qp/create_con_cq_qp in parallel
It could happen two kworkers race with each other: CPU0 CPU1 addr_resolver kworker reconnect kworker rtrs_clt_rdma_cm_handler rtrs_rdma_addr_resolved create_con_cq_qp: s.dev_ref++ "s.dev_ref is 1" wait in create_cm fails with TIMEOUT destroy_con_cq_qp: --s.dev_ref "s.dev_ref is 0" destroy_con_cq_qp: sess->s.dev = NULL rtrs_cq_qp_create -> create_qp(con, sess->dev->ib_pd...) sess->dev is NULL, panic. To fix the problem using mutex to serialize create_con_cq_qp and destroy_con_cq_qp. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20201023074353.21946-4-jinpu.wang@cloud.ionos.com Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com> Reviewed-by: Gioh Kim <gi-oh.kim@cloud.ionos.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c15
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.h1
2 files changed, 14 insertions, 2 deletions
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index fb840b152b37..4677e8ed29ae 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -1499,6 +1499,7 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid)
con->c.cid = cid;
con->c.sess = &sess->s;
atomic_set(&con->io_cnt, 0);
+ mutex_init(&con->con_mutex);
sess->s.con[cid] = &con->c;
@@ -1510,6 +1511,7 @@ static void destroy_con(struct rtrs_clt_con *con)
struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
sess->s.con[con->c.cid] = NULL;
+ mutex_destroy(&con->con_mutex);
kfree(con);
}
@@ -1520,6 +1522,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
int err, cq_vector;
struct rtrs_msg_rkey_rsp *rsp;
+ lockdep_assert_held(&con->con_mutex);
if (con->c.cid == 0) {
/*
* One completion for each receive and two for each send
@@ -1593,7 +1596,7 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con)
* Be careful here: destroy_con_cq_qp() can be called even
* create_con_cq_qp() failed, see comments there.
*/
-
+ lockdep_assert_held(&con->con_mutex);
rtrs_cq_qp_destroy(&con->c);
if (con->rsp_ius) {
rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE,
@@ -1625,7 +1628,9 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con)
struct rtrs_sess *s = con->c.sess;
int err;
+ mutex_lock(&con->con_mutex);
err = create_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
if (err) {
rtrs_err(s, "create_con_cq_qp(), err: %d\n", err);
return err;
@@ -1938,8 +1943,9 @@ static int create_cm(struct rtrs_clt_con *con)
errr:
stop_cm(con);
- /* Is safe to call destroy if cq_qp is not inited */
+ mutex_lock(&con->con_mutex);
destroy_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
destroy_cm:
destroy_cm(con);
@@ -2046,7 +2052,9 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
if (!sess->s.con[cid])
break;
con = to_clt_con(sess->s.con[cid]);
+ mutex_lock(&con->con_mutex);
destroy_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
destroy_cm(con);
destroy_con(con);
}
@@ -2213,7 +2221,10 @@ destroy:
struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]);
stop_cm(con);
+
+ mutex_lock(&con->con_mutex);
destroy_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
destroy_cm(con);
destroy_con(con);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index 167acd3c90fc..b8dbd701b3cb 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -72,6 +72,7 @@ struct rtrs_clt_con {
struct rtrs_iu *rsp_ius;
u32 queue_size;
unsigned int cpu;
+ struct mutex con_mutex;
atomic_t io_cnt;
int cm_err;
};