Diffstat (limited to 'net/smc')
-rw-r--r--  net/smc/af_smc.c    | 231
-rw-r--r--  net/smc/smc.h       |   5
-rw-r--r--  net/smc/smc_cdc.c   |  52
-rw-r--r--  net/smc/smc_cdc.h   |   1
-rw-r--r--  net/smc/smc_clc.c   |  84
-rw-r--r--  net/smc/smc_clc.h   |  34
-rw-r--r--  net/smc/smc_close.c | 208
-rw-r--r--  net/smc/smc_close.h |   2
-rw-r--r--  net/smc/smc_core.c  |  17
-rw-r--r--  net/smc/smc_diag.c  |   6
-rw-r--r--  net/smc/smc_ib.c    |  38
-rw-r--r--  net/smc/smc_rx.c    |   5
-rw-r--r--  net/smc/smc_tx.c    |  32
-rw-r--r--  net/smc/smc_wr.c    |  50
-rw-r--r--  net/smc/smc_wr.h    |   2
15 files changed, 448 insertions, 319 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 449f62e1e270..3583c8ab1bae 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -115,7 +115,6 @@ static int smc_release(struct socket *sock)
goto out;
smc = smc_sk(sk);
- sock_hold(sk);
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
@@ -124,10 +123,7 @@ static int smc_release(struct socket *sock)
else
lock_sock(sk);
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- sk->sk_state_change(sk);
- } else {
+ if (!smc->use_fallback) {
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -136,20 +132,21 @@ static int smc_release(struct socket *sock)
sock_release(smc->clcsock);
smc->clcsock = NULL;
}
+ if (smc->use_fallback) {
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk);
+ }
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
- if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
+ if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
out:
return rc;
}
@@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
- INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
@@ -377,6 +373,15 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
+static void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
@@ -390,6 +395,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
int rc = 0;
u8 ibport;
+ sock_hold(&smc->sk); /* sock put in passive closing */
+
if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
@@ -513,6 +520,8 @@ out_connected:
return rc ? rc : local_contact;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
decline_rdma:
@@ -520,15 +529,19 @@ decline_rdma:
smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
rc = smc_clc_send_decline(smc, reason_code);
- if (rc < sizeof(struct smc_clc_msg_decline))
+ if (rc < 0)
goto out_err;
}
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
out_err:
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return rc;
}
@@ -581,40 +594,33 @@ out_err:
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
- struct sock *sk = &lsmc->sk;
- struct socket *new_clcsock;
+ struct socket *new_clcsock = NULL;
+ struct sock *lsk = &lsmc->sk;
struct sock *new_sk;
int rc;
- release_sock(&lsmc->sk);
- new_sk = smc_sock_alloc(sock_net(sk), NULL);
+ release_sock(lsk);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL);
if (!new_sk) {
rc = -ENOMEM;
- lsmc->sk.sk_err = ENOMEM;
+ lsk->sk_err = ENOMEM;
*new_smc = NULL;
- lock_sock(&lsmc->sk);
+ lock_sock(lsk);
goto out;
}
*new_smc = smc_sk(new_sk);
rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
- lock_sock(&lsmc->sk);
- if (rc < 0) {
- lsmc->sk.sk_err = -rc;
- new_sk->sk_state = SMC_CLOSED;
- sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
- *new_smc = NULL;
- goto out;
- }
- if (lsmc->sk.sk_state == SMC_CLOSED) {
+ lock_sock(lsk);
+ if (rc < 0)
+ lsk->sk_err = -rc;
+ if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ new_sk->sk_prot->unhash(new_sk);
+ sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
}
@@ -631,7 +637,7 @@ static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
struct smc_sock *par = smc_sk(parent);
- sock_hold(sk);
+ sock_hold(sk); /* sock_put in smc_accept_unlink () */
spin_lock(&par->accept_q_lock);
list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
spin_unlock(&par->accept_q_lock);
@@ -647,7 +653,7 @@ static void smc_accept_unlink(struct sock *sk)
list_del_init(&smc_sk(sk)->accept_q);
spin_unlock(&par->accept_q_lock);
sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
- sock_put(sk);
+ sock_put(sk); /* sock_hold in smc_accept_enqueue */
}
/* remove a sock from the accept queue to bind it to a new socket created
@@ -664,8 +670,12 @@ struct sock *smc_accept_dequeue(struct sock *parent,
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
+ if (isk->clcsock) {
+ sock_release(isk->clcsock);
+ isk->clcsock = NULL;
+ }
new_sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ sock_put(new_sk); /* final */
continue;
}
if (new_sock)
@@ -680,14 +690,11 @@ void smc_close_non_accepted(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
- sock_hold(sk);
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- } else {
+ if (!smc->use_fallback) {
smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -700,14 +707,15 @@ void smc_close_non_accepted(struct sock *sk)
sock_release(tcp);
}
if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ } else {
+ if (sk->sk_state == SMC_CLOSED)
+ smc_conn_free(&smc->conn);
}
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
}
static int smc_serv_conf_first_link(struct smc_sock *smc)
@@ -751,14 +759,16 @@ static void smc_listen_work(struct work_struct *work)
{
struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work);
+ struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct socket *newclcsock = new_smc->clcsock;
struct smc_sock *lsmc = new_smc->listen_smc;
struct smc_clc_msg_accept_confirm cclc;
int local_contact = SMC_REUSE_CONTACT;
struct sock *newsmcsk = &new_smc->sk;
- struct smc_clc_msg_proposal pclc;
+ struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
struct sockaddr_in peeraddr;
+ u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
int rc = 0, len;
@@ -775,7 +785,7 @@ static void smc_listen_work(struct work_struct *work)
/* do inband token exchange -
*wait for and receive SMC Proposal CLC message
*/
- reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
+ reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf),
SMC_CLC_PROPOSAL);
if (reason_code < 0)
goto out_err;
@@ -804,8 +814,11 @@ static void smc_listen_work(struct work_struct *work)
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
- if ((pclc.outgoing_subnet != subnet) ||
- (pclc.prefix_len != prefix_len)) {
+
+ pclc = (struct smc_clc_msg_proposal *)&buf;
+ pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (pclc_prfx->outgoing_subnet != subnet ||
+ pclc_prfx->prefix_len != prefix_len) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
@@ -816,7 +829,7 @@ static void smc_listen_work(struct work_struct *work)
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc.lcl, 0);
+ smcibdev, ibport, &pclc->lcl, 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -879,11 +892,9 @@ static void smc_listen_work(struct work_struct *work)
}
/* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link(new_smc);
- if (reason_code < 0) {
+ if (reason_code < 0)
/* peer is not aware of a problem */
- rc = reason_code;
goto out_err_unlock;
- }
if (reason_code > 0)
goto decline_rdma_unlock;
}
@@ -910,21 +921,26 @@ enqueue:
return;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
decline_rdma:
/* RDMA setup failed, switch back to TCP */
smc_conn_free(&new_smc->conn);
new_smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- rc = smc_clc_send_decline(new_smc, reason_code);
- if (rc < sizeof(struct smc_clc_msg_decline))
+ if (smc_clc_send_decline(new_smc, reason_code) < 0)
goto out_err;
}
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
out_err:
+ if (newsmcsk->sk_state == SMC_INIT)
+ sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
goto enqueue; /* queue new sock with sk_err set */
@@ -934,11 +950,12 @@ static void smc_tcp_listen_work(struct work_struct *work)
{
struct smc_sock *lsmc = container_of(work, struct smc_sock,
tcp_listen_work);
+ struct sock *lsk = &lsmc->sk;
struct smc_sock *new_smc;
int rc = 0;
- lock_sock(&lsmc->sk);
- while (lsmc->sk.sk_state == SMC_LISTEN) {
+ lock_sock(lsk);
+ while (lsk->sk_state == SMC_LISTEN) {
rc = smc_clcsock_accept(lsmc, &new_smc);
if (rc)
goto out;
@@ -947,15 +964,25 @@ static void smc_tcp_listen_work(struct work_struct *work)
new_smc->listen_smc = lsmc;
new_smc->use_fallback = false; /* assume rdma capability first*/
- sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
+ sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- schedule_work(&new_smc->smc_listen_work);
+ sock_hold(&new_smc->sk); /* sock_put in passive closing */
+ if (!schedule_work(&new_smc->smc_listen_work))
+ sock_put(&new_smc->sk);
}
out:
- release_sock(&lsmc->sk);
- lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
+ if (lsmc->clcsock) {
+ sock_release(lsmc->clcsock);
+ lsmc->clcsock = NULL;
+ }
+ release_sock(lsk);
+ /* no more listening, wake up smc_close_wait_listen_clcsock and
+ * accept
+ */
+ lsk->sk_state_change(lsk);
+ sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
static int smc_listen(struct socket *sock, int backlog)
@@ -989,7 +1016,9 @@ static int smc_listen(struct socket *sock, int backlog)
sk->sk_ack_backlog = 0;
sk->sk_state = SMC_LISTEN;
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
- schedule_work(&smc->tcp_listen_work);
+ sock_hold(sk); /* sock_hold in tcp_listen_worker */
+ if (!schedule_work(&smc->tcp_listen_work))
+ sock_put(sk);
out:
release_sock(sk);
@@ -1006,6 +1035,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
int rc = 0;
lsmc = smc_sk(sk);
+ sock_hold(sk); /* sock_put below */
lock_sock(sk);
if (lsmc->sk.sk_state != SMC_LISTEN) {
@@ -1040,6 +1070,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
out:
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
return rc;
}
@@ -1109,21 +1140,15 @@ out:
static __poll_t smc_accept_poll(struct sock *parent)
{
- struct smc_sock *isk;
- struct sock *sk;
+ struct smc_sock *isk = smc_sk(parent);
+ int mask = 0;
- lock_sock(parent);
- list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
- sk = (struct sock *)isk;
+ spin_lock(&isk->accept_q_lock);
+ if (!list_empty(&isk->accept_q))
+ mask = POLLIN | POLLRDNORM;
+ spin_unlock(&isk->accept_q_lock);
- if (sk->sk_state == SMC_ACTIVE) {
- release_sock(parent);
- return POLLIN | POLLRDNORM;
- }
- }
- release_sock(parent);
-
- return 0;
+ return mask;
}
static __poll_t smc_poll(struct file *file, struct socket *sock,
@@ -1134,9 +1159,15 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
struct smc_sock *smc;
int rc;
+ if (!sk)
+ return POLLNVAL;
+
smc = smc_sk(sock->sk);
+ sock_hold(sk);
+ lock_sock(sk);
if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
/* delegate to CLC child sock */
+ release_sock(sk);
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
/* if non-blocking connect finished ... */
lock_sock(sk);
@@ -1148,37 +1179,43 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
rc = smc_connect_rdma(smc);
if (rc < 0)
mask |= POLLERR;
- else
- /* success cases including fallback */
- mask |= POLLOUT | POLLWRNORM;
+ /* success cases including fallback */
+ mask |= POLLOUT | POLLWRNORM;
}
}
- release_sock(sk);
} else {
- sock_poll_wait(file, sk_sleep(sk), wait);
- if (sk->sk_state == SMC_LISTEN)
- /* woken up by sk_data_ready in smc_listen_work() */
- mask |= smc_accept_poll(sk);
+ if (sk->sk_state != SMC_CLOSED) {
+ release_sock(sk);
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ lock_sock(sk);
+ }
if (sk->sk_err)
mask |= POLLERR;
- if (atomic_read(&smc->conn.sndbuf_space) ||
- (sk->sk_shutdown & SEND_SHUTDOWN)) {
- mask |= POLLOUT | POLLWRNORM;
- } else {
- sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- }
- if (atomic_read(&smc->conn.bytes_to_rcv))
- mask |= POLLIN | POLLRDNORM;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
mask |= POLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
- if (sk->sk_state == SMC_APPCLOSEWAIT1)
- mask |= POLLIN;
+ if (sk->sk_state == SMC_LISTEN) {
+ /* woken up by sk_data_ready in smc_listen_work() */
+ mask = smc_accept_poll(sk);
+ } else {
+ if (atomic_read(&smc->conn.sndbuf_space) ||
+ sk->sk_shutdown & SEND_SHUTDOWN) {
+ mask |= POLLOUT | POLLWRNORM;
+ } else {
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
+ if (atomic_read(&smc->conn.bytes_to_rcv))
+ mask |= POLLIN | POLLRDNORM;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ if (sk->sk_state == SMC_APPCLOSEWAIT1)
+ mask |= POLLIN;
+ }
}
+ release_sock(sk);
+ sock_put(sk);
return mask;
}
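
Much of the af_smc.c churn above replaces the old delayed sock_put worker with plain reference counting: every schedule of a work item is preceded by a sock_hold(), and the reference is dropped again immediately when schedule_work() returns false (the work was already queued, so the queued instance already owns a reference). A minimal userspace sketch of that ownership rule — the names here are illustrative, not the kernel API:

#include <stdbool.h>
#include <stdio.h>

struct obj { int refcnt; bool queued; };

static void hold(struct obj *o) { o->refcnt++; }

static void put(struct obj *o)
{
	if (--o->refcnt == 0)
		printf("last reference gone, freeing\n");
}

/* stand-in for schedule_work(): false if the work was already queued */
static bool queue_once(struct obj *o)
{
	bool was_queued = o->queued;

	o->queued = true;
	return !was_queued;
}

/* the queued worker owns one reference and drops it when done */
static void close_work(struct obj *o)
{
	o->queued = false;
	/* ... passive-closing work ... */
	put(o);
}

int main(void)
{
	struct obj o = { .refcnt = 1 };	/* ref held by the socket   */

	hold(&o);		/* ref for the worker instance      */
	if (!queue_once(&o))	/* already pending? it owns a ref   */
		put(&o);	/* ... so give ours back right away */
	close_work(&o);		/* worker runs, drops its ref       */
	put(&o);		/* final put, object is freed       */
	return 0;
}
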
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 0bee9d16cf29..9518986c97b1 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -178,7 +178,6 @@ struct smc_sock { /* smc sock container */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
spinlock_t accept_q_lock; /* protects accept_q */
- struct delayed_work sock_put_work; /* final socket freeing */
bool use_fallback; /* fallback to tcp */
u8 wait_close_tx_prepared : 1;
/* shutdown wr or close
@@ -253,12 +252,12 @@ static inline int smc_uncompress_bufsize(u8 compressed)
static inline bool using_ipsec(struct smc_sock *smc)
{
return (smc->clcsock->sk->sk_policy[0] ||
- smc->clcsock->sk->sk_policy[1]) ? 1 : 0;
+ smc->clcsock->sk->sk_policy[1]) ? true : false;
}
#else
static inline bool using_ipsec(struct smc_sock *smc)
{
- return 0;
+ return false;
}
#endif
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 87f7bede6eab..3cd086e5bd28 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -57,9 +57,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
cdcpend->conn);
}
smc_tx_sndbuf_nonfull(smc);
- if (smc->sk.sk_state != SMC_ACTIVE)
- /* wake up smc_close_wait_tx_pends() */
- smc->sk.sk_state_change(&smc->sk);
bh_unlock_sock(&smc->sk);
}
@@ -68,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ int rc;
- return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
- (struct smc_wr_tx_pend_priv **)pend);
+ rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+ (struct smc_wr_tx_pend_priv **)pend);
+ if (!conn->alert_token_local)
+ /* abnormal termination */
+ rc = -EPIPE;
+ return rc;
}
static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
@@ -155,14 +157,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
(unsigned long)conn);
}
-bool smc_cdc_tx_has_pending(struct smc_connection *conn)
-{
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
-
- return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE,
- smc_cdc_tx_filter, (unsigned long)conn);
-}
-
/********************************* receive ***********************************/
static inline bool smc_cdc_before(u16 seq1, u16 seq2)
@@ -213,6 +207,17 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
/* guarantee 0 <= bytes_to_rcv <= rmbe_size */
smp_mb__after_atomic();
smc->sk.sk_data_ready(&smc->sk);
+ } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
+ (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) {
+ smc->sk.sk_data_ready(&smc->sk);
+ }
+
+ /* piggy backed tx info */
+ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
+ if (diff_cons && smc_tx_prepared_sends(conn)) {
+ smc_tx_sndbuf_nonempty(conn);
+ /* trigger socket release if connection closed */
+ smc_close_wake_tx_prepared(smc);
}
if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
@@ -224,25 +229,10 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
if (smc->clcsock && smc->clcsock->sk)
smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(&smc->sk, SOCK_DONE);
- schedule_work(&conn->close_work);
+ sock_hold(&smc->sk); /* sock_put in close_work */
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
}
-
- /* piggy backed tx info */
- /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
- if (diff_cons && smc_tx_prepared_sends(conn)) {
- smc_tx_sndbuf_nonempty(conn);
- /* trigger socket release if connection closed */
- smc_close_wake_tx_prepared(smc);
- }
-
- /* socket connected but not accepted */
- if (!smc->sk.sk_socket)
- return;
-
- /* data available */
- if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
- (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req))
- smc_tx_consumer_update(conn);
}
/* called under tasklet context */
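
The smc_cdc_get_free_slot() hunk above deliberately tests conn->alert_token_local after the (possibly blocking) slot acquisition: a connection torn down while the caller waited is reported as -EPIPE instead of handing back a slot for a dead connection. A compilable caricature of that ordering — struct conn, acquire_slot() and the alive flag are stand-ins, not the kernel API:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct conn { bool alive; };

/* stand-in for smc_wr_tx_get_free_slot(): may block while waiting */
static int acquire_slot(struct conn *c, int *slot)
{
	*slot = 0;
	return 0;
}

/* stand-in for smc_cdc_get_free_slot(): the liveness check runs
 * AFTER the wait, so a teardown during the wait is not missed
 */
static int get_slot_checked(struct conn *c, int *slot)
{
	int rc = acquire_slot(c, slot);

	if (!c->alive)		/* abnormal termination while waiting */
		rc = -EPIPE;
	return rc;
}

int main(void)
{
	struct conn c = { .alive = false };	/* torn down meanwhile */
	int slot;

	printf("rc=%d\n", get_slot_checked(&c, &slot));	/* rc = -EPIPE */
	return 0;
}
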
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 149ceda1b088..ab240b37ad11 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -214,7 +214,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
-bool smc_cdc_tx_has_pending(struct smc_connection *conn);
int smc_cdc_init(void) __init;
#endif /* SMC_CDC_H */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 511548085d16..8ac51583a063 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -22,6 +22,54 @@
#include "smc_clc.h"
#include "smc_ib.h"
+/* check if received message has a correct header length and contains valid
+ * heading and trailing eyecatchers
+ */
+static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
+{
+ struct smc_clc_msg_proposal_prefix *pclc_prfx;
+ struct smc_clc_msg_accept_confirm *clc;
+ struct smc_clc_msg_proposal *pclc;
+ struct smc_clc_msg_decline *dclc;
+ struct smc_clc_msg_trail *trl;
+
+ if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+ return false;
+ switch (clcm->type) {
+ case SMC_CLC_PROPOSAL:
+ pclc = (struct smc_clc_msg_proposal *)clcm;
+ pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (ntohs(pclc->hdr.length) !=
+ sizeof(*pclc) + ntohs(pclc->iparea_offset) +
+ sizeof(*pclc_prfx) +
+ pclc_prfx->ipv6_prefixes_cnt *
+ sizeof(struct smc_clc_ipv6_prefix) +
+ sizeof(*trl))
+ return false;
+ trl = (struct smc_clc_msg_trail *)
+ ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
+ break;
+ case SMC_CLC_ACCEPT:
+ case SMC_CLC_CONFIRM:
+ clc = (struct smc_clc_msg_accept_confirm *)clcm;
+ if (ntohs(clc->hdr.length) != sizeof(*clc))
+ return false;
+ trl = &clc->trl;
+ break;
+ case SMC_CLC_DECLINE:
+ dclc = (struct smc_clc_msg_decline *)clcm;
+ if (ntohs(dclc->hdr.length) != sizeof(*dclc))
+ return false;
+ trl = &dclc->trl;
+ break;
+ default:
+ return false;
+ }
+ if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+ return false;
+ return true;
+}
+
/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
@@ -75,9 +123,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
datlen = ntohs(clcm->length);
if ((len < sizeof(struct smc_clc_msg_hdr)) ||
- (datlen < sizeof(struct smc_clc_msg_decline)) ||
- (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
- memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
+ (datlen > buflen) ||
((clcm->type != SMC_CLC_DECLINE) &&
(clcm->type != expected_type))) {
smc->sk.sk_err = EPROTO;
@@ -91,7 +137,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
krflags = MSG_WAITALL;
smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
len = sock_recvmsg(smc->clcsock, &msg, krflags);
- if (len < datlen) {
+ if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
smc->sk.sk_err = EPROTO;
reason_code = -EPROTO;
goto out;
@@ -135,7 +181,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
smc->sk.sk_err = EPROTO;
if (len < 0)
smc->sk.sk_err = -len;
- return len;
+ return sock_error(&smc->sk);
}
/* send CLC PROPOSAL message across internal TCP socket */
@@ -143,33 +189,43 @@ int smc_clc_send_proposal(struct smc_sock *smc,
struct smc_ib_device *smcibdev,
u8 ibport)
{
+ struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_proposal pclc;
+ struct smc_clc_msg_trail trl;
int reason_code = 0;
+ struct kvec vec[3];
struct msghdr msg;
- struct kvec vec;
- int len, rc;
+ int len, plen, rc;
/* send SMC Proposal CLC message */
+ plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
- pclc.hdr.length = htons(sizeof(pclc));
+ pclc.hdr.length = htons(plen);
pclc.hdr.version = SMC_CLC_V1; /* SMC version */
memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
+ pclc.iparea_offset = htons(0);
+ memset(&pclc_prfx, 0, sizeof(pclc_prfx));
/* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
- &pclc.prefix_len);
+ rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+ &pclc_prfx.prefix_len);
if (rc)
return SMC_CLC_DECL_CNFERR; /* configuration error */
- memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+ pclc_prfx.ipv6_prefixes_cnt = 0;
+ memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
- vec.iov_base = &pclc;
- vec.iov_len = sizeof(pclc);
+ vec[0].iov_base = &pclc;
+ vec[0].iov_len = sizeof(pclc);
+ vec[1].iov_base = &pclc_prfx;
+ vec[1].iov_len = sizeof(pclc_prfx);
+ vec[2].iov_base = &trl;
+ vec[2].iov_len = sizeof(trl);
/* due to the few bytes needed for clc-handshake this cannot block */
- len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));
+ len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
if (len < sizeof(pclc)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
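
The three-element kvec above sends header, prefix area and trailer as one message without copying them into a contiguous staging buffer. A userspace analogue of that scatter-gather send, using writev() — the struct layouts and eyecatcher bytes below are illustrative only, not the real CLC wire format:

#include <arpa/inet.h>
#include <stdint.h>
#include <sys/uio.h>
#include <unistd.h>

struct hdr { uint8_t eye[4]; uint16_t len; } __attribute__((packed));
struct pfx { uint32_t subnet; uint8_t plen; } __attribute__((packed));
struct trl { uint8_t eye[4]; };

int main(void)
{
	struct hdr h = { "SMCR", htons(sizeof(h) + sizeof(struct pfx) +
				       sizeof(struct trl)) };
	struct pfx p = { htonl(0xc0a80000), 16 };	/* 192.168.0.0/16 */
	struct trl t = { "SMCR" };
	struct iovec vec[3] = {
		{ &h, sizeof(h) },
		{ &p, sizeof(p) },
		{ &t, sizeof(t) },
	};

	/* all three parts leave in one call, as kernel_sendmsg() above */
	return writev(STDOUT_FILENO, vec, 3) < 0;
}
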
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 12a9af1539a2..c145a0f36a68 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -44,7 +44,7 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 version : 4,
flag : 1,
- rsvd : 3;
+ rsvd : 3;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 rsvd : 3,
flag : 1,
@@ -62,17 +62,31 @@ struct smc_clc_msg_local { /* header2 of clc messages */
u8 mac[6]; /* mac of ib_device port */
};
-struct smc_clc_msg_proposal { /* clc proposal message */
- struct smc_clc_msg_hdr hdr;
- struct smc_clc_msg_local lcl;
- __be16 iparea_offset; /* offset to IP address information area */
+struct smc_clc_ipv6_prefix {
+ u8 prefix[4];
+ u8 prefix_len;
+} __packed;
+
+struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
u8 prefix_len; /* number of significant bits in mask */
u8 reserved[2];
u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */
- struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
} __aligned(4);
+struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
+ struct smc_clc_msg_hdr hdr;
+ struct smc_clc_msg_local lcl;
+ __be16 iparea_offset; /* offset to IP address information area */
+} __aligned(4);
+
+#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28
+#define SMC_CLC_PROPOSAL_MAX_PREFIX (8 * sizeof(struct smc_clc_ipv6_prefix))
+#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \
+ SMC_CLC_PROPOSAL_MAX_OFFSET + \
+ SMC_CLC_PROPOSAL_MAX_PREFIX + \
+ sizeof(struct smc_clc_msg_trail))
+
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
struct smc_clc_msg_local lcl;
@@ -102,6 +116,14 @@ struct smc_clc_msg_decline { /* clc decline message */
struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
} __aligned(4);
+/* determine start of the prefix area within the proposal message */
+static inline struct smc_clc_msg_proposal_prefix *
+smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
+{
+ return (struct smc_clc_msg_proposal_prefix *)
+ ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
+}
+
struct smc_sock;
struct smc_ib_device;
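
For scale: assuming the usual SMC field sizes (SMC_SYSTEMID_LEN 8 and SMC_GID_SIZE 16, giving an 8-byte header, 30-byte smc_clc_msg_local and 2-byte iparea_offset), the new receive bound works out to a small constant:

  SMC_CLC_MAX_LEN = 40 (proposal)
                  + 40 (SMC_CLC_PROPOSAL_MAX_OFFSET, 0x28)
                  + 40 (SMC_CLC_PROPOSAL_MAX_PREFIX, 8 * 5-byte IPv6 prefixes)
                  +  4 (trailer)
                  = 124 bytes

which is why smc_listen_work() above can receive the whole proposal into an on-stack u8 buf[SMC_CLC_MAX_LEN].
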
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 48615d2ac4aa..e339c0186dcf 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -19,7 +19,7 @@
#include "smc_cdc.h"
#include "smc_close.h"
-#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ)
+#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ)
static void smc_close_cleanup_listen(struct sock *parent)
{
@@ -30,23 +30,24 @@ static void smc_close_cleanup_listen(struct sock *parent)
smc_close_non_accepted(sk);
}
-static void smc_close_wait_tx_pends(struct smc_sock *smc)
+static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = &smc->sk;
signed long timeout;
- timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
+ timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
add_wait_queue(sk_sleep(sk), &wait);
- while (!signal_pending(current) && timeout) {
- int rc;
-
- rc = sk_wait_event(sk, &timeout,
- !smc_cdc_tx_has_pending(&smc->conn),
- &wait);
- if (rc)
+ do {
+ release_sock(sk);
+ if (smc->clcsock)
+ timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
+ timeout);
+ sched_annotate_sleep();
+ lock_sock(sk);
+ if (!smc->clcsock)
break;
- }
+ } while (timeout);
remove_wait_queue(sk_sleep(sk), &wait);
}
@@ -111,58 +112,63 @@ static int smc_close_abort(struct smc_connection *conn)
}
/* terminate smc socket abnormally - active abort
- * RDMA communication no longer possible
+ * link group is terminated, i.e. RDMA communication no longer possible
*/
-void smc_close_active_abort(struct smc_sock *smc)
+static void smc_close_active_abort(struct smc_sock *smc)
{
+ struct sock *sk = &smc->sk;
+
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
- smc->sk.sk_err = ECONNABORTED;
+ sk->sk_err = ECONNABORTED;
if (smc->clcsock && smc->clcsock->sk) {
smc->clcsock->sk->sk_err = ECONNABORTED;
smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
}
- switch (smc->sk.sk_state) {
+ switch (sk->sk_state) {
case SMC_INIT:
case SMC_ACTIVE:
- smc->sk.sk_state = SMC_PEERABORTWAIT;
+ sk->sk_state = SMC_PEERABORTWAIT;
+ release_sock(sk);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
+ sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
if (!smc_cdc_rxed_any_close(&smc->conn))
- smc->sk.sk_state = SMC_PEERABORTWAIT;
+ sk->sk_state = SMC_PEERABORTWAIT;
else
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
+ release_sock(sk);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (!txflags->peer_conn_closed) {
- smc->sk.sk_state = SMC_PEERABORTWAIT;
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
+ /* just SHUTDOWN_SEND done */
+ sk->sk_state = SMC_PEERABORTWAIT;
} else {
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
}
+ sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
- if (!txflags->peer_conn_closed) {
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
- }
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERFINCLOSEWAIT:
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
break;
}
- sock_set_flag(&smc->sk, SOCK_DEAD);
- smc->sk.sk_state_change(&smc->sk);
+ sock_set_flag(sk, SOCK_DEAD);
+ sk->sk_state_change(sk);
}
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
@@ -185,13 +191,11 @@ int smc_close_active(struct smc_sock *smc)
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
-again:
old_state = sk->sk_state;
- switch (old_state) {
+again:
+ switch (sk->sk_state) {
case SMC_INIT:
sk->sk_state = SMC_CLOSED;
- if (smc->smc_listen_work.func)
- cancel_work_sync(&smc->smc_listen_work);
break;
case SMC_LISTEN:
sk->sk_state = SMC_CLOSED;
@@ -200,11 +204,9 @@ again:
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
/* wake up kernel_accept of smc_tcp_listen_worker */
smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
+ smc_close_wait_listen_clcsock(smc);
}
- release_sock(sk);
smc_close_cleanup_listen(sk);
- cancel_work_sync(&smc->smc_listen_work);
- lock_sock(sk);
break;
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
@@ -214,6 +216,8 @@ again:
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
} else {
/* peer event has changed the state */
@@ -226,9 +230,10 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
}
sk->sk_state = SMC_CLOSED;
- smc_close_wait_tx_pends(smc);
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
@@ -237,19 +242,21 @@ again:
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
- if (sk->sk_err != ECONNABORTED) {
- /* confirm close from peer */
- rc = smc_close_final(conn);
- if (rc)
- break;
- }
- if (smc_cdc_rxed_any_close(conn))
+ if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
+ sk->sk_state != SMC_APPCLOSEWAIT2)
+ goto again;
+ /* confirm close from peer */
+ rc = smc_close_final(conn);
+ if (rc)
+ break;
+ if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
- else
+ sock_put(sk); /* postponed passive closing */
+ } else {
/* peer has just issued a shutdown write */
sk->sk_state = SMC_PEERFINCLOSEWAIT;
- smc_close_wait_tx_pends(smc);
+ }
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
@@ -257,6 +264,8 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
}
/* peer sending PeerConnectionClosed will cause transition */
break;
@@ -264,12 +273,8 @@ again:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- release_sock(sk);
- cancel_delayed_work_sync(&conn->tx_work);
- lock_sock(sk);
smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
- smc_close_wait_tx_pends(smc);
break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
@@ -278,7 +283,7 @@ again:
}
if (old_state != sk->sk_state)
- sk->sk_state_change(&smc->sk);
+ sk->sk_state_change(sk);
return rc;
}
@@ -289,37 +294,42 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
struct sock *sk = &smc->sk;
switch (sk->sk_state) {
+ case SMC_INIT:
case SMC_ACTIVE:
- case SMC_APPFINCLOSEWAIT:
case SMC_APPCLOSEWAIT1:
- case SMC_APPCLOSEWAIT2:
- smc_close_abort(&smc->conn);
+ sk->sk_state = SMC_PROCESSABORT;
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_PROCESSABORT;
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (txflags->peer_done_writing &&
- !smc_close_sent_any_close(&smc->conn)) {
+ !smc_close_sent_any_close(&smc->conn))
/* just shutdown, but not yet closed locally */
- smc_close_abort(&smc->conn);
sk->sk_state = SMC_PROCESSABORT;
- } else {
+ else
sk->sk_state = SMC_CLOSED;
- }
+ sock_put(sk); /* passive closing */
break;
+ case SMC_APPCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
sk->sk_state = SMC_CLOSED;
break;
- case SMC_INIT:
case SMC_PROCESSABORT:
/* nothing to do, add tracing in future patch */
break;
}
}
-/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
- * or peer_done_writing.
+/* Either some kind of closing has been received: peer_conn_closed,
+ * peer_conn_abort, or peer_done_writing
+ * or the link group of the connection terminates abnormally.
*/
static void smc_close_passive_work(struct work_struct *work)
{
@@ -331,7 +341,7 @@ static void smc_close_passive_work(struct work_struct *work)
struct sock *sk = &smc->sk;
int old_state;
- lock_sock(&smc->sk);
+ lock_sock(sk);
old_state = sk->sk_state;
if (!conn->alert_token_local) {
@@ -340,23 +350,32 @@ static void smc_close_passive_work(struct work_struct *work)
goto wakeup;
}
- rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
+ rxflags = &conn->local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
+ /* peer has not received all data */
smc_close_passive_abort_received(smc);
+ release_sock(&smc->sk);
+ cancel_delayed_work_sync(&conn->tx_work);
+ lock_sock(&smc->sk);
goto wakeup;
}
switch (sk->sk_state) {
case SMC_INIT:
- if (atomic_read(&smc->conn.bytes_to_rcv) ||
+ if (atomic_read(&conn->bytes_to_rcv) ||
(rxflags->peer_done_writing &&
- !smc_cdc_rxed_any_close(conn)))
+ !smc_cdc_rxed_any_close(conn))) {
sk->sk_state = SMC_APPCLOSEWAIT1;
- else
+ } else {
sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_ACTIVE:
sk->sk_state = SMC_APPCLOSEWAIT1;
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
break;
case SMC_PEERCLOSEWAIT1:
if (rxflags->peer_done_writing)
@@ -364,8 +383,7 @@ static void smc_close_passive_work(struct work_struct *work)
/* fall through */
/* to check for closing */
case SMC_PEERCLOSEWAIT2:
- case SMC_PEERFINCLOSEWAIT:
- if (!smc_cdc_rxed_any_close(&smc->conn))
+ if (!smc_cdc_rxed_any_close(conn))
break;
if (sock_flag(sk, SOCK_DEAD) &&
smc_close_sent_any_close(conn)) {
@@ -375,9 +393,20 @@ static void smc_close_passive_work(struct work_struct *work)
/* just shutdown, but not yet closed locally */
sk->sk_state = SMC_APPFINCLOSEWAIT;
}
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_PEERFINCLOSEWAIT:
+ if (smc_cdc_rxed_any_close(conn)) {
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
+ break;
case SMC_APPFINCLOSEWAIT:
case SMC_PEERABORTWAIT:
case SMC_PROCESSABORT:
@@ -393,23 +422,11 @@ wakeup:
if (old_state != sk->sk_state) {
sk->sk_state_change(sk);
if ((sk->sk_state == SMC_CLOSED) &&
- (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
+ (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
+ smc_conn_free(conn);
}
- release_sock(&smc->sk);
-}
-
-void smc_close_sock_put_work(struct work_struct *work)
-{
- struct smc_sock *smc = container_of(to_delayed_work(work),
- struct smc_sock,
- sock_put_work);
-
- smc->sk.sk_prot->unhash(&smc->sk);
- sock_put(&smc->sk);
+ release_sock(sk);
+ sock_put(sk); /* sock_hold done by schedulers of close_work */
}
int smc_close_shutdown_write(struct smc_sock *smc)
@@ -424,20 +441,21 @@ int smc_close_shutdown_write(struct smc_sock *smc)
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
-again:
old_state = sk->sk_state;
- switch (old_state) {
+again:
+ switch (sk->sk_state) {
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
+ if (sk->sk_state != SMC_ACTIVE)
+ goto again;
/* send close wr request */
rc = smc_close_wr(conn);
- if (sk->sk_state == SMC_ACTIVE)
- sk->sk_state = SMC_PEERCLOSEWAIT1;
- else
- goto again;
+ if (rc)
+ break;
+ sk->sk_state = SMC_PEERCLOSEWAIT1;
break;
case SMC_APPCLOSEWAIT1:
/* passive close */
@@ -446,8 +464,12 @@ again:
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
+ if (sk->sk_state != SMC_APPCLOSEWAIT1)
+ goto again;
/* confirm close from peer */
rc = smc_close_wr(conn);
+ if (rc)
+ break;
sk->sk_state = SMC_APPCLOSEWAIT2;
break;
case SMC_APPCLOSEWAIT2:
@@ -462,7 +484,7 @@ again:
}
if (old_state != sk->sk_state)
- sk->sk_state_change(&smc->sk);
+ sk->sk_state_change(sk);
return rc;
}
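
The close-path hunks above all follow one discipline: whenever the socket lock must be dropped (for example around cancel_delayed_work_sync()), the state machine re-dispatches on the current state afterwards instead of trusting the state it saw before. A self-contained caricature, with illustrative names, of why the goto again re-check in smc_close_active()/smc_close_shutdown_write() matters:

#include <stdio.h>

enum state { ACTIVE, PEERCLOSEWAIT1, CLOSED };

struct sock_s { enum state st; };

static void lock_sock(struct sock_s *s)    { /* grab socket lock */ }
static void release_sock(struct sock_s *s) { /* drop socket lock */ }

static void cancel_tx_work(struct sock_s *s)
{
	/* must run unlocked; the peer may change s->st meanwhile */
	s->st = CLOSED;		/* simulate a racing passive close */
}

static void active_close(struct sock_s *s)
{
again:
	switch (s->st) {
	case ACTIVE:
		release_sock(s);
		cancel_tx_work(s);	/* lock was dropped here      */
		lock_sock(s);
		if (s->st != ACTIVE)	/* state moved underneath us  */
			goto again;	/* redo under the new state   */
		s->st = PEERCLOSEWAIT1;
		break;
	default:
		break;			/* nothing left to do */
	}
}

int main(void)
{
	struct sock_s s = { ACTIVE };

	lock_sock(&s);
	active_close(&s);	/* ends in CLOSED, not PEERCLOSEWAIT1 */
	release_sock(&s);
	printf("final state %d\n", s.st);
	return 0;
}
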
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index ed82506b1b0a..19eb6a211c23 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -20,9 +20,7 @@
#define SMC_CLOSE_SOCK_PUT_DELAY HZ
void smc_close_wake_tx_prepared(struct smc_sock *smc);
-void smc_close_active_abort(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
-void smc_close_sock_put_work(struct work_struct *work);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 94f21116dac5..2424c7100aaf 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work)
bool conns;
spin_lock_bh(&smc_lgr_list.lock);
+ if (list_empty(&lgr->list))
+ goto free;
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
@@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work)
return;
}
list_del_init(&lgr->list); /* remove from smc_lgr_list */
+free:
spin_unlock_bh(&smc_lgr_list.lock);
smc_lgr_free(lgr);
}
@@ -231,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn)
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!lgr)
+ if (!conn->lgr)
return;
smc_cdc_tx_dismiss_slots(conn);
smc_lgr_unregister_conn(conn);
@@ -327,13 +328,17 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
while (node) {
conn = rb_entry(node, struct smc_connection, alert_node);
smc = container_of(conn, struct smc_sock, conn);
- sock_hold(&smc->sk);
+ sock_hold(&smc->sk); /* sock_put in close work */
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
__smc_lgr_unregister_conn(conn);
- schedule_work(&conn->close_work);
- sock_put(&smc->sk);
+ write_unlock_bh(&lgr->conns_lock);
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
+ write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
}
write_unlock_bh(&lgr->conns_lock);
+ wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
}
/* Determine vlan of internal TCP socket.
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index d2d01cf70224..427b91c1c964 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
goto errout;
- if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
+ if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
+ smc->conn.alert_token_local) {
struct smc_connection *conn = &smc->conn;
struct smc_diag_conninfo cinfo = {
.token = conn->alert_token_local,
@@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
- if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
+ if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
+ !list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 90f1a7f9085c..2a8957bd6d38 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -141,6 +141,17 @@ out:
return rc;
}
+static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link_group *lgr, *l;
+
+ list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+ if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+ lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
+ smc_lgr_terminate(lgr);
+ }
+}
+
/* process context wrapper for might_sleep smc_ib_remember_port_attr */
static void smc_ib_port_event_work(struct work_struct *work)
{
@@ -151,6 +162,8 @@ static void smc_ib_port_event_work(struct work_struct *work)
for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
smc_ib_remember_port_attr(smcibdev, port_idx + 1);
clear_bit(port_idx, &smcibdev->port_event_mask);
+ if (!smc_ib_port_active(smcibdev, port_idx + 1))
+ smc_ib_port_terminate(smcibdev, port_idx + 1);
}
}
@@ -165,15 +178,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
switch (ibevent->event) {
case IB_EVENT_PORT_ERR:
- port_idx = ibevent->element.port_num - 1;
- set_bit(port_idx, &smcibdev->port_event_mask);
- schedule_work(&smcibdev->port_event_work);
- /* fall through */
case IB_EVENT_DEVICE_FATAL:
- /* tbd in follow-on patch:
- * abnormal close of corresponding connections
- */
- break;
case IB_EVENT_PORT_ACTIVE:
port_idx = ibevent->element.port_num - 1;
set_bit(port_idx, &smcibdev->port_event_mask);
@@ -186,7 +191,8 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
{
- ib_dealloc_pd(lnk->roce_pd);
+ if (lnk->roce_pd)
+ ib_dealloc_pd(lnk->roce_pd);
lnk->roce_pd = NULL;
}
@@ -203,14 +209,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
{
+ struct smc_ib_device *smcibdev =
+ (struct smc_ib_device *)ibevent->device;
+ u8 port_idx;
+
switch (ibevent->event) {
case IB_EVENT_DEVICE_FATAL:
case IB_EVENT_GID_CHANGE:
case IB_EVENT_PORT_ERR:
case IB_EVENT_QP_ACCESS_ERR:
- /* tbd in follow-on patch:
- * abnormal close of corresponding connections
- */
+ port_idx = ibevent->element.port_num - 1;
+ set_bit(port_idx, &smcibdev->port_event_mask);
+ schedule_work(&smcibdev->port_event_work);
break;
default:
break;
@@ -219,7 +229,8 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
void smc_ib_destroy_queue_pair(struct smc_link *lnk)
{
- ib_destroy_qp(lnk->roce_qp);
+ if (lnk->roce_qp)
+ ib_destroy_qp(lnk->roce_qp);
lnk->roce_qp = NULL;
}
@@ -462,6 +473,7 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
{
if (!smcibdev->initialized)
return;
+ smcibdev->initialized = 0;
smc_wr_remove_dev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
ib_destroy_cq(smcibdev->roce_cq_recv);
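
The smc_ib.c hunks funnel every fatal and port event — now including QP events — through the same deferral: the atomic-context handler only sets a bit in port_event_mask and schedules one worker, which later, in process context, re-reads port attributes and terminates link groups on dead ports. A userspace caricature of that fan-in (names and the two-port limit are illustrative):

#include <stdio.h>

#define SMC_MAX_PORTS 2

static unsigned long port_event_mask;

/* runs in (non-sleeping) event-handler context: record and kick */
static void global_event(int port)
{
	port_event_mask |= 1UL << (port - 1);
	/* kernel: schedule_work(&smcibdev->port_event_work); */
}

/* runs later in process context, where sleeping is allowed */
static void port_event_work(void)
{
	for (int i = 0; i < SMC_MAX_PORTS; i++) {
		if (!(port_event_mask & (1UL << i)))
			continue;
		port_event_mask &= ~(1UL << i);
		printf("port %d: refresh attrs, terminate if down\n",
		       i + 1);
	}
}

int main(void)
{
	global_event(1);	/* e.g. IB_EVENT_PORT_ERR      */
	global_event(2);	/* e.g. IB_EVENT_QP_ACCESS_ERR */
	port_event_work();	/* drains both pending ports   */
	return 0;
}
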
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index cbf58637ee14..9dc392ca06bf 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -65,7 +65,6 @@ static int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
rc = sk_wait_event(sk, timeo,
sk->sk_err ||
sk->sk_shutdown & RCV_SHUTDOWN ||
- sock_flag(sk, SOCK_DONE) ||
atomic_read(&conn->bytes_to_rcv) ||
smc_cdc_rxed_any_close_or_senddone(conn),
&wait);
@@ -116,7 +115,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
if (read_done) {
if (sk->sk_err ||
sk->sk_state == SMC_CLOSED ||
- (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ sk->sk_shutdown & RCV_SHUTDOWN ||
!timeo ||
signal_pending(current) ||
smc_cdc_rxed_any_close_or_senddone(conn) ||
@@ -124,8 +123,6 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
peer_conn_abort)
break;
} else {
- if (sock_flag(sk, SOCK_DONE))
- break;
if (sk->sk_err) {
read_done = sock_error(sk);
break;
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index c48dc2d5fd3a..838bce20c361 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -86,7 +86,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
rc = -EPIPE;
break;
}
- if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
+ if (smc_cdc_rxed_any_close(conn)) {
rc = -ECONNRESET;
break;
}
@@ -104,14 +104,12 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
if (atomic_read(&conn->sndbuf_space))
break; /* at least 1 byte of free space available */
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- sk->sk_write_pending++;
sk_wait_event(sk, &timeo,
sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
- smc_cdc_rxed_any_close_or_senddone(conn) ||
+ smc_cdc_rxed_any_close(conn) ||
atomic_read(&conn->sndbuf_space),
&wait);
- sk->sk_write_pending--;
}
remove_wait_queue(sk_sleep(sk), &wait);
return rc;
@@ -250,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
peer_rmbe_offset;
rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr);
- if (rc)
+ if (rc) {
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ smc_lgr_terminate(lgr);
+ }
return rc;
}
@@ -408,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock;
}
rc = 0;
- schedule_delayed_work(&conn->tx_work,
- SMC_TX_WORK_DELAY);
+ if (conn->alert_token_local) /* connection healthy */
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
@@ -440,19 +441,24 @@ static void smc_tx_work(struct work_struct *work)
int rc;
lock_sock(&smc->sk);
+ if (smc->sk.sk_err ||
+ !conn->alert_token_local ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ goto out;
+
rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+
+out:
release_sock(&smc->sk);
}
void smc_tx_consumer_update(struct smc_connection *conn)
{
union smc_host_cursor cfed, cons;
- struct smc_cdc_tx_pend *pend;
- struct smc_wr_buf *wr_buf;
- int to_confirm, rc;
+ int to_confirm;
smc_curs_write(&cons,
smc_curs_read(&conn->local_tx_ctrl.cons, conn),
@@ -466,10 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
- rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
- if (!rc)
- rc = smc_cdc_msg_send(conn, wr_buf, pend);
- if (rc < 0) {
+ if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
+ conn->alert_token_local) { /* connection healthy */
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index de4537f66832..1b8af23e6e2b 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -122,6 +122,7 @@ static void smc_wr_tx_tasklet_fn(unsigned long data)
again:
polled++;
do {
+ memset(&wc, 0, sizeof(wc));
rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
if (polled == 1) {
ib_req_notify_cq(dev->roce_cq_send,
@@ -173,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv **wr_pend_priv)
{
struct smc_wr_tx_pend *wr_pend;
+ u32 idx = link->wr_tx_cnt;
struct ib_send_wr *wr_ib;
u64 wr_id;
- u32 idx;
int rc;
*wr_buf = NULL;
@@ -185,21 +186,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
if (rc)
return rc;
} else {
- rc = wait_event_interruptible_timeout(
+ struct smc_link_group *lgr;
+
+ lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ rc = wait_event_timeout(
link->wr_tx_wait,
+ list_empty(&lgr->list) || /* lgr terminated */
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
- struct smc_link_group *lgr;
-
- lgr = container_of(link, struct smc_link_group,
- lnk[SMC_SINGLE_LINK]);
smc_lgr_terminate(lgr);
return -EPIPE;
}
- if (rc == -ERESTARTSYS)
- return -EINTR;
if (idx == link->wr_tx_cnt)
return -EPIPE;
}
@@ -249,8 +249,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
pend = container_of(priv, struct smc_wr_tx_pend, priv);
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
&failed_wr);
- if (rc)
+ if (rc) {
+ struct smc_link_group *lgr =
+ container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
smc_wr_tx_put_slot(link, priv);
+ smc_lgr_terminate(lgr);
+ }
return rc;
}
@@ -300,18 +306,18 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
return rc;
}
-void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
+void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
unsigned long data)
{
struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_rx;
+ struct smc_wr_rx_hdr *wr_tx;
int i;
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
- if (wr_rx->type != wr_rx_hdr_type)
+ wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
+ if (wr_tx->type != wr_tx_hdr_type)
continue;
tx_pend = &link->wr_tx_pends[i].priv;
if (filter(tx_pend, data))
@@ -319,24 +325,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
}
}
-bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter, unsigned long data)
-{
- struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_rx;
- int i;
-
- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
- if (wr_rx->type != wr_rx_hdr_type)
- continue;
- tx_pend = &link->wr_tx_pends[i].priv;
- if (filter(tx_pend, data))
- return true;
- }
- return false;
-}
-
/****************************** receive queue ********************************/
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 2acf12b06063..ef0c3494c9cb 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -93,8 +93,6 @@ int smc_wr_tx_put_slot(struct smc_link *link,
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
-bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter, unsigned long data);
void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,