about | summary | refs | log | tree | commit | diff | stats
path: root/net/sunrpc/xprtrdma/rpc_rdma.c
diff options
context:
space:
mode:
author: Chuck Lever <chuck.lever@oracle.com> 2019-06-19 10:32:48 -0400
committer: Anna Schumaker <Anna.Schumaker@Netapp.com> 2019-07-09 10:30:16 -0400
commit: 05eb06d86685e7d9dac60e6bbb46d7f4c30b056e (patch)
tree: 8f04bce3b88e16afd6912e44681b80752721c171 /net/sunrpc/xprtrdma/rpc_rdma.c
parent: 1310051c720a83c5717658bcbff710b260f2bff9 (diff)
download: linux-05eb06d86685e7d9dac60e6bbb46d7f4c30b056e.tar.gz
xprtrdma: Fix occasional transport deadlock
Under high I/O workloads, I've noticed that an RPC/RDMA transport occasionally deadlocks (IOPS goes to zero, and doesn't recover). Diagnosis shows that the sendctx queue is empty, but when sendctxs are returned to the queue, the xprt_write_space wake-up never occurs. The wake-up logic in rpcrdma_sendctx_put_locked is racy.

I noticed that both EMPTY_SCQ and XPRT_WRITE_SPACE are implemented via an atomic bit. Just one of those is sufficient. Removing EMPTY_SCQ in favor of the generic bit mechanism makes the deadlock un-reproducible.

Without EMPTY_SCQ, rpcrdma_buffer::rb_flags is no longer used and is therefore removed.

Unfortunately this patch does not apply cleanly to stable. If needed, someone will have to port it and test it.

Fixes: 2fad659209d5 ("xprtrdma: Wait on empty sendctx queue")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net/sunrpc/xprtrdma/rpc_rdma.c')
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c | 26
1 file changed, 12 insertions, 14 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 97bfb804b6c6..59b214ba8813 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -699,22 +699,28 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req, u32 hdrlen,
struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
{
+ int ret;
+
+ ret = -EAGAIN;
req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
if (!req->rl_sendctx)
- return -EAGAIN;
+ goto err;
req->rl_sendctx->sc_wr.num_sge = 0;
req->rl_sendctx->sc_unmap_count = 0;
req->rl_sendctx->sc_req = req;
__clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
+ ret = -EIO;
if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
- return -EIO;
-
+ goto err;
if (rtype != rpcrdma_areadch)
if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype))
- return -EIO;
-
+ goto err;
return 0;
+
+err:
+ trace_xprtrdma_prepsend_failed(&req->rl_slot, ret);
+ return ret;
}
/**
@@ -877,15 +883,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
out_err:
trace_xprtrdma_marshal_failed(rqst, ret);
- switch (ret) {
- case -EAGAIN:
- xprt_wait_for_buffer_space(rqst->rq_xprt);
- break;
- case -ENOBUFS:
- break;
- default:
- r_xprt->rx_stats.failed_marshal_count++;
- }
+ r_xprt->rx_stats.failed_marshal_count++;
return ret;
}