diff options
author | Chuck Lever <chuck.lever@oracle.com> | 2019-06-19 10:32:48 -0400 |
---|---|---|
committer | Anna Schumaker <Anna.Schumaker@Netapp.com> | 2019-07-09 10:30:16 -0400 |
commit | 05eb06d86685e7d9dac60e6bbb46d7f4c30b056e (patch) | |
tree | 8f04bce3b88e16afd6912e44681b80752721c171 /net/sunrpc/xprtrdma/rpc_rdma.c | |
parent | 1310051c720a83c5717658bcbff710b260f2bff9 (diff) | |
download | linux-05eb06d86685e7d9dac60e6bbb46d7f4c30b056e.tar.gz |
xprtrdma: Fix occasional transport deadlock
Under high I/O workloads, I've noticed that an RPC/RDMA transport
occasionally deadlocks (IOPS goes to zero, and doesn't recover).
Diagnosis shows that the sendctx queue is empty, but when sendctxs
are returned to the queue, the xprt_write_space wake-up never
occurs. The wake-up logic in rpcrdma_sendctx_put_locked is racy.
I noticed that both EMPTY_SCQ and XPRT_WRITE_SPACE are implemented
via an atomic bit. Just one of those is sufficient. Removing
EMPTY_SCQ in favor of the generic bit mechanism makes the deadlock
un-reproducible.
Without EMPTY_SCQ, rpcrdma_buffer::rb_flags is no longer used and
is therefore removed.
Unfortunately this patch does not apply cleanly to stable. If
needed, someone will have to port it and test it.
Fixes: 2fad659209d5 ("xprtrdma: Wait on empty sendctx queue")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'net/sunrpc/xprtrdma/rpc_rdma.c')
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 26 |
1 files changed, 12 insertions, 14 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 97bfb804b6c6..59b214ba8813 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -699,22 +699,28 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, u32 hdrlen, struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) { + int ret; + + ret = -EAGAIN; req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt); if (!req->rl_sendctx) - return -EAGAIN; + goto err; req->rl_sendctx->sc_wr.num_sge = 0; req->rl_sendctx->sc_unmap_count = 0; req->rl_sendctx->sc_req = req; __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags); + ret = -EIO; if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen)) - return -EIO; - + goto err; if (rtype != rpcrdma_areadch) if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype)) - return -EIO; - + goto err; return 0; + +err: + trace_xprtrdma_prepsend_failed(&req->rl_slot, ret); + return ret; } /** @@ -877,15 +883,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) out_err: trace_xprtrdma_marshal_failed(rqst, ret); - switch (ret) { - case -EAGAIN: - xprt_wait_for_buffer_space(rqst->rq_xprt); - break; - case -ENOBUFS: - break; - default: - r_xprt->rx_stats.failed_marshal_count++; - } + r_xprt->rx_stats.failed_marshal_count++; return ret; } |