From a4e1d0b76e7b32c0839e72679c530445172a2564 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 15 Aug 2022 10:00:43 -0700 Subject: block: Change the return type of blk_mq_map_queues() into void Since blk_mq_map_queues() and the .map_queues() callbacks always return 0, change their return type into void. Most callers ignore the returned value anyway. Cc: Christoph Hellwig Cc: Jason Wang Cc: Keith Busch Cc: Martin K. Petersen Cc: Doug Gilbert Cc: Michael S. Tsirkin Signed-off-by: Bart Van Assche Reviewed-by: John Garry Acked-by: Md Haris Iqbal Reviewed-by: Sagi Grimberg Link: https://lore.kernel.org/r/20220815170043.19489-3-bvanassche@acm.org [axboe: fold in fix from Bart] Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 3 +-- drivers/nvme/host/pci.c | 4 +--- drivers/nvme/host/rdma.c | 4 +--- drivers/nvme/host/tcp.c | 4 +--- 4 files changed, 4 insertions(+), 11 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 127abaf9ba5d..42767fb75455 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2860,7 +2860,7 @@ nvme_fc_complete_rq(struct request *rq) nvme_fc_ctrl_put(ctrl); } -static int nvme_fc_map_queues(struct blk_mq_tag_set *set) +static void nvme_fc_map_queues(struct blk_mq_tag_set *set) { struct nvme_fc_ctrl *ctrl = set->driver_data; int i; @@ -2880,7 +2880,6 @@ static int nvme_fc_map_queues(struct blk_mq_tag_set *set) else blk_mq_map_queues(map); } - return 0; } static const struct blk_mq_ops nvme_fc_mq_ops = { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3a1c37f32f30..4a8cfb360d31 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -450,7 +450,7 @@ static int queue_irq_offset(struct nvme_dev *dev) return 0; } -static int nvme_pci_map_queues(struct blk_mq_tag_set *set) +static void nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; int i, qoff, offset; @@ -477,8 +477,6 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) qoff += map->nr_queues; offset += map->nr_queues; } - - return 0; } /* diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3100643be299..ba08851e42c3 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2188,7 +2188,7 @@ static void nvme_rdma_complete_rq(struct request *rq) nvme_complete_rq(rq); } -static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) +static void nvme_rdma_map_queues(struct blk_mq_tag_set *set) { struct nvme_rdma_ctrl *ctrl = set->driver_data; struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; @@ -2231,8 +2231,6 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) ctrl->io_queues[HCTX_TYPE_DEFAULT], ctrl->io_queues[HCTX_TYPE_READ], ctrl->io_queues[HCTX_TYPE_POLL]); - - return 0; } static const struct blk_mq_ops nvme_rdma_mq_ops = { diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 044da18c06f5..ef151c23d495 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2471,7 +2471,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } -static int nvme_tcp_map_queues(struct blk_mq_tag_set *set) +static void nvme_tcp_map_queues(struct blk_mq_tag_set *set) { struct nvme_tcp_ctrl *ctrl = set->driver_data; struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; @@ -2512,8 +2512,6 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set) ctrl->io_queues[HCTX_TYPE_DEFAULT], ctrl->io_queues[HCTX_TYPE_READ], ctrl->io_queues[HCTX_TYPE_POLL]); - - return 0; } static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) -- cgit From 1abc696174f1ba27c9563c722029373e1a692933 Mon Sep 17 00:00:00 2001 From: Linjun Bao Date: Thu, 11 Aug 2022 15:40:24 +0800 Subject: nvme: add comment for unaligned "fake" nqn Current "fake" nqn field is "nqn.2014.08.org.nvmexpress:", it is not aligned with the canonical version for history reasons. Signed-off-by: Linjun Bao Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index af367b22871b..112881664695 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2704,7 +2704,11 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n"); } - /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */ + /* + * Generate a "fake" NQN similar to the one in Section 4.5 of the NVMe + * Base Specification 2.0. It is slightly different from the format + * specified there due to historic reasons, and we can't change it now. + */ off = snprintf(subsys->subnqn, NVMF_NQN_SIZE, "nqn.2014.08.org.nvmexpress:%04x%04x", le16_to_cpu(id->vid), le16_to_cpu(id->ssvid)); -- cgit From a8817cc09d8e6140c8561a81dded7b3f68e15a1b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:00:52 +0200 Subject: nvme: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Wolfram Sang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/fabrics.c | 2 +- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/discovery.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 112881664695..8c9c1176624d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2696,7 +2696,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct if(!(ctrl->quirks & NVME_QUIRK_IGNORE_DEV_SUBNQN)) { nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); + strscpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); return; } diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 10cc4a814602..61637c1dd722 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -49,7 +49,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) goto out_unlock; kref_init(&host->ref); - strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); + strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE); list_add_tail(&host->list, &nvmf_hosts); out_unlock: diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index fc8a957fad0a..c8a061ce3ee5 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -449,7 +449,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); - strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); + strscpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); /* * Max command capsule size is sqe + in-capsule data size. diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index c2162eef8ce1..668d257fa986 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -292,7 +292,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) id->oaes = cpu_to_le32(NVMET_DISC_AEN_CFG_OPTIONAL); - strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); + strscpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); -- cgit From 6e6fee569d4733f028f306036aaa12669c25b362 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Fri, 12 Aug 2022 11:12:31 +0800 Subject: nvme-auth: remove the redundant req->cqe->result.u16 assignment operation req->cqe->result.u16 has already been assigned in the previous line, no need to do it again. Signed-off-by: Jackie Liu Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fabrics-cmd.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index f91a56180d3d..c1dfdfb92ebf 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -332,7 +332,6 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); - req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); if (nvmet_has_auth(ctrl)) nvmet_init_auth(ctrl, req); -- cgit From 42147981561c3344d2c6781fe7029e5900daa9fb Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Fri, 12 Aug 2022 11:12:30 +0800 Subject: nvmet-auth: clean up with done_kfree Jump directly to done_kfree to release d, which is consistent with the code style behind. Reported-by: Genjian Zhang Signed-off-by: Jackie Liu Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fabrics-cmd-auth.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index ebdf9aa81041..8458ec40340b 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -229,10 +229,8 @@ void nvmet_execute_auth_send(struct nvmet_req *req) } status = nvmet_copy_from_sgl(req, 0, d, tl); - if (status) { - kfree(d); - goto done; - } + if (status) + goto done_kfree; data = d; pr_debug("%s: ctrl %d qid %d type %d id %d step %x\n", __func__, -- cgit From c46724cb8947697d4cfa5db44763d7c63b93a02b Mon Sep 17 00:00:00 2001 From: Genjian Zhang Date: Tue, 23 Aug 2022 10:14:41 +0800 Subject: nvmet-auth: remove redundant parameters req The parameter is not used in this function, so remove it. Signed-off-by: Genjian Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fabrics-cmd-auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 8458ec40340b..84601e38a335 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -177,7 +177,7 @@ static u16 nvmet_auth_reply(struct nvmet_req *req, void *d) return 0; } -static u16 nvmet_auth_failure2(struct nvmet_req *req, void *d) +static u16 nvmet_auth_failure2(void *d) { struct nvmf_auth_dhchap_failure_data *data = d; @@ -308,7 +308,7 @@ void nvmet_execute_auth_send(struct nvmet_req *req) goto done_kfree; break; case NVME_AUTH_DHCHAP_MESSAGE_FAILURE2: - status = nvmet_auth_failure2(req, d); + status = nvmet_auth_failure2(d); if (status) { pr_warn("ctrl %d qid %d: authentication failed (%d)\n", ctrl->cntlid, req->sq->qid, status); -- cgit From d416800776b530ad629b2959909062287339defd Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Fri, 29 Jul 2022 11:53:05 +0800 Subject: nvmet: avoid unnecessary flush bio For no volatile write cache block device backend, sending flush bio is unnecessary, avoid to do that. Signed-off-by: Guixin Liu Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-bdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 2dc1c1035626..8d527a8c0f54 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -334,6 +334,11 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req) { struct bio *bio = &req->b.inline_bio; + if (!bdev_write_cache(req->ns->bdev)) { + nvmet_req_complete(req, NVME_SC_SUCCESS); + return; + } + if (!nvmet_check_transfer_len(req, 0)) return; @@ -347,6 +352,9 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req) u16 nvmet_bdev_flush(struct nvmet_req *req) { + if (!bdev_write_cache(req->ns->bdev)) + return 0; + if (blkdev_issue_flush(req->ns->bdev)) return NVME_SC_INTERNAL | NVME_SC_DNR; return 0; -- cgit From 3e980f5995e0bb4d86fef873a9c9ad66721580d0 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 29 Aug 2022 11:28:39 +0200 Subject: nvmet: expose max queues to configfs Allow to set the max queues the target supports. This is useful for testing the reconnect attempt of the host with changing numbers of supported queues. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 2bcd60758919..e34a2896fedb 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1281,6 +1281,34 @@ static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item, CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable); #endif +static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->max_qid); +} + +static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item, + const char *page, size_t cnt) +{ + struct nvmet_port *port = to_nvmet_port(item); + u16 qid_max; + + if (nvmet_is_port_enabled(port, __func__)) + return -EACCES; + + if (sscanf(page, "%hu\n", &qid_max) != 1) + return -EINVAL; + + if (qid_max < 1 || qid_max > NVMET_NR_QUEUES) + return -EINVAL; + + down_write(&nvmet_config_sem); + to_subsys(item)->max_qid = qid_max; + up_write(&nvmet_config_sem); + return cnt; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_qid_max); + static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_version, @@ -1288,6 +1316,7 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_cntlid_min, &nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_model, + &nvmet_subsys_attr_attr_qid_max, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_subsys_attr_attr_pi_enable, #endif -- cgit From 09035f86496d8dea7a05a07f6dcb8083c0a3d885 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 29 Aug 2022 11:28:40 +0200 Subject: nvme-tcp: handle number of queue changes On reconnect, the number of queues might have changed. In the case where we have more queues available than previously we try to access queues which are not initialized yet. The other case where we have less queues than previously, the connection attempt will fail because the target doesn't support the old number of queues and we end up in a reconnect loop. Thus, only start queues which are currently present in the tagset limited by the number of available queues. Then we update the tagset and we can start any new queue. Signed-off-by: Daniel Wagner Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index ef151c23d495..4c2d198145a7 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1762,11 +1762,12 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) nvme_tcp_stop_queue(ctrl, i); } -static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl) +static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, + int first, int last) { int i, ret; - for (i = 1; i < ctrl->queue_count; i++) { + for (i = first; i < last; i++) { ret = nvme_tcp_start_queue(ctrl, i); if (ret) goto out_stop_queues; @@ -1775,7 +1776,7 @@ static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl) return 0; out_stop_queues: - for (i--; i >= 1; i--) + for (i--; i >= first; i--) nvme_tcp_stop_queue(ctrl, i); return ret; } @@ -1901,7 +1902,7 @@ static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) { - int ret; + int ret, nr_queues; ret = nvme_tcp_alloc_io_queues(ctrl); if (ret) @@ -1917,7 +1918,13 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) goto out_free_tag_set; } - ret = nvme_tcp_start_io_queues(ctrl); + /* + * Only start IO queues for which we have allocated the tagset + * and limitted it to the available queues. On reconnects, the + * queue number might have changed. + */ + nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count); + ret = nvme_tcp_start_io_queues(ctrl, 1, nr_queues); if (ret) goto out_cleanup_connect_q; @@ -1937,6 +1944,15 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) nvme_unfreeze(ctrl); } + /* + * If the number of queues has increased (reconnect case) + * start all new queues now. + */ + ret = nvme_tcp_start_io_queues(ctrl, nr_queues, + ctrl->tagset->nr_hw_queues + 1); + if (ret) + goto out_wait_freeze_timed_out; + return 0; out_wait_freeze_timed_out: -- cgit From 1c467e259599864ec925d5b85066a0960320fb3c Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 29 Aug 2022 11:28:41 +0200 Subject: nvme-rdma: handle number of queue changes On reconnect, the number of queues might have changed. In the case where we have more queues available than previously we try to access queues which are not initialized yet. The other case where we have less queues than previously, the connection attempt will fail because the target doesn't support the old number of queues and we end up in a reconnect loop. Thus, only start queues which are currently present in the tagset limited by the number of available queues. Then we update the tagset and we can start any new queue. Signed-off-by: Daniel Wagner Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ba08851e42c3..4c6df34f9d7a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -696,11 +696,12 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) return ret; } -static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl) +static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl, + int first, int last) { int i, ret = 0; - for (i = 1; i < ctrl->ctrl.queue_count; i++) { + for (i = first; i < last; i++) { ret = nvme_rdma_start_queue(ctrl, i); if (ret) goto out_stop_queues; @@ -709,7 +710,7 @@ static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl) return 0; out_stop_queues: - for (i--; i >= 1; i--) + for (i--; i >= first; i--) nvme_rdma_stop_queue(&ctrl->queues[i]); return ret; } @@ -964,7 +965,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) { - int ret; + int ret, nr_queues; ret = nvme_rdma_alloc_io_queues(ctrl); if (ret) @@ -980,7 +981,13 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) goto out_free_tag_set; } - ret = nvme_rdma_start_io_queues(ctrl); + /* + * Only start IO queues for which we have allocated the tagset + * and limitted it to the available queues. On reconnects, the + * queue number might have changed. + */ + nr_queues = min(ctrl->tag_set.nr_hw_queues + 1, ctrl->ctrl.queue_count); + ret = nvme_rdma_start_io_queues(ctrl, 1, nr_queues); if (ret) goto out_cleanup_connect_q; @@ -1000,6 +1007,15 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) nvme_unfreeze(&ctrl->ctrl); } + /* + * If the number of queues has increased (reconnect case) + * start all new queues now. + */ + ret = nvme_rdma_start_io_queues(ctrl, nr_queues, + ctrl->tag_set.nr_hw_queues + 1); + if (ret) + goto out_wait_freeze_timed_out; + return 0; out_wait_freeze_timed_out: -- cgit From 4cde03d82e2d0056d20fd5af6a264c7f5e6a3e76 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 29 Jul 2022 16:26:30 +0200 Subject: nvme: consider also host_iface when checking ip options It's perfectly fine to use the same traddr and trsvcid more than once as long we use different host interface. This is used in setups where the host has more than one interface but the target exposes only one traddr/trsvcid combination. Use the same acceptance rules for host_iface as we have for host_traddr. Signed-off-by: Daniel Wagner Reviewed-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 61637c1dd722..ce27276f552d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -971,13 +971,17 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, return false; /* - * Checking the local address is rough. In most cases, none is specified - * and the host port is selected by the stack. + * Checking the local address or host interfaces is rough. + * + * In most cases, none is specified and the host port or + * host interface is selected by the stack. * * Assume no match if: - * - local address is specified and address is not the same - * - local address is not specified but remote is, or vice versa - * (admin using specific host_traddr when it matters). + * - local address or host interface is specified and address + * or host interface is not the same + * - local address or host interface is not specified but + * remote is, or vice versa (admin using specific + * host_traddr/host_iface when it matters). */ if ((opts->mask & NVMF_OPT_HOST_TRADDR) && (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { @@ -988,6 +992,15 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, return false; } + if ((opts->mask & NVMF_OPT_HOST_IFACE) && + (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)) { + if (strcmp(opts->host_iface, ctrl->opts->host_iface)) + return false; + } else if ((opts->mask & NVMF_OPT_HOST_IFACE) || + (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)) { + return false; + } + return true; } EXPORT_SYMBOL_GPL(nvmf_ip_options_match); -- cgit From a53232cb3abef51524f06ee9d8fbc3364ad95794 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 6 Sep 2022 09:07:35 -0700 Subject: nvme-pci: remove nvme_queue from nvme_iod We can get the nvme_queue from the req just as easily, so remove the duplicate path to the same structure to save some space. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4a8cfb360d31..403876ad3234 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -226,7 +226,6 @@ struct nvme_queue { struct nvme_iod { struct nvme_request req; struct nvme_command cmd; - struct nvme_queue *nvmeq; bool use_sgl; int aborted; int npages; /* In the PRP list. 0 means small pool in use */ @@ -430,11 +429,6 @@ static int nvme_pci_init_request(struct blk_mq_tag_set *set, { struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0; - struct nvme_queue *nvmeq = &dev->queues[queue_idx]; - - BUG_ON(!nvmeq); - iod->nvmeq = nvmeq; nvme_req(req)->ctrl = &dev->ctrl; nvme_req(req)->cmd = &iod->cmd; @@ -526,7 +520,7 @@ static void **nvme_pci_iod_list(struct request *req) static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) { - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; int nseg = blk_rq_nr_phys_segments(req); unsigned int avg_seg_size; @@ -534,7 +528,7 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) if (!nvme_ctrl_sgl_supported(&dev->ctrl)) return false; - if (!iod->nvmeq->qid) + if (!nvmeq->qid) return false; if (!sgl_threshold || avg_seg_size < sgl_threshold) return false; @@ -831,6 +825,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, int rc; if (blk_rq_nr_phys_segments(req) == 1) { + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct bio_vec bv = req_bvec(req); if (!is_pci_p2pdma_page(bv.bv_page)) { @@ -838,7 +833,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, return nvme_setup_prp_simple(dev, req, &cmnd->rw, &bv); - if (iod->nvmeq->qid && sgl_threshold && + if (nvmeq->qid && sgl_threshold && nvme_ctrl_sgl_supported(&dev->ctrl)) return nvme_setup_sgl_simple(dev, req, &cmnd->rw, &bv); @@ -1017,12 +1012,16 @@ static void nvme_queue_rqs(struct request **rqlist) static __always_inline void nvme_pci_unmap_rq(struct request *req) { - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_dev *dev = iod->nvmeq->dev; + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; + + if (blk_integrity_rq(req)) { + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - if (blk_integrity_rq(req)) dma_unmap_page(dev->dev, iod->meta_dma, rq_integrity_vec(req)->bv_len, rq_data_dir(req)); + } + if (blk_rq_nr_phys_segments(req)) nvme_unmap_data(dev, req); } @@ -1270,8 +1269,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) static void abort_endio(struct request *req, blk_status_t error) { - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = iod->nvmeq; + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", nvme_req(req)->status); @@ -1333,7 +1331,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) static enum blk_eh_timer_return nvme_timeout(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_queue *nvmeq = iod->nvmeq; + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct nvme_dev *dev = nvmeq->dev; struct request *abort_req; struct nvme_command cmd = { }; -- cgit From 52da4f3f5cbd42815946c0544774167241f9f45f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 6 Sep 2022 09:07:36 -0700 Subject: nvme-pci: iod's 'aborted' is a bool It's only true or false, so make this a bool to reflect that and save some space in nvme_iod. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 403876ad3234..045ebdd8e8f3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -227,7 +227,7 @@ struct nvme_iod { struct nvme_request req; struct nvme_command cmd; bool use_sgl; - int aborted; + bool aborted; int npages; /* In the PRP list. 0 means small pool in use */ dma_addr_t first_dma; unsigned int dma_len; /* length of single DMA segment mapping */ @@ -891,7 +891,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) struct nvme_iod *iod = blk_mq_rq_to_pdu(req); blk_status_t ret; - iod->aborted = 0; + iod->aborted = false; iod->npages = -1; iod->sgt.nents = 0; @@ -1412,7 +1412,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) atomic_inc(&dev->ctrl.abort_limit); return BLK_EH_RESET_TIMER; } - iod->aborted = 1; + iod->aborted = true; cmd.abort.opcode = nvme_admin_abort_cmd; cmd.abort.cid = nvme_cid(req); -- cgit From c372cdd1efdf2b8e51fdde36a2a05c263ab5ebb7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 6 Sep 2022 09:07:37 -0700 Subject: nvme-pci: iod npages fits in s8 The largest allowed transfer is 4MB, which can use at most 1025 PRPs. Each PRP is 8 bytes, so the maximum number of 4k nvme pages needed for the iod_list is 3, which fits in an 's8' type. While modifying this field, change the name to "nr_allocations" to better represent that this is referring to the number of units allocated from a dma_pool. Also introduce a BUILD_BUG_ON to ensure we never accidently increase the largest transfer limit beyond 127 chained prp lists. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 045ebdd8e8f3..a553062ff3ba 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -228,7 +228,8 @@ struct nvme_iod { struct nvme_command cmd; bool use_sgl; bool aborted; - int npages; /* In the PRP list. 0 means small pool in use */ + s8 nr_allocations; /* PRP list pool allocations. 0 means small + pool in use */ dma_addr_t first_dma; unsigned int dma_len; /* length of single DMA segment mapping */ dma_addr_t meta_dma; @@ -542,7 +543,7 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req) dma_addr_t dma_addr = iod->first_dma; int i; - for (i = 0; i < iod->npages; i++) { + for (i = 0; i < iod->nr_allocations; i++) { __le64 *prp_list = nvme_pci_iod_list(req)[i]; dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); @@ -558,7 +559,7 @@ static void nvme_free_sgls(struct nvme_dev *dev, struct request *req) dma_addr_t dma_addr = iod->first_dma; int i; - for (i = 0; i < iod->npages; i++) { + for (i = 0; i < iod->nr_allocations; i++) { struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i]; dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr); @@ -581,7 +582,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); - if (iod->npages == 0) + if (iod->nr_allocations == 0) dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], iod->first_dma); else if (iod->use_sgl) @@ -643,15 +644,15 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE); if (nprps <= (256 / 8)) { pool = dev->prp_small_pool; - iod->npages = 0; + iod->nr_allocations = 0; } else { pool = dev->prp_page_pool; - iod->npages = 1; + iod->nr_allocations = 1; } prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); if (!prp_list) { - iod->npages = -1; + iod->nr_allocations = -1; return BLK_STS_RESOURCE; } list[0] = prp_list; @@ -663,7 +664,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); if (!prp_list) goto free_prps; - list[iod->npages++] = prp_list; + list[iod->nr_allocations++] = prp_list; prp_list[0] = old_prp_list[i - 1]; old_prp_list[i - 1] = cpu_to_le64(prp_dma); i = 1; @@ -738,15 +739,15 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, if (entries <= (256 / sizeof(struct nvme_sgl_desc))) { pool = dev->prp_small_pool; - iod->npages = 0; + iod->nr_allocations = 0; } else { pool = dev->prp_page_pool; - iod->npages = 1; + iod->nr_allocations = 1; } sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); if (!sg_list) { - iod->npages = -1; + iod->nr_allocations = -1; return BLK_STS_RESOURCE; } @@ -765,7 +766,7 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, goto free_sgls; i = 0; - nvme_pci_iod_list(req)[iod->npages++] = sg_list; + nvme_pci_iod_list(req)[iod->nr_allocations++] = sg_list; sg_list[i++] = *link; nvme_pci_sgl_set_seg(link, sgl_dma, entries); } @@ -892,7 +893,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) blk_status_t ret; iod->aborted = false; - iod->npages = -1; + iod->nr_allocations = -1; iod->sgt.nents = 0; ret = nvme_setup_cmd(req->q->queuedata, req); @@ -3559,6 +3560,8 @@ static int __init nvme_init(void) BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); + BUILD_BUG_ON(DIV_ROUND_UP(nvme_pci_npages_prp(), NVME_CTRL_PAGE_SIZE) > + S8_MAX); return pci_register_driver(&nvme_driver); } -- cgit From c4c22c52081385f026b430f35776f80073a22076 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 7 Sep 2022 08:42:07 +0200 Subject: nvme-pci: move iod dma_len fill gaps The 32-bit field, dma_len, packs better in the iod struct above the dma_addr_t on 64-bit systems. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a553062ff3ba..70b1922c8953 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -230,8 +230,8 @@ struct nvme_iod { bool aborted; s8 nr_allocations; /* PRP list pool allocations. 0 means small pool in use */ - dma_addr_t first_dma; unsigned int dma_len; /* length of single DMA segment mapping */ + dma_addr_t first_dma; dma_addr_t meta_dma; struct sg_table sgt; }; -- cgit From 5bfaba275ae6486700194cad962574e3eb7ae60d Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Wed, 31 Aug 2022 00:05:33 +0200 Subject: nvmet-tcp: don't map pages which can't come from HIGHMEM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kmap() is being deprecated in favor of kmap_local_page().[1] There are two main problems with kmap(): (1) It comes with an overhead as mapping space is restricted and protected by a global lock for synchronization and (2) it also requires global TLB invalidation when the kmap’s pool wraps and it might block when the mapping space is fully utilized until a slot becomes available. The pages which will be mapped are allocated in nvmet_tcp_map_data(), using the GFP_KERNEL flag. This assures that they cannot come from HIGHMEM. This imply that a straight page_address() can replace the kmap() of sg_page(sg) in nvmet_tcp_map_pdu_iovec(). As a side effect, we might also delete the field "nr_mapped" from struct "nvmet_tcp_cmd" because, after removing the kmap() calls, there would be no longer any need of it. In addition, there is no reason to use a kvec for the command receive data buffers iovec, use a bio_vec instead and let iov_iter handle the buffer mapping and data copy. Test with blktests on a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with HIGHMEM64GB enabled. [1] "[PATCH] checkpatch: Add kmap and kmap_atomic to the deprecated list" https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com/ Cc: Chaitanya Kulkarni Cc: Keith Busch Suggested-by: Ira Weiny Signed-off-by: Fabio M. De Francesco Suggested-by: Christoph Hellwig Suggested-by: Al Viro [sagi: added bio_vec plus minor naming changes] Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 44 +++++++++++++------------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index dc3b4dc8fe08..c07de4f4f719 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -77,9 +77,8 @@ struct nvmet_tcp_cmd { u32 pdu_len; u32 pdu_recv; int sg_idx; - int nr_mapped; struct msghdr recv_msg; - struct kvec *iov; + struct bio_vec *iov; u32 flags; struct list_head entry; @@ -167,7 +166,6 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd); -static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd); static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, struct nvmet_tcp_cmd *cmd) @@ -301,35 +299,21 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu) static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) { - WARN_ON(unlikely(cmd->nr_mapped > 0)); - kfree(cmd->iov); sgl_free(cmd->req.sg); cmd->iov = NULL; cmd->req.sg = NULL; } -static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd) -{ - struct scatterlist *sg; - int i; - - sg = &cmd->req.sg[cmd->sg_idx]; - - for (i = 0; i < cmd->nr_mapped; i++) - kunmap(sg_page(&sg[i])); - - cmd->nr_mapped = 0; -} - -static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) +static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) { - struct kvec *iov = cmd->iov; + struct bio_vec *iov = cmd->iov; struct scatterlist *sg; u32 length, offset, sg_offset; + int nr_pages; length = cmd->pdu_len; - cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE); + nr_pages = DIV_ROUND_UP(length, PAGE_SIZE); offset = cmd->rbytes_done; cmd->sg_idx = offset / PAGE_SIZE; sg_offset = offset % PAGE_SIZE; @@ -338,8 +322,9 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) while (length) { u32 iov_len = min_t(u32, length, sg->length - sg_offset); - iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset; - iov->iov_len = iov_len; + iov->bv_page = sg_page(sg); + iov->bv_len = sg->length; + iov->bv_offset = sg->offset + sg_offset; length -= iov_len; sg = sg_next(sg); @@ -347,8 +332,8 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) sg_offset = 0; } - iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, - cmd->nr_mapped, cmd->pdu_len); + iov_iter_bvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, + nr_pages, cmd->pdu_len); } static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) @@ -926,7 +911,7 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, } queue->rcv_state = NVMET_TCP_RECV_DATA; - nvmet_tcp_map_pdu_iovec(cmd); + nvmet_tcp_build_pdu_iovec(cmd); cmd->flags |= NVMET_TCP_F_INIT_FAILED; } @@ -952,7 +937,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) cmd->pdu_len = le32_to_cpu(data->data_length); cmd->pdu_recv = 0; - nvmet_tcp_map_pdu_iovec(cmd); + nvmet_tcp_build_pdu_iovec(cmd); queue->cmd = cmd; queue->rcv_state = NVMET_TCP_RECV_DATA; @@ -1021,7 +1006,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) if (nvmet_tcp_need_data_in(queue->cmd)) { if (nvmet_tcp_has_inline_data(queue->cmd)) { queue->rcv_state = NVMET_TCP_RECV_DATA; - nvmet_tcp_map_pdu_iovec(queue->cmd); + nvmet_tcp_build_pdu_iovec(queue->cmd); return 0; } /* send back R2T */ @@ -1141,7 +1126,6 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) cmd->rbytes_done += ret; } - nvmet_tcp_unmap_pdu_iovec(cmd); if (queue->data_digest) { nvmet_tcp_prep_recv_ddgst(cmd); return 0; @@ -1411,7 +1395,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) { nvmet_req_uninit(&cmd->req); - nvmet_tcp_unmap_pdu_iovec(cmd); nvmet_tcp_free_cmd_buffers(cmd); } @@ -1424,7 +1407,6 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) if (nvmet_tcp_need_data_in(cmd)) nvmet_req_uninit(&cmd->req); - nvmet_tcp_unmap_pdu_iovec(cmd); nvmet_tcp_free_cmd_buffers(cmd); } -- cgit From 02c57a82c0081141abc19150beab48ef47f97f18 Mon Sep 17 00:00:00 2001 From: Martin Belanger Date: Wed, 7 Sep 2022 08:27:37 -0400 Subject: nvme-tcp: print actual source IP address through sysfs "address" attr TCP transport relies on the routing table to determine which source address and interface to use when making a connection. Currently, there is no way to tell from userspace where a connection was made. This patch exposes the actual source address using a new field named "src_addr=" in the "address" attribute. This is needed to diagnose and identify connectivity issues. With the source address we can infer the interface associated with each connection. This was tested with nvme-cli 2.0 to verify it does not have any adverse effect. The new "src_addr=" field will simply be displayed in the output of the "list-subsys" or "list -v" commands as shown here. $ nvme list-subsys nvme-subsys0 - NQN=nqn.2014-08.org.nvmexpress.discovery \ +- nvme0 tcp traddr=192.168.56.1,trsvcid=8009,src_addr=192.168.56.101 live Signed-off-by: Martin Belanger Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4c2d198145a7..b5f22ceaae82 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2546,6 +2546,25 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) return queue->nr_cqe; } +static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) +{ + struct nvme_tcp_queue *queue = &to_tcp_ctrl(ctrl)->queues[0]; + struct sockaddr_storage src_addr; + int ret, len; + + len = nvmf_get_address(ctrl, buf, size); + + ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); + if (ret > 0) { + if (len > 0) + len--; /* strip trailing newline */ + len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", + (len) ? "," : "", &src_addr); + } + + return len; +} + static const struct blk_mq_ops nvme_tcp_mq_ops = { .queue_rq = nvme_tcp_queue_rq, .commit_rqs = nvme_tcp_commit_rqs, @@ -2577,7 +2596,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .free_ctrl = nvme_tcp_free_ctrl, .submit_async_event = nvme_tcp_submit_async_event, .delete_ctrl = nvme_tcp_delete_ctrl, - .get_address = nvmf_get_address, + .get_address = nvme_tcp_get_address, .stop_ctrl = nvme_tcp_stop_ctrl, }; -- cgit From bc8fb906b0ff9339b4286698cb7cd9cd5b8c53eb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 19 Sep 2022 12:36:46 -0700 Subject: nvme: handle effects after freeing the request If a reset occurs after the scan work attempts to issue a command, the reset may quisce the admin queue, which blocks the scan work's command from dispatching. The scan work will not be able to complete while the queue is quiesced. Meanwhile, the reset work will cancel all outstanding admin tags and wait until all requests have transitioned to idle, which includes the passthrough request. But the passthrough request won't be set to idle until after the scan_work flushes, so we're deadlocked. Fix this by handling the end effects after the request has been freed. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216354 Reported-by: Jonathan Derrick Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 17 ++++++----------- drivers/nvme/host/ioctl.c | 9 ++++++++- drivers/nvme/host/nvme.h | 4 +++- drivers/nvme/target/passthru.c | 7 ++++++- 4 files changed, 23 insertions(+), 14 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8c9c1176624d..ea6694fd550f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1111,8 +1111,8 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return effects; } -static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, - struct nvme_command *cmd, int status) +void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, + struct nvme_command *cmd, int status) { if (effects & NVME_CMD_EFFECTS_CSE_MASK) { nvme_unfreeze(ctrl); @@ -1148,21 +1148,16 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, break; } } +EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU); -int nvme_execute_passthru_rq(struct request *rq) +int nvme_execute_passthru_rq(struct request *rq, u32 *effects) { struct nvme_command *cmd = nvme_req(rq)->cmd; struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; struct nvme_ns *ns = rq->q->queuedata; - u32 effects; - int ret; - effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); - ret = nvme_execute_rq(rq, false); - if (effects) /* nothing to be done for zero cmd effects */ - nvme_passthru_end(ctrl, effects, cmd, ret); - - return ret; + *effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); + return nvme_execute_rq(rq, false); } EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 27614bee7380..d3281f87cd6e 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -136,9 +136,11 @@ static int nvme_submit_user_cmd(struct request_queue *q, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u64 *result, unsigned timeout, bool vec) { + struct nvme_ctrl *ctrl; struct request *req; void *meta = NULL; struct bio *bio; + u32 effects; int ret; req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer, @@ -147,8 +149,9 @@ static int nvme_submit_user_cmd(struct request_queue *q, return PTR_ERR(req); bio = req->bio; + ctrl = nvme_req(req)->ctrl; - ret = nvme_execute_passthru_rq(req); + ret = nvme_execute_passthru_rq(req, &effects); if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); @@ -158,6 +161,10 @@ static int nvme_submit_user_cmd(struct request_queue *q, if (bio) blk_rq_unmap_user(bio); blk_mq_free_request(req); + + if (effects) + nvme_passthru_end(ctrl, effects, cmd, ret); + return ret; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1bdf714dcd9e..a0bf9560cf67 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1023,7 +1023,9 @@ static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); -int nvme_execute_passthru_rq(struct request *rq); +int nvme_execute_passthru_rq(struct request *rq, u32 *effects); +void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, + struct nvme_command *cmd, int status); struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); void nvme_put_ns(struct nvme_ns *ns); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 6f39a29828b1..94d3153bae54 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -215,9 +215,11 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) { struct nvmet_req *req = container_of(w, struct nvmet_req, p.work); struct request *rq = req->p.rq; + struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; + u32 effects; int status; - status = nvme_execute_passthru_rq(rq); + status = nvme_execute_passthru_rq(rq, &effects); if (status == NVME_SC_SUCCESS && req->cmd->common.opcode == nvme_admin_identify) { @@ -238,6 +240,9 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) req->cqe->result = nvme_req(rq)->result; nvmet_req_complete(req, status); blk_mq_free_request(rq); + + if (effects) + nvme_passthru_end(ctrl, effects, req->cmd, status); } static void nvmet_passthru_req_done(struct request *rq, -- cgit From a8eb6c1ba48bddea82e8d74cbe6e119f006be97d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 19 Sep 2022 12:45:08 -0700 Subject: nvme: copy firmware_rev on each init The firmware revision can change on after a reset so copy the most recent info each time instead of just the first time, otherwise the sysfs firmware_rev entry may contain stale data. Reported-by: Jeff Lien Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ea6694fd550f..e56ecc7fda2d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2893,7 +2893,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) nvme_init_subnqn(subsys, ctrl, id); memcpy(subsys->serial, id->sn, sizeof(subsys->serial)); memcpy(subsys->model, id->mn, sizeof(subsys->model)); - memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; @@ -3112,6 +3111,8 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->quirks |= core_quirks[i].quirks; } } + memcpy(ctrl->subsys->firmware_rev, id->fr, + sizeof(ctrl->subsys->firmware_rev)); if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); -- cgit From 23e085b2dead13b51fe86d27069895b740f749c0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 22 Sep 2022 07:54:06 -0700 Subject: nvme: restrict management ioctls to admin The passthrough commands already have this restriction, but the other operations do not. Require the same capabilities for all users as all of these operations, which include resets and rescans, can be disruptive. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index d3281f87cd6e..a48a79ed5c4c 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -764,11 +764,17 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; dev_warn(ctrl->device, "resetting controller\n"); return nvme_reset_ctrl_sync(ctrl); case NVME_IOCTL_SUBSYS_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; return nvme_reset_subsystem(ctrl); case NVME_IOCTL_RESCAN: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; nvme_queue_scan(ctrl); return 0; default: -- cgit From 1e866afd4bcdd01a70a5eddb4371158d3035ce03 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 22 Sep 2022 08:13:47 -0700 Subject: nvme: ensure subsystem reset is single threaded The subsystem reset writes to a register, so we have to ensure the device state is capable of handling that otherwise the driver may access unmapped registers. Use the state machine to ensure the subsystem reset doesn't try to write registers on a device already undergoing this type of reset. Link: https://bugzilla.kernel.org/show_bug.cgi?id=214771 Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a0bf9560cf67..70555022cb44 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -602,11 +602,23 @@ static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj) static inline void nvme_should_fail(struct request *req) {} #endif +bool nvme_wait_reset(struct nvme_ctrl *ctrl); +int nvme_try_sched_reset(struct nvme_ctrl *ctrl); + static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) { + int ret; + if (!ctrl->subsystem) return -ENOTTY; - return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (!nvme_wait_reset(ctrl)) + return -EBUSY; + + ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (ret) + return ret; + + return nvme_try_sched_reset(ctrl); } /* @@ -712,7 +724,6 @@ void nvme_cancel_tagset(struct nvme_ctrl *ctrl); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); -bool nvme_wait_reset(struct nvme_ctrl *ctrl); int nvme_disable_ctrl(struct nvme_ctrl *ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); @@ -802,7 +813,6 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); -int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); void nvme_queue_scan(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, -- cgit From bf093d971695f30e312d2d0567c5feecfbcef450 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 22 Sep 2022 11:15:36 +0300 Subject: nvme: enumerate controller flags We expect to grow a few of these flags for various purposes so make them a proper enumeration. Signed-off-by: Sagi Grimberg Reviewed-by: James Smart Reviewed-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 70555022cb44..6b4984e8b363 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -233,6 +233,11 @@ struct nvme_fault_inject { #endif }; +enum nvme_ctrl_flags { + NVME_CTRL_FAILFAST_EXPIRED = 0, + NVME_CTRL_ADMIN_Q_STOPPED = 1, +}; + struct nvme_ctrl { bool comp_seen; enum nvme_ctrl_state state; @@ -354,8 +359,6 @@ struct nvme_ctrl { u16 maxcmd; int nr_reconnects; unsigned long flags; -#define NVME_CTRL_FAILFAST_EXPIRED 0 -#define NVME_CTRL_ADMIN_Q_STOPPED 1 struct nvmf_ctrl_options *opts; struct page *discard_page; -- cgit From f46ef9e87c9e8941b7acee45611c7c6a322592bb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 22 Sep 2022 11:15:37 +0300 Subject: nvme: send a rediscover uevent when a persistent discovery controller reconnects When a discovery controller is disconnected, no AENs will arrive to notify the host about discovery log change events. In order to solve this, send a uevent notification when a persistent discovery controller reconnects. We add a new ctrl flag NVME_CTRL_STARTED_ONCE that will be set on the first start, and consecutive calls will find it set, and send the event to userspace if the controller is a discovery controller. Upon the event reception, userspace will re-read the discovery log page and will act upon changes as it sees fit. Signed-off-by: Sagi Grimberg Reviewed-by: Daniel Wagner Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 10 ++++++++++ drivers/nvme/host/nvme.h | 1 + 2 files changed, 11 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e56ecc7fda2d..0de2c227c1ab 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4815,6 +4815,16 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_enable_aen(ctrl); + /* + * persistent discovery controllers need to send indication to userspace + * to re-read the discovery log page to learn about possible changes + * that were missed. We identify persistent discovery controllers by + * checking that they started once before, hence are reconnecting back. + */ + if (test_and_set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) && + nvme_discovery_ctrl(ctrl)) + nvme_change_uevent(ctrl, "NVME_EVENT=rediscover"); + if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); nvme_start_queues(ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6b4984e8b363..a8b054cd2ebb 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -236,6 +236,7 @@ struct nvme_fault_inject { enum nvme_ctrl_flags { NVME_CTRL_FAILFAST_EXPIRED = 0, NVME_CTRL_ADMIN_Q_STOPPED = 1, + NVME_CTRL_STARTED_ONCE = 2, }; struct nvme_ctrl { -- cgit From 61ce339f19fabbc3e51237148a7ef6f2270e44fa Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Tue, 20 Sep 2022 19:19:32 +0000 Subject: nvme-pci: set min_align_mask before calculating max_hw_sectors If swiotlb is force enabled dma_max_mapping_size ends up calling swiotlb_max_mapping_size which takes into account the min align mask for the device. Set the min align mask for nvme driver before calling dma_max_mapping_size while calculating max hw sectors. Signed-off-by: Rishabh Bhatnagar Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 70b1922c8953..e4230f2c98e8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2831,6 +2831,8 @@ static void nvme_reset_work(struct work_struct *work) nvme_start_admin_queue(&dev->ctrl); } + dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); + /* * Limit the max command size to prevent iod->sg allocations going * over a single page. @@ -2843,7 +2845,6 @@ static void nvme_reset_work(struct work_struct *work) * Don't limit the IOMMU merged segment size. */ dma_set_max_seg_size(dev->dev, 0xffffffff); - dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); mutex_unlock(&dev->shutdown_lock); -- cgit From 6ee742fa8e5a7ee051604b16ec2ee6545461e794 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 26 Sep 2022 14:01:07 -0700 Subject: nvme-pci: report the actual number of tagset maps We've been reporting 2 maps regardless of whether the module parameter asked for anything beyond the default queues. A consequence of this means that blk-mq will reinitialize the all the hardware contexts and io schedulers on every controller reset when the mapping is exactly the same as before. This unnecessary overhead is adding several milliseconds on a reset for environments that don't need it. Report the actual number of mappings in use. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e4230f2c98e8..8dd1dddbbbbf 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2526,9 +2526,11 @@ static void nvme_pci_alloc_tag_set(struct nvme_dev *dev) set->ops = &nvme_mq_ops; set->nr_hw_queues = dev->online_queues - 1; - set->nr_maps = 2; /* default + read */ + set->nr_maps = 1; + if (dev->io_queues[HCTX_TYPE_READ]) + set->nr_maps = 2; if (dev->io_queues[HCTX_TYPE_POLL]) - set->nr_maps++; + set->nr_maps = 3; set->timeout = NVME_IO_TIMEOUT; set->numa_node = dev->ctrl.numa_node; set->queue_depth = min_t(unsigned, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; -- cgit From db94f240280c1da8ba1e679c422312676549570d Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 20 Sep 2022 21:16:17 +0800 Subject: nvmet-tcp: fix NULL pointer dereference during release nvmet-tcp frees CMD buffers in nvmet_tcp_uninit_data_in_cmds(), and waits the inflight IO requests in nvmet_sq_destroy(). During wait the inflight IO requests, the callback nvmet_tcp_queue_response() is called from backend after IO complete, this leads a typical Use-After-Free issue like this: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 107f80067 P4D 107f80067 PUD 10789e067 PMD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 123 Comm: kworker/1:1H Kdump: loaded Tainted: G E 6.0.0-rc2.bm.1-amd64 #15 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: nvmet_tcp_wq nvmet_tcp_io_work [nvmet_tcp] RIP: 0010:shash_ahash_digest+0x2b/0x110 Code: 1f 44 00 00 41 57 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 48 83 ec 08 44 8b 67 30 45 85 e4 74 1c 48 8b 57 38 b8 00 10 00 00 <44> 8b 7a 08 44 29 f8 39 42 0c 0f 46 42 0c 41 39 c4 76 43 48 8b 03 RSP: 0018:ffffc9000051bdd8 EFLAGS: 00010206 RAX: 0000000000001000 RBX: ffff888100ab5470 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff888100ab5470 RDI: ffff888100ab5420 RBP: ffff888100ab5420 R08: ffff8881024d08c8 R09: ffff888103e1b4b8 R10: 8080808080808080 R11: 0000000000000000 R12: 0000000000001000 R13: 0000000000000000 R14: ffff88813412bd4c R15: ffff8881024d0800 FS: 0000000000000000(0000) GS:ffff88883fa40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000104b48000 CR4: 0000000000350ee0 Call Trace: nvmet_tcp_io_work+0xa52/0xb52 [nvmet_tcp] ? __switch_to+0x106/0x420 process_one_work+0x1ae/0x380 ? process_one_work+0x380/0x380 worker_thread+0x30/0x360 ? process_one_work+0x380/0x380 kthread+0xe6/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Separate nvmet_tcp_uninit_data_in_cmds() into two steps: uninit data in cmds <- new step 1 nvmet_sq_destroy(); cancel_work_sync(&queue->io_work); free CMD buffers <- new step 2 Signed-off-by: zhenwei pi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c07de4f4f719..70baeab6af30 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1406,14 +1406,26 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) for (i = 0; i < queue->nr_cmds; i++, cmd++) { if (nvmet_tcp_need_data_in(cmd)) nvmet_req_uninit(&cmd->req); - - nvmet_tcp_free_cmd_buffers(cmd); } if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) { /* failed in connect */ - nvmet_tcp_finish_cmd(&queue->connect); + nvmet_req_uninit(&queue->connect.req); + } +} + +static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmd = queue->cmds; + int i; + + for (i = 0; i < queue->nr_cmds; i++, cmd++) { + if (nvmet_tcp_need_data_in(cmd)) + nvmet_tcp_free_cmd_buffers(cmd); } + + if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) + nvmet_tcp_free_cmd_buffers(&queue->connect); } static void nvmet_tcp_release_queue_work(struct work_struct *w) @@ -1434,6 +1446,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_uninit_data_in_cmds(queue); nvmet_sq_destroy(&queue->nvme_sq); cancel_work_sync(&queue->io_work); + nvmet_tcp_free_cmd_data_in_buffers(queue); sock_release(queue->sock); nvmet_tcp_free_cmds(queue); if (queue->hdr_digest || queue->data_digest) -- cgit From f614b937d850193588f161ff854a4e19940a5e43 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 21 Sep 2022 00:04:44 +0530 Subject: nvmet-tcp: handle ICReq PDU received in NVMET_TCP_Q_LIVE state As per NVMe/TCP transport specification ICReq PDU is the first PDU received by the controller and controller should receive only one ICReq PDU. If controller receives more than one ICReq PDU then this can be considered as fatal error. nvmet-tcp driver does not check for ICReq PDU opcode if queue state is NVMET_TCP_Q_LIVE. In LIVE state ICReq PDU is treated as CapsuleCmd PDU, this can result in abnormal behavior. Add a check for ICReq PDU in nvmet_tcp_done_recv_pdu() to fix this issue. Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 70baeab6af30..1762e2e90585 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -961,6 +961,13 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) return nvmet_tcp_handle_icreq(queue); } + if (unlikely(hdr->type == nvme_tcp_icreq)) { + pr_err("queue %d: received icreq pdu in state %d\n", + queue->idx, queue->state); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } + if (hdr->type == nvme_tcp_h2c_data) { ret = nvmet_tcp_handle_h2c_data_pdu(queue); if (unlikely(ret)) -- cgit From b6a545ffa2c192b1e6da4a7924edac5ba9f4ea2b Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 21 Sep 2022 00:06:49 +0530 Subject: nvmet-tcp: add bounds check on Transfer Tag ttag is used as an index to get cmd in nvmet_tcp_handle_h2c_data_pdu(), add a bounds check to avoid out-of-bounds access. Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 1762e2e90585..98e17ea184d6 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -920,10 +920,17 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) struct nvme_tcp_data_pdu *data = &queue->pdu.data; struct nvmet_tcp_cmd *cmd; - if (likely(queue->nr_cmds)) + if (likely(queue->nr_cmds)) { + if (unlikely(data->ttag >= queue->nr_cmds)) { + pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n", + queue->idx, data->ttag, queue->nr_cmds); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } cmd = &queue->cmds[data->ttag]; - else + } else { cmd = &queue->connect; + } if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) { pr_err("ttag %u unexpected data offset %u (expected %u)\n", -- cgit From 0700542a823ba3d3aa9c699a255aecc23dbbcaff Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Thu, 22 Sep 2022 15:06:16 +0800 Subject: nvmet-tcp: remove nvmet_tcp_finish_cmd There is only a single call-site of nvmet_tcp_finish_cmd(), this becomes redundant. Remove nvmet_tcp_finish_cmd() and use the original function body instead. Suggested-by: Sagi Grimberg Signed-off-by: zhenwei pi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 98e17ea184d6..63d72b9cb28d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -164,7 +164,6 @@ static DEFINE_MUTEX(nvmet_tcp_queue_mutex); static struct workqueue_struct *nvmet_tcp_wq; static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); -static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd); static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, @@ -1177,7 +1176,8 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) queue->idx, cmd->req.cmd->common.command_id, queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst), le32_to_cpu(cmd->exp_ddgst)); - nvmet_tcp_finish_cmd(cmd); + nvmet_req_uninit(&cmd->req); + nvmet_tcp_free_cmd_buffers(cmd); nvmet_tcp_fatal_error(queue); ret = -EPROTO; goto out; @@ -1406,12 +1406,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) write_unlock_bh(&sock->sk->sk_callback_lock); } -static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) -{ - nvmet_req_uninit(&cmd->req); - nvmet_tcp_free_cmd_buffers(cmd); -} - static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd = queue->cmds; -- cgit From 1befd944e05050d76950014f3dc04ed47faba2c3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 15:37:18 +0200 Subject: nvmet-auth: don't try to cancel a non-initialized work_struct Currently blktests nvme/002 trips up debugobjects if CONFIG_NVME_AUTH is enabled, but authentication is not on a queue. This is because nvmet_auth_sq_free cancels sq->auth_expired_work unconditionaly, while auth_expired_work is only ever initialized if authentication is enabled for a given controller. Fix this by calling most of what is nvmet_init_auth unconditionally when initializing the SQ, and just do the setting of the result field in the connect command handler. Fixes: db1312dd9548 ("nvmet: implement basic In-Band Authentication") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke --- drivers/nvme/target/core.c | 1 + drivers/nvme/target/fabrics-cmd-auth.c | 13 ++++--------- drivers/nvme/target/fabrics-cmd.c | 6 ++++-- drivers/nvme/target/nvmet.h | 7 ++++--- 4 files changed, 13 insertions(+), 14 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a1345790005f..8e3cf0c3588c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -830,6 +830,7 @@ int nvmet_sq_init(struct nvmet_sq *sq) } init_completion(&sq->free_done); init_completion(&sq->confirm_done); + nvmet_auth_sq_init(sq); return 0; } diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 84601e38a335..7970a7640e58 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -23,17 +23,12 @@ static void nvmet_auth_expired_work(struct work_struct *work) sq->dhchap_tid = -1; } -void nvmet_init_auth(struct nvmet_ctrl *ctrl, struct nvmet_req *req) +void nvmet_auth_sq_init(struct nvmet_sq *sq) { - u32 result = le32_to_cpu(req->cqe->result.u32); - /* Initialize in-band authentication */ - INIT_DELAYED_WORK(&req->sq->auth_expired_work, - nvmet_auth_expired_work); - req->sq->authenticated = false; - req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; - result |= (u32)NVME_CONNECT_AUTHREQ_ATR << 16; - req->cqe->result.u32 = cpu_to_le32(result); + INIT_DELAYED_WORK(&sq->auth_expired_work, nvmet_auth_expired_work); + sq->authenticated = false; + sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; } static u16 nvmet_auth_negotiate(struct nvmet_req *req, void *d) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index c1dfdfb92ebf..c7e903589d37 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -272,7 +272,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); if (nvmet_has_auth(ctrl)) - nvmet_init_auth(ctrl, req); + req->cqe->result.u32 |= + cpu_to_le32((u32)NVME_CONNECT_AUTHREQ_ATR << 16); out: kfree(d); complete: @@ -333,7 +334,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); if (nvmet_has_auth(ctrl)) - nvmet_init_auth(ctrl, req); + req->cqe->result.u32 |= + cpu_to_le32((u32)NVME_CONNECT_AUTHREQ_ATR << 16); out: kfree(d); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 6ffeeb0a1c49..dfe3894205aa 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -704,7 +704,7 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, bool set_ctrl); int nvmet_auth_set_host_hash(struct nvmet_host *host, const char *hash); int nvmet_setup_auth(struct nvmet_ctrl *ctrl); -void nvmet_init_auth(struct nvmet_ctrl *ctrl, struct nvmet_req *req); +void nvmet_auth_sq_init(struct nvmet_sq *sq); void nvmet_destroy_auth(struct nvmet_ctrl *ctrl); void nvmet_auth_sq_free(struct nvmet_sq *sq); int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id); @@ -726,8 +726,9 @@ static inline int nvmet_setup_auth(struct nvmet_ctrl *ctrl) { return 0; } -static inline void nvmet_init_auth(struct nvmet_ctrl *ctrl, - struct nvmet_req *req) {}; +static inline void nvmet_auth_sq_init(struct nvmet_sq *sq) +{ +} static inline void nvmet_destroy_auth(struct nvmet_ctrl *ctrl) {}; static inline void nvmet_auth_sq_free(struct nvmet_sq *sq) {}; static inline bool nvmet_check_auth_status(struct nvmet_req *req) -- cgit From 1c32a8012b7fabe469b6af826edfd4ae2a6201d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 15:38:58 +0200 Subject: nvme: improve the NVME_CONNECT_AUTHREQ* definitions Mark them as unsigned so that we don't need extra casts, and define them relative to cdword0 instead of requiring extra shifts. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke --- drivers/nvme/target/fabrics-cmd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index c7e903589d37..618f7adca70f 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -272,8 +272,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); if (nvmet_has_auth(ctrl)) - req->cqe->result.u32 |= - cpu_to_le32((u32)NVME_CONNECT_AUTHREQ_ATR << 16); + req->cqe->result.u32 |= cpu_to_le32(NVME_CONNECT_AUTHREQ_ATR); out: kfree(d); complete: @@ -334,8 +333,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); if (nvmet_has_auth(ctrl)) - req->cqe->result.u32 |= - cpu_to_le32((u32)NVME_CONNECT_AUTHREQ_ATR << 16); + req->cqe->result.u32 |= cpu_to_le32(NVME_CONNECT_AUTHREQ_ATR); out: kfree(d); -- cgit From ab46d8d40f01487bf637428c4767f0e75ac2a95a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 16:09:57 +0200 Subject: nvmet: add helpers to set the result field for connect commands The code to set the result field for the admin and I/O connect commands is not only verbose and duplicated, but also violates the aliasing rules as it accesses both the u16 and u32 members in the union. Add a little helper to sort all that out. Fixes: db1312dd9548 ("nvmet: implement basic In-Band Authentication") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke --- drivers/nvme/target/fabrics-cmd.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 618f7adca70f..43b5bd8bb6a5 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -198,6 +198,12 @@ err: return ret; } +static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl) +{ + return (u32)ctrl->cntlid | + (nvmet_has_auth(ctrl) ? NVME_CONNECT_AUTHREQ_ATR : 0); +} + static void nvmet_execute_admin_connect(struct nvmet_req *req) { struct nvmf_connect_command *c = &req->cmd->connect; @@ -269,10 +275,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, ctrl->pi_support ? " T10-PI is enabled" : "", nvmet_has_auth(ctrl) ? " with DH-HMAC-CHAP" : ""); - req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); - - if (nvmet_has_auth(ctrl)) - req->cqe->result.u32 |= cpu_to_le32(NVME_CONNECT_AUTHREQ_ATR); + req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl)); out: kfree(d); complete: @@ -328,13 +331,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) if (status) goto out_ctrl_put; - /* pass back cntlid for successful completion */ - req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); - pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); - if (nvmet_has_auth(ctrl)) - req->cqe->result.u32 |= cpu_to_le32(NVME_CONNECT_AUTHREQ_ATR); - + req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl)); out: kfree(d); complete: -- cgit From fe60e8c534118a288cd251a59d747cbf5c03e160 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Sep 2022 15:18:30 +0300 Subject: nvme: add common helpers to allocate and free tagsets Add common helpers to allocate and tear down the admin and I/O tag sets, including the special queues allocated with them. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 8 ++++ 2 files changed, 110 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0de2c227c1ab..f2786a6b1a85 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4796,6 +4796,108 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, } EXPORT_SYMBOL_GPL(nvme_complete_async_event); +int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size) +{ + int ret; + + memset(set, 0, sizeof(*set)); + set->ops = ops; + set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; + if (ctrl->ops->flags & NVME_F_FABRICS) + set->reserved_tags = NVMF_RESERVED_TAGS; + set->numa_node = ctrl->numa_node; + set->flags = flags; + set->cmd_size = cmd_size; + set->driver_data = ctrl; + set->nr_hw_queues = 1; + set->timeout = NVME_ADMIN_TIMEOUT; + ret = blk_mq_alloc_tag_set(set); + if (ret) + return ret; + + ctrl->admin_q = blk_mq_init_queue(set); + if (IS_ERR(ctrl->admin_q)) { + ret = PTR_ERR(ctrl->admin_q); + goto out_free_tagset; + } + + if (ctrl->ops->flags & NVME_F_FABRICS) { + ctrl->fabrics_q = blk_mq_init_queue(set); + if (IS_ERR(ctrl->fabrics_q)) { + ret = PTR_ERR(ctrl->fabrics_q); + goto out_cleanup_admin_q; + } + } + + ctrl->admin_tagset = set; + return 0; + +out_cleanup_admin_q: + blk_mq_destroy_queue(ctrl->fabrics_q); +out_free_tagset: + blk_mq_free_tag_set(ctrl->admin_tagset); + return ret; +} +EXPORT_SYMBOL_GPL(nvme_alloc_admin_tag_set); + +void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl) +{ + blk_mq_destroy_queue(ctrl->admin_q); + if (ctrl->ops->flags & NVME_F_FABRICS) + blk_mq_destroy_queue(ctrl->fabrics_q); + blk_mq_free_tag_set(ctrl->admin_tagset); +} +EXPORT_SYMBOL_GPL(nvme_remove_admin_tag_set); + +int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size) +{ + int ret; + + memset(set, 0, sizeof(*set)); + set->ops = ops; + set->queue_depth = ctrl->sqsize + 1; + set->reserved_tags = NVMF_RESERVED_TAGS; + set->numa_node = ctrl->numa_node; + set->flags = flags; + set->cmd_size = cmd_size, + set->driver_data = ctrl; + set->nr_hw_queues = ctrl->queue_count - 1; + set->timeout = NVME_IO_TIMEOUT; + if (ops->map_queues) + set->nr_maps = ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; + ret = blk_mq_alloc_tag_set(set); + if (ret) + return ret; + + if (ctrl->ops->flags & NVME_F_FABRICS) { + ctrl->connect_q = blk_mq_init_queue(set); + if (IS_ERR(ctrl->connect_q)) { + ret = PTR_ERR(ctrl->connect_q); + goto out_free_tag_set; + } + } + + ctrl->tagset = set; + return 0; + +out_free_tag_set: + blk_mq_free_tag_set(set); + return ret; +} +EXPORT_SYMBOL_GPL(nvme_alloc_io_tag_set); + +void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl) +{ + if (ctrl->ops->flags & NVME_F_FABRICS) + blk_mq_destroy_queue(ctrl->connect_q); + blk_mq_free_tag_set(ctrl->tagset); +} +EXPORT_SYMBOL_GPL(nvme_remove_io_tag_set); + void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a8b054cd2ebb..56000a846a48 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -737,6 +737,14 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl); int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl); +int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size); +void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl); +int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size); +void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); -- cgit From fb8745d040ef5b9080003325e56b91fefe1022bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:23:24 +0200 Subject: nvme-tcp: remove the unused queue_size member in nvme_tcp_queue ->nvme_tcp_queue is not used anywhere, so remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/tcp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b5f22ceaae82..8a749ef63afe 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -134,7 +134,6 @@ struct nvme_tcp_queue { /* send state */ struct nvme_tcp_request *request; - int queue_size; u32 maxh2cdata; size_t cmnd_capsule_len; struct nvme_tcp_ctrl *ctrl; @@ -1479,8 +1478,7 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); } -static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, - int qid, size_t queue_size) +static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1492,7 +1490,6 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, INIT_LIST_HEAD(&queue->send_list); mutex_init(&queue->send_mutex); INIT_WORK(&queue->io_work, nvme_tcp_io_work); - queue->queue_size = queue_size; if (qid > 0) queue->cmnd_capsule_len = nctrl->ioccsz * 16; @@ -1785,7 +1782,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) { int ret; - ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); + ret = nvme_tcp_alloc_queue(ctrl, 0); if (ret) return ret; @@ -1805,7 +1802,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->queue_count; i++) { - ret = nvme_tcp_alloc_queue(ctrl, i, ctrl->sqsize + 1); + ret = nvme_tcp_alloc_queue(ctrl, i); if (ret) goto out_free_queues; } -- cgit From 06427ca09b2f3103a60b384345c81cb784e19956 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:09:48 +0200 Subject: nvme-tcp: store the generic nvme_ctrl in set->driver_data Point the private data to the generic controller structure in preparation of using the common tagset init/exit code. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/tcp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8a749ef63afe..863e985085d4 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -465,7 +465,7 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_tcp_ctrl *ctrl = set->driver_data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_tcp_cmd_pdu *pdu; int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; @@ -489,7 +489,7 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_tcp_ctrl *ctrl = data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data); struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1]; hctx->driver_data = queue; @@ -499,7 +499,7 @@ static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_tcp_ctrl *ctrl = data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data); struct nvme_tcp_queue *queue = &ctrl->queues[0]; hctx->driver_data = queue; @@ -1700,7 +1700,7 @@ static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *nctrl) set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); - set->driver_data = ctrl; + set->driver_data = &ctrl->ctrl; set->nr_hw_queues = 1; set->timeout = NVME_ADMIN_TIMEOUT; ret = blk_mq_alloc_tag_set(set); @@ -1722,7 +1722,7 @@ static int nvme_tcp_alloc_tag_set(struct nvme_ctrl *nctrl) set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); - set->driver_data = ctrl; + set->driver_data = &ctrl->ctrl; set->nr_hw_queues = nctrl->queue_count - 1; set->timeout = NVME_IO_TIMEOUT; set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; @@ -2486,7 +2486,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, static void nvme_tcp_map_queues(struct blk_mq_tag_set *set) { - struct nvme_tcp_ctrl *ctrl = set->driver_data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) { -- cgit From de777825e462bc086dca9675312f1780b4b8c88e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:12:47 +0200 Subject: nvme-tcp: use the tagset alloc/free helpers Use the common helpers to allocate and free the tagsets. To make this work the generic nvme_ctrl now needs to be stored in the hctx private data instead of the nvme_tcp_ctrl. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/tcp.c | 101 ++++++++---------------------------------------- 1 file changed, 16 insertions(+), 85 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 863e985085d4..3e7b29d07c71 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1687,51 +1687,6 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) return ret; } -static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *nctrl) -{ - struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->admin_tag_set; - int ret; - - memset(set, 0, sizeof(*set)); - set->ops = &nvme_tcp_admin_mq_ops; - set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->flags = BLK_MQ_F_BLOCKING; - set->cmd_size = sizeof(struct nvme_tcp_request); - set->driver_data = &ctrl->ctrl; - set->nr_hw_queues = 1; - set->timeout = NVME_ADMIN_TIMEOUT; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - nctrl->admin_tagset = set; - return ret; -} - -static int nvme_tcp_alloc_tag_set(struct nvme_ctrl *nctrl) -{ - struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->tag_set; - int ret; - - memset(set, 0, sizeof(*set)); - set->ops = &nvme_tcp_mq_ops; - set->queue_depth = nctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; - set->cmd_size = sizeof(struct nvme_tcp_request); - set->driver_data = &ctrl->ctrl; - set->nr_hw_queues = nctrl->queue_count - 1; - set->timeout = NVME_IO_TIMEOUT; - set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - nctrl->tagset = set; - return ret; -} - static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl) { if (to_tcp_ctrl(ctrl)->async_req.pdu) { @@ -1890,10 +1845,8 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_io_queues(ctrl); - if (remove) { - blk_mq_destroy_queue(ctrl->connect_q); - blk_mq_free_tag_set(ctrl->tagset); - } + if (remove) + nvme_remove_io_tag_set(ctrl); nvme_tcp_free_io_queues(ctrl); } @@ -1906,13 +1859,12 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) return ret; if (new) { - ret = nvme_tcp_alloc_tag_set(ctrl); + ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set, + &nvme_tcp_mq_ops, + BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING, + sizeof(struct nvme_tcp_request)); if (ret) goto out_free_io_queues; - - ret = nvme_ctrl_init_connect_q(ctrl); - if (ret) - goto out_free_tag_set; } /* @@ -1959,10 +1911,7 @@ out_wait_freeze_timed_out: out_cleanup_connect_q: nvme_cancel_tagset(ctrl); if (new) - blk_mq_destroy_queue(ctrl->connect_q); -out_free_tag_set: - if (new) - blk_mq_free_tag_set(ctrl->tagset); + nvme_remove_io_tag_set(ctrl); out_free_io_queues: nvme_tcp_free_io_queues(ctrl); return ret; @@ -1971,11 +1920,8 @@ out_free_io_queues: static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_queue(ctrl, 0); - if (remove) { - blk_mq_destroy_queue(ctrl->admin_q); - blk_mq_destroy_queue(ctrl->fabrics_q); - blk_mq_free_tag_set(ctrl->admin_tagset); - } + if (remove) + nvme_remove_admin_tag_set(ctrl); nvme_tcp_free_admin_queue(ctrl); } @@ -1988,26 +1934,17 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) return error; if (new) { - error = nvme_tcp_alloc_admin_tag_set(ctrl); + error = nvme_alloc_admin_tag_set(ctrl, + &to_tcp_ctrl(ctrl)->admin_tag_set, + &nvme_tcp_admin_mq_ops, BLK_MQ_F_BLOCKING, + sizeof(struct nvme_tcp_request)); if (error) goto out_free_queue; - - ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset); - if (IS_ERR(ctrl->fabrics_q)) { - error = PTR_ERR(ctrl->fabrics_q); - goto out_free_tagset; - } - - ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset); - if (IS_ERR(ctrl->admin_q)) { - error = PTR_ERR(ctrl->admin_q); - goto out_cleanup_fabrics_q; - } } error = nvme_tcp_start_queue(ctrl, 0); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; error = nvme_enable_ctrl(ctrl); if (error) @@ -2027,15 +1964,9 @@ out_quiesce_queue: out_stop_queue: nvme_tcp_stop_queue(ctrl, 0); nvme_cancel_admin_tagset(ctrl); -out_cleanup_queue: - if (new) - blk_mq_destroy_queue(ctrl->admin_q); -out_cleanup_fabrics_q: - if (new) - blk_mq_destroy_queue(ctrl->fabrics_q); -out_free_tagset: +out_cleanup_tagset: if (new) - blk_mq_free_tag_set(ctrl->admin_tagset); + nvme_remove_admin_tag_set(ctrl); out_free_queue: nvme_tcp_free_admin_queue(ctrl); return error; -- cgit From 2d60738c8f80684d469302d60edb2101f5e4190b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:14:01 +0200 Subject: nvme-rdma: store the generic nvme_ctrl in set->driver_data Point the private data to the generic controller structure in preparation of using the common tagset init/exit code. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/rdma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4c6df34f9d7a..8bc2930fd496 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -295,7 +295,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_rdma_ctrl *ctrl = set->driver_data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; @@ -320,7 +320,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_rdma_ctrl *ctrl = data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data); struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1]; BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); @@ -332,7 +332,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_rdma_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_rdma_ctrl *ctrl = data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data); struct nvme_rdma_queue *queue = &ctrl->queues[0]; BUG_ON(hctx_idx != 0); @@ -801,7 +801,7 @@ static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *nctrl) set->numa_node = nctrl->numa_node; set->cmd_size = sizeof(struct nvme_rdma_request) + NVME_RDMA_DATA_SGL_SIZE; - set->driver_data = ctrl; + set->driver_data = &ctrl->ctrl; set->nr_hw_queues = 1; set->timeout = NVME_ADMIN_TIMEOUT; set->flags = BLK_MQ_F_NO_SCHED; @@ -828,7 +828,7 @@ static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *nctrl) if (nctrl->max_integrity_segments) set->cmd_size += sizeof(struct nvme_rdma_sgl) + NVME_RDMA_METADATA_SGL_SIZE; - set->driver_data = ctrl; + set->driver_data = &ctrl->ctrl; set->nr_hw_queues = nctrl->queue_count - 1; set->timeout = NVME_IO_TIMEOUT; set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; @@ -2206,7 +2206,7 @@ static void nvme_rdma_complete_rq(struct request *rq) static void nvme_rdma_map_queues(struct blk_mq_tag_set *set) { - struct nvme_rdma_ctrl *ctrl = set->driver_data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) { -- cgit From cefa1032f1114845de305f34dbe4c0c96ed6de1c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:14:53 +0200 Subject: nvme-rdma: use the tagset alloc/free helpers Use the common helpers to allocate and free the tagsets. To make this work the generic nvme_ctrl now needs to be stored in the hctx private data instead of the nvme_rdma_ctrl. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/rdma.c | 133 ++++++++++++----------------------------------- 1 file changed, 34 insertions(+), 99 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 8bc2930fd496..5ad0ab2853a4 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -788,64 +788,21 @@ out_free_queues: return ret; } -static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *nctrl) +static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl) { - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->admin_tag_set; - int ret; + unsigned int cmd_size = sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE; - memset(set, 0, sizeof(*set)); - set->ops = &nvme_rdma_admin_mq_ops; - set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_RDMA_DATA_SGL_SIZE; - set->driver_data = &ctrl->ctrl; - set->nr_hw_queues = 1; - set->timeout = NVME_ADMIN_TIMEOUT; - set->flags = BLK_MQ_F_NO_SCHED; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - ctrl->ctrl.admin_tagset = set; - return ret; -} - -static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *nctrl) -{ - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->tag_set; - int ret; + if (ctrl->max_integrity_segments) + cmd_size += sizeof(struct nvme_rdma_sgl) + + NVME_RDMA_METADATA_SGL_SIZE; - memset(set, 0, sizeof(*set)); - set->ops = &nvme_rdma_mq_ops; - set->queue_depth = nctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->flags = BLK_MQ_F_SHOULD_MERGE; - set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_RDMA_DATA_SGL_SIZE; - if (nctrl->max_integrity_segments) - set->cmd_size += sizeof(struct nvme_rdma_sgl) + - NVME_RDMA_METADATA_SGL_SIZE; - set->driver_data = &ctrl->ctrl; - set->nr_hw_queues = nctrl->queue_count - 1; - set->timeout = NVME_IO_TIMEOUT; - set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - ctrl->ctrl.tagset = set; - return ret; + return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set, + &nvme_rdma_mq_ops, BLK_MQ_F_SHOULD_MERGE, cmd_size); } -static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, - bool remove) +static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) { - if (remove) { - blk_mq_destroy_queue(ctrl->ctrl.admin_q); - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); - blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); - } if (ctrl->async_event_sqe.data) { cancel_work_sync(&ctrl->ctrl.async_event_work); nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, @@ -887,26 +844,19 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, goto out_free_queue; if (new) { - error = nvme_rdma_alloc_admin_tag_set(&ctrl->ctrl); + error = nvme_alloc_admin_tag_set(&ctrl->ctrl, + &ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops, + BLK_MQ_F_NO_SCHED, + sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE); if (error) goto out_free_async_qe; - ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.fabrics_q)) { - error = PTR_ERR(ctrl->ctrl.fabrics_q); - goto out_free_tagset; - } - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_cleanup_fabrics_q; - } } error = nvme_rdma_start_queue(ctrl, 0); if (error) - goto out_cleanup_queue; + goto out_remove_admin_tag_set; error = nvme_enable_ctrl(&ctrl->ctrl); if (error) @@ -933,15 +883,9 @@ out_quiesce_queue: out_stop_queue: nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); -out_cleanup_queue: +out_remove_admin_tag_set: if (new) - blk_mq_destroy_queue(ctrl->ctrl.admin_q); -out_cleanup_fabrics_q: - if (new) - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); -out_free_tagset: - if (new) - blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); + nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_async_qe: if (ctrl->async_event_sqe.data) { nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, @@ -953,16 +897,6 @@ out_free_queue: return error; } -static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, - bool remove) -{ - if (remove) { - blk_mq_destroy_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(ctrl->ctrl.tagset); - } - nvme_rdma_free_io_queues(ctrl); -} - static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) { int ret, nr_queues; @@ -975,10 +909,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) ret = nvme_rdma_alloc_tag_set(&ctrl->ctrl); if (ret) goto out_free_io_queues; - - ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); - if (ret) - goto out_free_tag_set; } /* @@ -989,7 +919,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) nr_queues = min(ctrl->tag_set.nr_hw_queues + 1, ctrl->ctrl.queue_count); ret = nvme_rdma_start_io_queues(ctrl, 1, nr_queues); if (ret) - goto out_cleanup_connect_q; + goto out_cleanup_tagset; if (!new) { nvme_start_queues(&ctrl->ctrl); @@ -1022,13 +952,10 @@ out_wait_freeze_timed_out: nvme_stop_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); -out_cleanup_connect_q: +out_cleanup_tagset: nvme_cancel_tagset(&ctrl->ctrl); if (new) - blk_mq_destroy_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - if (new) - blk_mq_free_tag_set(ctrl->ctrl.tagset); + nvme_remove_io_tag_set(&ctrl->ctrl); out_free_io_queues: nvme_rdma_free_io_queues(ctrl); return ret; @@ -1041,9 +968,11 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); - if (remove) + if (remove) { nvme_start_admin_queue(&ctrl->ctrl); - nvme_rdma_destroy_admin_queue(ctrl, remove); + nvme_remove_admin_tag_set(&ctrl->ctrl); + } + nvme_rdma_destroy_admin_queue(ctrl); } static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, @@ -1055,9 +984,11 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); nvme_cancel_tagset(&ctrl->ctrl); - if (remove) + if (remove) { nvme_start_queues(&ctrl->ctrl); - nvme_rdma_destroy_io_queues(ctrl, remove); + nvme_remove_io_tag_set(&ctrl->ctrl); + } + nvme_rdma_free_io_queues(ctrl); } } @@ -1179,14 +1110,18 @@ destroy_io: nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); nvme_cancel_tagset(&ctrl->ctrl); - nvme_rdma_destroy_io_queues(ctrl, new); + if (new) + nvme_remove_io_tag_set(&ctrl->ctrl); + nvme_rdma_free_io_queues(ctrl); } destroy_admin: nvme_stop_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); - nvme_rdma_destroy_admin_queue(ctrl, new); + if (new) + nvme_remove_admin_tag_set(&ctrl->ctrl); + nvme_rdma_destroy_admin_queue(ctrl); return ret; } -- cgit From 18ecd97506ab27d446ad5f7292b620fef3116089 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:27:33 +0200 Subject: nvme-fc: keep ctrl->sqsize in sync with opts->queue_size Also update the sqsize field when capping the queue size, and remove the check a queue size that is larger than sqsize given that sqsize is only initialized from opts->queue_size. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart --- drivers/nvme/host/fc.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 42767fb75455..ee376111f561 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3165,15 +3165,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) "to maxcmd\n", opts->queue_size, ctrl->ctrl.maxcmd); opts->queue_size = ctrl->ctrl.maxcmd; - } - - if (opts->queue_size > ctrl->ctrl.sqsize + 1) { - /* warn if sqsize is lower than queue_size */ - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl sqsize %u, reducing " - "to sqsize\n", - opts->queue_size, ctrl->ctrl.sqsize + 1); - opts->queue_size = ctrl->ctrl.sqsize + 1; + ctrl->ctrl.sqsize = opts->queue_size - 1; } ret = nvme_fc_init_aen_ops(ctrl); -- cgit From 1864ea46155ce5e12a3797532c531843688904cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:17:59 +0200 Subject: nvme-fc: store the generic nvme_ctrl in set->driver_data Point the private data to the generic controller structure in preparation of using the common tagset init/exit code and use the chance the cleanup the init_hctx methods a bit. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart --- drivers/nvme/host/fc.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index ee376111f561..d707cf93f1f4 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1829,7 +1829,7 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - return __nvme_fc_exit_request(set->driver_data, op); + return __nvme_fc_exit_request(to_fc_ctrl(set->driver_data), op); } static int @@ -2135,7 +2135,7 @@ static int nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_fc_ctrl *ctrl = set->driver_data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data); struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; @@ -2206,36 +2206,28 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) } } -static inline void -__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, - unsigned int qidx) +static inline int +__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int qidx) { + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(data); struct nvme_fc_queue *queue = &ctrl->queues[qidx]; hctx->driver_data = queue; queue->hctx = hctx; + return 0; } static int -nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, - unsigned int hctx_idx) +nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_fc_ctrl *ctrl = data; - - __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); - - return 0; + return __nvme_fc_init_hctx(hctx, data, hctx_idx + 1); } static int nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_fc_ctrl *ctrl = data; - - __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); - - return 0; + return __nvme_fc_init_hctx(hctx, data, hctx_idx); } static void @@ -2862,7 +2854,7 @@ nvme_fc_complete_rq(struct request *rq) static void nvme_fc_map_queues(struct blk_mq_tag_set *set) { - struct nvme_fc_ctrl *ctrl = set->driver_data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data); int i; for (i = 0; i < set->nr_maps; i++) { @@ -2923,7 +2915,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ctrl->tag_set.cmd_size = struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, ctrl->lport->ops->fcprqst_priv_sz); - ctrl->tag_set.driver_data = ctrl; + ctrl->tag_set.driver_data = &ctrl->ctrl; ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; @@ -3546,7 +3538,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->admin_tag_set.cmd_size = struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, ctrl->lport->ops->fcprqst_priv_sz); - ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.driver_data = &ctrl->ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; -- cgit From 6dfba1c09c109f4a6ca12e156dbdbe27e0924862 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:19:36 +0200 Subject: nvme-fc: use the tagset alloc/free helpers Use the common helpers to allocate and free the tagsets. To make this work the generic nvme_ctrl now needs to be stored in the hctx private data instead of the nvme_fc_ctrl. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart --- drivers/nvme/host/fc.c | 83 +++++++++++--------------------------------------- 1 file changed, 17 insertions(+), 66 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index d707cf93f1f4..5d57a042dbca 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2383,10 +2383,8 @@ nvme_fc_ctrl_free(struct kref *ref) container_of(ref, struct nvme_fc_ctrl, ref); unsigned long flags; - if (ctrl->ctrl.tagset) { - blk_mq_destroy_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - } + if (ctrl->ctrl.tagset) + nvme_remove_io_tag_set(&ctrl->ctrl); /* remove from rport list */ spin_lock_irqsave(&ctrl->rport->lock, flags); @@ -2394,9 +2392,7 @@ nvme_fc_ctrl_free(struct kref *ref) spin_unlock_irqrestore(&ctrl->rport->lock, flags); nvme_start_admin_queue(&ctrl->ctrl); - blk_mq_destroy_queue(ctrl->ctrl.admin_q); - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); + nvme_remove_admin_tag_set(&ctrl->ctrl); kfree(ctrl->queues); @@ -2906,32 +2902,16 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) nvme_fc_init_io_queues(ctrl); - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_fc_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = - struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, - ctrl->lport->ops->fcprqst_priv_sz); - ctrl->tag_set.driver_data = &ctrl->ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, + &nvme_fc_mq_ops, BLK_MQ_F_SHOULD_MERGE, + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) return ret; - ctrl->ctrl.tagset = &ctrl->tag_set; - - ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); - if (ret) - goto out_free_tag_set; - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) - goto out_cleanup_blk_queue; + goto out_cleanup_tagset; ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) @@ -2943,10 +2923,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) out_delete_hw_queues: nvme_fc_delete_hw_io_queues(ctrl); -out_cleanup_blk_queue: - blk_mq_destroy_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); +out_cleanup_tagset: + nvme_remove_io_tag_set(&ctrl->ctrl); nvme_fc_free_io_queues(ctrl); /* force put free routine to ignore io queues */ @@ -3530,35 +3508,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, nvme_fc_init_queue(ctrl, 0); - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; - ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->admin_tag_set.cmd_size = - struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, - ctrl->lport->ops->fcprqst_priv_sz); - ctrl->admin_tag_set.driver_data = &ctrl->ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; - ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; - - ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); + ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, + &nvme_fc_admin_mq_ops, BLK_MQ_F_NO_SCHED, + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) goto out_free_queues; - ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; - - ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.fabrics_q)) { - ret = PTR_ERR(ctrl->ctrl.fabrics_q); - goto out_free_admin_tag_set; - } - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - ret = PTR_ERR(ctrl->ctrl.admin_q); - goto out_cleanup_fabrics_q; - } /* * Would have been nice to init io queues tag set as well. @@ -3569,7 +3524,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); if (ret) - goto out_cleanup_admin_q; + goto out_cleanup_tagset; /* at this point, teardown path changes to ref counting on nvme ctrl */ @@ -3624,12 +3579,8 @@ fail_ctrl: return ERR_PTR(-EIO); -out_cleanup_admin_q: - blk_mq_destroy_queue(ctrl->ctrl.admin_q); -out_cleanup_fabrics_q: - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); -out_free_admin_tag_set: - blk_mq_free_tag_set(&ctrl->admin_tag_set); +out_cleanup_tagset: + nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_queues: kfree(ctrl->queues); out_free_ida: -- cgit From 379e0df5ab2c46e52e7ca2273d1238526631aa4f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:26:18 +0200 Subject: nvme-loop: initialize sqsize later Defer initializing the sqsize field from the options until it has been capped by MAXCMD. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9750a7fca268..ed6d36eb7d29 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -601,7 +601,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ret = -ENOMEM; - ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ctrl->port = nvme_loop_find_port(&ctrl->ctrl); @@ -621,6 +620,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, opts->queue_size, ctrl->ctrl.maxcmd); opts->queue_size = ctrl->ctrl.maxcmd; } + ctrl->ctrl.sqsize = opts->queue_size - 1; if (opts->nr_io_queues) { ret = nvme_loop_create_io_queues(ctrl); -- cgit From 2ade82213b7a35c761273fe0c982f4ab8707f30a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:20:46 +0200 Subject: nvme-loop: store the generic nvme_ctrl in set->driver_data Point the private data to the generic controller structure in preparation of using the common tagset init/exit code. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/loop.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index ed6d36eb7d29..54578cc18d52 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -204,7 +204,7 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_loop_ctrl *ctrl = set->driver_data; + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(set->driver_data); struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); nvme_req(req)->ctrl = &ctrl->ctrl; @@ -218,7 +218,7 @@ static struct lock_class_key loop_hctx_fq_lock_key; static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_loop_ctrl *ctrl = data; + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(data); struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1]; BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); @@ -238,7 +238,7 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_loop_ctrl *ctrl = data; + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(data); struct nvme_loop_queue *queue = &ctrl->queues[0]; BUG_ON(hctx_idx != 0); @@ -357,7 +357,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist); - ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.driver_data = &ctrl->ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; @@ -530,7 +530,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist); - ctrl->tag_set.driver_data = ctrl; + ctrl->tag_set.driver_data = &ctrl->ctrl; ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; ctrl->ctrl.tagset = &ctrl->tag_set; -- cgit From ceee1953f9239b699d80c5ef366712c3fd865cc2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:21:17 +0200 Subject: nvme-loop: use the tagset alloc/free helpers Use the common helpers to allocate and free the tagsets. To make this work the generic nvme_ctrl now needs to be stored in the hctx private data instead of the nvme_loop_ctrl. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/loop.c | 83 +++++++++++----------------------------------- 1 file changed, 19 insertions(+), 64 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 54578cc18d52..b45fe3adf015 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -266,9 +266,7 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags)) return; nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); - blk_mq_destroy_queue(ctrl->ctrl.admin_q); - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); + nvme_remove_admin_tag_set(&ctrl->ctrl); } static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) @@ -282,10 +280,8 @@ static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) list_del(&ctrl->list); mutex_unlock(&nvme_loop_ctrl_mutex); - if (nctrl->tagset) { - blk_mq_destroy_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - } + if (nctrl->tagset) + nvme_remove_io_tag_set(nctrl); kfree(ctrl->queues); nvmf_free_options(nctrl->opts); free_ctrl: @@ -350,52 +346,31 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; - ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); - ctrl->admin_tag_set.driver_data = &ctrl->ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; - ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; - ctrl->queues[0].ctrl = ctrl; error = nvmet_sq_init(&ctrl->queues[0].nvme_sq); if (error) return error; ctrl->ctrl.queue_count = 1; - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); + error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, + &nvme_loop_admin_mq_ops, BLK_MQ_F_NO_SCHED, + sizeof(struct nvme_loop_iod) + + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (error) goto out_free_sq; - ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; - - ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.fabrics_q)) { - error = PTR_ERR(ctrl->ctrl.fabrics_q); - goto out_free_tagset; - } - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_cleanup_fabrics_q; - } /* reset stopped state for the fresh admin queue */ clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->ctrl.flags); error = nvmf_connect_admin_queue(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); error = nvme_enable_ctrl(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; ctrl->ctrl.max_hw_sectors = (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9); @@ -404,17 +379,13 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) error = nvme_init_ctrl_finish(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; return 0; -out_cleanup_queue: +out_cleanup_tagset: clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); - blk_mq_destroy_queue(ctrl->ctrl.admin_q); -out_cleanup_fabrics_q: - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->admin_tag_set); + nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_sq: nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); return error; @@ -522,37 +493,21 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) if (ret) return ret; - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_loop_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); - ctrl->tag_set.driver_data = &ctrl->ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - ctrl->ctrl.tagset = &ctrl->tag_set; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, + &nvme_loop_mq_ops, BLK_MQ_F_SHOULD_MERGE, + sizeof(struct nvme_loop_iod) + + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (ret) goto out_destroy_queues; - ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); - if (ret) - goto out_free_tagset; - ret = nvme_loop_connect_io_queues(ctrl); if (ret) - goto out_cleanup_connect_q; + goto out_cleanup_tagset; return 0; -out_cleanup_connect_q: - blk_mq_destroy_queue(ctrl->ctrl.connect_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->tag_set); +out_cleanup_tagset: + nvme_remove_io_tag_set(&ctrl->ctrl); out_destroy_queues: nvme_loop_destroy_io_queues(ctrl); return ret; -- cgit From fe6f04c079d0e4b38178955e0ea9d3fbc5d4066b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:50:18 +0200 Subject: nvme: remove nvme_ctrl_init_connect_q Unused now. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/nvme.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 56000a846a48..7902c5245363 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -990,14 +990,6 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) } #endif -static inline int nvme_ctrl_init_connect_q(struct nvme_ctrl *ctrl) -{ - ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); - if (IS_ERR(ctrl->connect_q)) - return PTR_ERR(ctrl->connect_q); - return 0; -} - static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { return dev_to_disk(dev)->private_data; -- cgit From 8df20252c06046ef4c68107bcaaca56c21028d8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Sep 2022 10:24:07 +0200 Subject: nvmet: don't look at the request_queue in nvmet_bdev_zone_mgmt_emulate_all nvmet is a consumer of the block layer and should not directly look at the request_queue. Just use the NUMA node ID from the gendisk instead of the request_queue. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/target/zns.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index c7ef69f29fe4..7e89056cc411 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -387,7 +387,6 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) { struct block_device *bdev = req->ns->bdev; unsigned int nr_zones = bdev_nr_zones(bdev); - struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = NULL; sector_t sector = 0; int ret; @@ -396,7 +395,7 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) }; d.zbitmap = kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(*(d.zbitmap)), - GFP_NOIO, q->node); + GFP_NOIO, bdev->bd_disk->node_id); if (!d.zbitmap) { ret = -ENOMEM; goto out; -- cgit From 84fe64f898913ef69f70a8d91aea613b5722b63b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Sep 2022 10:26:26 +0200 Subject: nvmet: don't look at the request_queue in nvmet_bdev_set_limits nvmet is a consumer of the block layer and should not directly look at the request_queue. Use the bdev_ helpers to retrieve the device limits instead. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/target/io-cmd-bdev.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 8d527a8c0f54..c2d6cea0236b 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -12,11 +12,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) { - const struct queue_limits *ql = &bdev_get_queue(bdev)->limits; - /* Number of logical blocks per physical block. */ - const u32 lpp = ql->physical_block_size / ql->logical_block_size; /* Logical blocks per physical block, 0's based. */ - const __le16 lpp0b = to0based(lpp); + const __le16 lpp0b = to0based(bdev_physical_block_size(bdev) / + bdev_logical_block_size(bdev)); /* * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN, @@ -42,11 +40,12 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) /* NPWA = Namespace Preferred Write Alignment. 0's based */ id->npwa = id->npwg; /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */ - id->npdg = to0based(ql->discard_granularity / ql->logical_block_size); + id->npdg = to0based(bdev_discard_granularity(bdev) / + bdev_logical_block_size(bdev)); /* NPDG = Namespace Preferred Deallocate Alignment */ id->npda = id->npdg; /* NOWS = Namespace Optimal Write Size */ - id->nows = to0based(ql->io_opt / ql->logical_block_size); + id->nows = to0based(bdev_io_opt(bdev) / bdev_logical_block_size(bdev)); } void nvmet_bdev_ns_disable(struct nvmet_ns *ns) -- cgit