diff options
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r-- | drivers/nvme/host/core.c | 147 |
1 files changed, 133 insertions, 14 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 13676f6cd4f6..59f80a613fd8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -185,17 +185,22 @@ static inline bool nvme_req_needs_retry(struct request *req) return false; if (nvme_req(req)->retries >= nvme_max_retries) return false; - if (blk_queue_dying(req->q)) - return false; return true; } void nvme_complete_rq(struct request *req) { if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) { - nvme_req(req)->retries++; - blk_mq_requeue_request(req, true); - return; + if (nvme_req_needs_failover(req)) { + nvme_failover_req(req); + return; + } + + if (!blk_queue_dying(req->q)) { + nvme_req(req)->retries++; + blk_mq_requeue_request(req, true); + return; + } } blk_mq_end_request(req, nvme_error_status(req)); @@ -286,7 +291,8 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, ctrl->state = new_state; spin_unlock_irqrestore(&ctrl->lock, flags); - + if (changed && ctrl->state == NVME_CTRL_LIVE) + nvme_kick_requeue_lists(ctrl); return changed; } EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); @@ -296,6 +302,7 @@ static void nvme_free_ns_head(struct kref *ref) struct nvme_ns_head *head = container_of(ref, struct nvme_ns_head, ref); + nvme_mpath_remove_disk(head); ida_simple_remove(&head->subsys->ns_ida, head->instance); list_del_init(&head->entry); cleanup_srcu_struct(&head->srcu); @@ -1138,11 +1145,33 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return status; } -static int nvme_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) +/* + * Issue ioctl requests on the first available path. Note that unlike normal + * block layer requests we will not retry a failed request on another controller.
+ */ +static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk, + struct nvme_ns_head **head, int *srcu_idx) { - struct nvme_ns *ns = bdev->bd_disk->private_data; +#ifdef CONFIG_NVME_MULTIPATH + if (disk->fops == &nvme_ns_head_ops) { + *head = disk->private_data; + *srcu_idx = srcu_read_lock(&(*head)->srcu); + return nvme_find_path(*head); + } +#endif + *head = NULL; + *srcu_idx = -1; + return disk->private_data; +} + +static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx) +{ + if (head) + srcu_read_unlock(&head->srcu, idx); +} +static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned cmd, unsigned long arg) +{ switch (cmd) { case NVME_IOCTL_ID: force_successful_syscall_return(); @@ -1165,10 +1194,31 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, } } +static int nvme_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct nvme_ns_head *head = NULL; + struct nvme_ns *ns; + int srcu_idx, ret; + + ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); + if (unlikely(!ns)) + ret = -EWOULDBLOCK; + else + ret = nvme_ns_ioctl(ns, cmd, arg); + nvme_put_ns_from_disk(head, srcu_idx); + return ret; +} + static int nvme_open(struct block_device *bdev, fmode_t mode) { struct nvme_ns *ns = bdev->bd_disk->private_data; +#ifdef CONFIG_NVME_MULTIPATH + /* should never be called due to GENHD_FL_HIDDEN */ + if (WARN_ON_ONCE(ns->head->disk)) + return -ENXIO; +#endif if (!kref_get_unless_zero(&ns->kref)) return -ENXIO; return 0; @@ -1329,6 +1379,10 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->noiob) nvme_set_chunk_size(ns); nvme_update_disk_info(disk, ns, id); +#ifdef CONFIG_NVME_MULTIPATH + if (ns->head->disk) + nvme_update_disk_info(ns->head->disk, ns, id); +#endif } static int nvme_revalidate_disk(struct gendisk *disk) @@ -1388,8 +1442,10 @@ static char nvme_pr_type(enum pr_type type) static int nvme_pr_command(struct block_device *bdev, u32 cdw10, 
u64 key, u64 sa_key, u8 op) { - struct nvme_ns *ns = bdev->bd_disk->private_data; + struct nvme_ns_head *head = NULL; + struct nvme_ns *ns; struct nvme_command c; + int srcu_idx, ret; u8 data[16] = { 0, }; put_unaligned_le64(key, &data[0]); @@ -1397,10 +1453,16 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10, memset(&c, 0, sizeof(c)); c.common.opcode = op; - c.common.nsid = cpu_to_le32(ns->head->ns_id); + c.common.nsid = cpu_to_le32(head->ns_id); c.common.cdw10[0] = cpu_to_le32(cdw10); - return nvme_submit_sync_cmd(ns->queue, &c, data, 16); + ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); + if (unlikely(!ns)) + ret = -EWOULDBLOCK; + else + ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); + nvme_put_ns_from_disk(head, srcu_idx); + return ret; } static int nvme_pr_register(struct block_device *bdev, u64 old, @@ -1490,6 +1552,32 @@ static const struct block_device_operations nvme_fops = { .pr_ops = &nvme_pr_ops, }; +#ifdef CONFIG_NVME_MULTIPATH +static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) +{ + struct nvme_ns_head *head = bdev->bd_disk->private_data; + + if (!kref_get_unless_zero(&head->ref)) + return -ENXIO; + return 0; +} + +static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode) +{ + nvme_put_ns_head(disk->private_data); +} + +const struct block_device_operations nvme_ns_head_ops = { + .owner = THIS_MODULE, + .open = nvme_ns_head_open, + .release = nvme_ns_head_release, + .ioctl = nvme_ioctl, + .compat_ioctl = nvme_ioctl, + .getgeo = nvme_getgeo, + .pr_ops = &nvme_pr_ops, +}; +#endif /* CONFIG_NVME_MULTIPATH */ + static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) { unsigned long timeout = @@ -2592,6 +2680,10 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_cleanup_srcu; } + ret = nvme_mpath_alloc_disk(ctrl, head); + if (ret) + goto out_cleanup_srcu; + list_add_tail(&head->entry, &ctrl->subsys->nsheads); return head; out_cleanup_srcu: 
@@ -2704,7 +2796,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) struct gendisk *disk; struct nvme_id_ns *id; char disk_name[DISK_NAME_LEN]; - int node = dev_to_node(ctrl->dev); + int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT; bool new = true; ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); @@ -2735,7 +2827,30 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (nvme_init_ns_head(ns, nsid, id, &new)) goto out_free_id; +#ifdef CONFIG_NVME_MULTIPATH + /* + * If multipathing is enabled we need to always use the subsystem + * instance number for numbering our devices to avoid conflicts + * between subsystems that have multiple controllers and thus use + * the multipath-aware subsystem node and those that have a single + * controller and use the controller node directly. + */ + if (ns->head->disk) { + sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, + ctrl->cntlid, ns->head->instance); + flags = GENHD_FL_HIDDEN; + } else { + sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, + ns->head->instance); + } +#else + /* + * But without the multipath code enabled, multiple controllers per + * subsystem are visible as devices and thus we cannot use the + * subsystem instance.
+ */ sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); +#endif if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { if (nvme_nvm_register(ns, disk_name, node)) { @@ -2751,7 +2866,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) disk->fops = &nvme_fops; disk->private_data = ns; disk->queue = ns->queue; - disk->flags = GENHD_FL_EXT_DEVT; + disk->flags = flags; memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); ns->disk = disk; @@ -2773,6 +2888,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (ns->ndev && nvme_nvm_register_sysfs(ns)) pr_warn("%s: failed to register lightnvm sysfs group for identification\n", ns->disk->disk_name); + + if (new) + nvme_mpath_add_disk(ns->head); return; out_unlink_ns: mutex_lock(&ctrl->subsys->lock); @@ -2805,6 +2923,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) } mutex_lock(&ns->ctrl->subsys->lock); + nvme_mpath_clear_current_path(ns); if (head) list_del_rcu(&ns->siblings); mutex_unlock(&ns->ctrl->subsys->lock); |