diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bdev.c | 23 | ||||
-rw-r--r-- | block/blk-core.c | 4 | ||||
-rw-r--r-- | block/blk-flush.c | 11 | ||||
-rw-r--r-- | block/blk-lib.c | 11 | ||||
-rw-r--r-- | block/blk-map.c | 150 | ||||
-rw-r--r-- | block/blk-mq-debugfs.c | 2 | ||||
-rw-r--r-- | block/blk-mq.c | 115 | ||||
-rw-r--r-- | block/genhd.c | 3 | ||||
-rw-r--r-- | block/partitions/core.c | 3 |
9 files changed, 275 insertions, 47 deletions
diff --git a/block/bdev.c b/block/bdev.c index ce05175e71ce..d699ecdb3260 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -26,6 +26,7 @@ #include <linux/namei.h> #include <linux/part_stat.h> #include <linux/uaccess.h> +#include <linux/stat.h> #include "../fs/internal.h" #include "blk.h" @@ -1069,3 +1070,25 @@ void sync_bdevs(bool wait) spin_unlock(&blockdev_superblock->s_inode_list_lock); iput(old_inode); } + +/* + * Handle STATX_DIOALIGN for block devices. + * + * Note that the inode passed to this is the inode of a block device node file, + * not the block device's internal inode. Therefore it is *not* valid to use + * I_BDEV() here; the block device has to be looked up by i_rdev instead. + */ +void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) +{ + struct block_device *bdev; + + bdev = blkdev_get_no_open(inode->i_rdev); + if (!bdev) + return; + + stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; + stat->dio_offset_align = bdev_logical_block_size(bdev); + stat->result_mask |= STATX_DIOALIGN; + + blkdev_put_no_open(bdev); +} diff --git a/block/blk-core.c b/block/blk-core.c index 4bfc0d504b2d..17667159482e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -294,7 +294,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) while (!blk_try_enter_queue(q, pm)) { if (flags & BLK_MQ_REQ_NOWAIT) - return -EBUSY; + return -EAGAIN; /* * read pair of barrier in blk_freeze_queue_start(), we need to @@ -324,7 +324,7 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio) if (test_bit(GD_DEAD, &disk->state)) goto dead; bio_wouldblock_error(bio); - return -EBUSY; + return -EAGAIN; } /* diff --git a/block/blk-flush.c b/block/blk-flush.c index d20a0c6b2c66..53202eff545e 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -205,7 +205,6 @@ static void blk_flush_complete_seq(struct request *rq, * flush data request completion path. Restore @rq for * normal completion and end it. */ - BUG_ON(!list_empty(&rq->queuelist)); list_del_init(&rq->flush.list); blk_flush_restore_request(rq); blk_mq_end_request(rq, error); @@ -218,7 +217,8 @@ static void blk_flush_complete_seq(struct request *rq, blk_kick_flush(q, fq, cmd_flags); } -static void flush_end_io(struct request *flush_rq, blk_status_t error) +static enum rq_end_io_ret flush_end_io(struct request *flush_rq, + blk_status_t error) { struct request_queue *q = flush_rq->q; struct list_head *running; @@ -232,7 +232,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) if (!req_ref_put_and_test(flush_rq)) { fq->rq_status = error; spin_unlock_irqrestore(&fq->mq_flush_lock, flags); - return; + return RQ_END_IO_NONE; } blk_account_io_flush(flush_rq); @@ -269,6 +269,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) } spin_unlock_irqrestore(&fq->mq_flush_lock, flags); + return RQ_END_IO_NONE; } bool is_flush_rq(struct request *rq) @@ -354,7 +355,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, blk_flush_queue_rq(flush_rq, false); } -static void mq_flush_data_end_io(struct request *rq, blk_status_t error) +static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq, + blk_status_t error) { struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx = rq->mq_hctx; @@ -376,6 +378,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) spin_unlock_irqrestore(&fq->mq_flush_lock, flags); blk_mq_sched_restart(hctx); + return RQ_END_IO_NONE; } /** diff --git a/block/blk-lib.c b/block/blk-lib.c index 67e6dbc1ae81..e59c3069e835 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -309,6 +309,11 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, struct blk_plug plug; int ret = 0; + /* make sure that "len << SECTOR_SHIFT" doesn't overflow */ + if (max_sectors > UINT_MAX >> SECTOR_SHIFT) + max_sectors = UINT_MAX >> SECTOR_SHIFT; + max_sectors &= ~bs_mask; + if (max_sectors == 0) return -EOPNOTSUPP; if ((sector | nr_sects) & bs_mask) @@ -322,10 +327,10 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp); bio->bi_iter.bi_sector = sector; - bio->bi_iter.bi_size = len; + bio->bi_iter.bi_size = len << SECTOR_SHIFT; - sector += len << SECTOR_SHIFT; - nr_sects -= len << SECTOR_SHIFT; + sector += len; + nr_sects -= len; if (!nr_sects) { ret = submit_bio_wait(bio); bio_put(bio); diff --git a/block/blk-map.c b/block/blk-map.c index 7693f8e3c454..34735626b00f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -231,7 +231,7 @@ out_bmd: return ret; } -static void bio_map_put(struct bio *bio) +static void blk_mq_map_bio_put(struct bio *bio) { if (bio->bi_opf & REQ_ALLOC_CACHE) { bio_put(bio); @@ -241,17 +241,10 @@ static void bio_map_put(struct bio *bio) } } -static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, - gfp_t gfp_mask) +static struct bio *blk_rq_map_bio_alloc(struct request *rq, + unsigned int nr_vecs, gfp_t gfp_mask) { - unsigned int max_sectors = queue_max_hw_sectors(rq->q); - unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS); struct bio *bio; - int ret; - int j; - - if (!iov_iter_count(iter)) - return -EINVAL; if (rq->cmd_flags & REQ_POLLED) { blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE; @@ -259,13 +252,31 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask, &fs_bio_set); if (!bio) - return -ENOMEM; + return NULL; } else { bio = bio_kmalloc(nr_vecs, gfp_mask); if (!bio) - return -ENOMEM; + return NULL; bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq)); } + return bio; +} + +static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, + gfp_t gfp_mask) +{ + unsigned int max_sectors = queue_max_hw_sectors(rq->q); + unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS); + struct bio *bio; + int ret; + int j; + + if (!iov_iter_count(iter)) + return -EINVAL; + + bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask); + if (bio == NULL) + return -ENOMEM; while (iov_iter_count(iter)) { struct page **pages, *stack_pages[UIO_FASTIOV]; @@ -331,7 +342,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, out_unmap: bio_release_pages(bio, false); - bio_map_put(bio); + blk_mq_map_bio_put(bio); return ret; } @@ -537,6 +548,62 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio) } EXPORT_SYMBOL(blk_rq_append_bio); +/* Prepare bio for passthrough IO given ITER_BVEC iter */ +static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter) +{ + struct request_queue *q = rq->q; + size_t nr_iter = iov_iter_count(iter); + size_t nr_segs = iter->nr_segs; + struct bio_vec *bvecs, *bvprvp = NULL; + struct queue_limits *lim = &q->limits; + unsigned int nsegs = 0, bytes = 0; + struct bio *bio; + size_t i; + + if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q)) + return -EINVAL; + if (nr_segs > queue_max_segments(q)) + return -EINVAL; + + /* no iovecs to alloc, as we already have a BVEC iterator */ + bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL); + if (bio == NULL) + return -ENOMEM; + + bio_iov_bvec_set(bio, (struct iov_iter *)iter); + blk_rq_bio_prep(rq, bio, nr_segs); + + /* loop to perform a bunch of sanity checks */ + bvecs = (struct bio_vec *)iter->bvec; + for (i = 0; i < nr_segs; i++) { + struct bio_vec *bv = &bvecs[i]; + + /* + * If the queue doesn't support SG gaps and adding this + * offset would create a gap, fallback to copy. + */ + if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) { + blk_mq_map_bio_put(bio); + return -EREMOTEIO; + } + /* check full condition */ + if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len) + goto put_bio; + if (bytes + bv->bv_len > nr_iter) + goto put_bio; + if (bv->bv_offset + bv->bv_len > PAGE_SIZE) + goto put_bio; + + nsegs++; + bytes += bv->bv_len; + bvprvp = bv; + } + return 0; +put_bio: + blk_mq_map_bio_put(bio); + return -EINVAL; +} + /** * blk_rq_map_user_iov - map user data to a request, for passthrough requests * @q: request queue where request should be inserted @@ -556,24 +623,35 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, struct rq_map_data *map_data, const struct iov_iter *iter, gfp_t gfp_mask) { - bool copy = false; + bool copy = false, map_bvec = false; unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); struct bio *bio = NULL; struct iov_iter i; int ret = -EINVAL; - if (!iter_is_iovec(iter)) - goto fail; - if (map_data) copy = true; else if (blk_queue_may_bounce(q)) copy = true; else if (iov_iter_alignment(iter) & align) copy = true; + else if (iov_iter_is_bvec(iter)) + map_bvec = true; + else if (!iter_is_iovec(iter)) + copy = true; else if (queue_virt_boundary(q)) copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter); + if (map_bvec) { + ret = blk_rq_map_user_bvec(rq, iter); + if (!ret) + return 0; + if (ret != -EREMOTEIO) + goto fail; + /* fall back to copying the data on limits mismatches */ + copy = true; + } + i = *iter; do { if (copy) @@ -611,6 +689,42 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_rq_map_user); +int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data, + void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask, + bool vec, int iov_count, bool check_iter_count, int rw) +{ + int ret = 0; + + if (vec) { + struct iovec fast_iov[UIO_FASTIOV]; + struct iovec *iov = fast_iov; + struct iov_iter iter; + + ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len, + UIO_FASTIOV, &iov, &iter); + if (ret < 0) + return ret; + + if (iov_count) { + /* SG_IO howto says that the shorter of the two wins */ + iov_iter_truncate(&iter, buf_len); + if (check_iter_count && !iov_iter_count(&iter)) { + kfree(iov); + return -EINVAL; + } + } + + ret = blk_rq_map_user_iov(req->q, req, map_data, &iter, + gfp_mask); + kfree(iov); + } else if (buf_len) { + ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len, + gfp_mask); + } + return ret; +} +EXPORT_SYMBOL(blk_rq_map_user_io); + /** * blk_rq_unmap_user - unmap a request with user data * @bio: start of bio list @@ -636,7 +750,7 @@ int blk_rq_unmap_user(struct bio *bio) next_bio = bio; bio = bio->bi_next; - bio_map_put(next_bio); + blk_mq_map_bio_put(next_bio); } return ret; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 2bb6f8cdc3b5..bd942341b638 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -283,7 +283,9 @@ static const char *const rqf_name[] = { RQF_NAME(SPECIAL_PAYLOAD), RQF_NAME(ZONE_WRITE_LOCKED), RQF_NAME(MQ_POLL_SLEPT), + RQF_NAME(TIMED_OUT), RQF_NAME(ELV), + RQF_NAME(RESV), }; #undef RQF_NAME diff --git a/block/blk-mq.c b/block/blk-mq.c index 9dd3ec42613f..8070b6c10e8d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -510,25 +510,87 @@ retry: alloc_time_ns); } -struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, - blk_mq_req_flags_t flags) +static struct request *blk_mq_rq_cache_fill(struct request_queue *q, + struct blk_plug *plug, + blk_opf_t opf, + blk_mq_req_flags_t flags) { struct blk_mq_alloc_data data = { .q = q, .flags = flags, .cmd_flags = opf, - .nr_tags = 1, + .nr_tags = plug->nr_ios, + .cached_rq = &plug->cached_rq, }; struct request *rq; - int ret; - ret = blk_queue_enter(q, flags); - if (ret) - return ERR_PTR(ret); + if (blk_queue_enter(q, flags)) + return NULL; + + plug->nr_ios = 1; rq = __blk_mq_alloc_requests(&data); - if (!rq) - goto out_queue_exit; + if (unlikely(!rq)) + blk_queue_exit(q); + return rq; +} + +static struct request *blk_mq_alloc_cached_request(struct request_queue *q, + blk_opf_t opf, + blk_mq_req_flags_t flags) +{ + struct blk_plug *plug = current->plug; + struct request *rq; + + if (!plug) + return NULL; + if (rq_list_empty(plug->cached_rq)) { + if (plug->nr_ios == 1) + return NULL; + rq = blk_mq_rq_cache_fill(q, plug, opf, flags); + if (rq) + goto got_it; + return NULL; + } + rq = rq_list_peek(&plug->cached_rq); + if (!rq || rq->q != q) + return NULL; + + if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type) + return NULL; + if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) + return NULL; + + plug->cached_rq = rq_list_next(rq); +got_it: + rq->cmd_flags = opf; + INIT_LIST_HEAD(&rq->queuelist); + return rq; +} + +struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, + blk_mq_req_flags_t flags) +{ + struct request *rq; + + rq = blk_mq_alloc_cached_request(q, opf, flags); + if (!rq) { + struct blk_mq_alloc_data data = { + .q = q, + .flags = flags, + .cmd_flags = opf, + .nr_tags = 1, + }; + int ret; + + ret = blk_queue_enter(q, flags); + if (ret) + return ERR_PTR(ret); + + rq = __blk_mq_alloc_requests(&data); + if (!rq) + goto out_queue_exit; + } rq->__data_len = 0; rq->__sector = (sector_t) -1; rq->bio = rq->biotail = NULL; @@ -761,8 +823,10 @@ static void blk_complete_request(struct request *req) * can find how many bytes remain in the request * later. */ - req->bio = NULL; - req->__data_len = 0; + if (!req->end_io) { + req->bio = NULL; + req->__data_len = 0; + } } /** @@ -939,7 +1003,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) if (rq->end_io) { rq_qos_done(rq->q, rq); - rq->end_io(rq, error); + if (rq->end_io(rq, error) == RQ_END_IO_FREE) + blk_mq_free_request(rq); } else { blk_mq_free_request(rq); } @@ -992,6 +1057,13 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) rq_qos_done(rq->q, rq); + /* + * If end_io handler returns NONE, then it still has + * ownership of the request. + */ + if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE) + continue; + WRITE_ONCE(rq->state, MQ_RQ_IDLE); if (!req_ref_put_and_test(rq)) continue; @@ -1233,15 +1305,16 @@ struct blk_rq_wait { blk_status_t ret; }; -static void blk_end_sync_rq(struct request *rq, blk_status_t ret) +static enum rq_end_io_ret blk_end_sync_rq(struct request *rq, blk_status_t ret) { struct blk_rq_wait *wait = rq->end_io_data; wait->ret = ret; complete(&wait->done); + return RQ_END_IO_NONE; } -static bool blk_rq_is_poll(struct request *rq) +bool blk_rq_is_poll(struct request *rq) { if (!rq->mq_hctx) return false; @@ -1251,6 +1324,7 @@ static bool blk_rq_is_poll(struct request *rq) return false; return true; } +EXPORT_SYMBOL_GPL(blk_rq_is_poll); static void blk_rq_poll_completion(struct request *rq, struct completion *wait) { @@ -1471,10 +1545,12 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next) void blk_mq_put_rq_ref(struct request *rq) { - if (is_flush_rq(rq)) - rq->end_io(rq, 0); - else if (req_ref_put_and_test(rq)) + if (is_flush_rq(rq)) { + if (rq->end_io(rq, 0) == RQ_END_IO_FREE) + blk_mq_free_request(rq); + } else if (req_ref_put_and_test(rq)) { __blk_mq_free_request(rq); + } } static bool blk_mq_check_expired(struct request *rq, void *priv) @@ -1939,7 +2015,8 @@ out: /* If we didn't flush the entire list, we could have told the driver * there was more coming, but that turned out to be a lie. */ - if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued) + if ((!list_empty(list) || errors || needs_resource || + ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued) q->mq_ops->commit_rqs(hctx); /* * Any items that need requeuing? Stuff them into hctx->dispatch, @@ -2668,6 +2745,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, list_del_init(&rq->queuelist); ret = blk_mq_request_issue_directly(rq, list_empty(list)); if (ret != BLK_STS_OK) { + errors++; if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { blk_mq_request_bypass_insert(rq, false, @@ -2675,7 +2753,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, break; } blk_mq_end_request(rq, ret); - errors++; } else queued++; } diff --git a/block/genhd.c b/block/genhd.c index dc9b61dfb692..17b33c62423d 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -609,7 +609,6 @@ void del_gendisk(struct gendisk *disk) * Prevent new I/O from crossing bio_queue_enter(). */ blk_queue_start_drain(q); - blk_mq_freeze_queue_wait(q); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); @@ -633,6 +632,8 @@ void del_gendisk(struct gendisk *disk) pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); + blk_mq_freeze_queue_wait(q); + blk_throtl_cancel_bios(disk); blk_sync_queue(q); diff --git a/block/partitions/core.c b/block/partitions/core.c index fc1d70384825..b8112f52d388 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -596,6 +596,9 @@ static int blk_add_partitions(struct gendisk *disk) if (disk->flags & GENHD_FL_NO_PART) return 0; + if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state)) + return 0; + state = check_partition(disk); if (!state) return 0; |