diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 2 | ||||
-rw-r--r-- | drivers/block/brd.c | 6 | ||||
-rw-r--r-- | drivers/block/loop.c | 42 | ||||
-rw-r--r-- | drivers/block/loop.h | 1 | ||||
-rw-r--r-- | drivers/block/null_blk.c | 2 | ||||
-rw-r--r-- | drivers/block/ps3vram.c | 10 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 16 | ||||
-rw-r--r-- | drivers/block/xen-blkback/xenbus.c | 10 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 6 | ||||
-rw-r--r-- | drivers/block/zram/Kconfig | 12 | ||||
-rw-r--r-- | drivers/block/zram/zram_drv.c | 540 | ||||
-rw-r--r-- | drivers/block/zram/zram_drv.h | 11 |
12 files changed, 521 insertions, 137 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 8ddc98279c8f..80aaf3420e12 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -470,7 +470,7 @@ config VIRTIO_BLK depends on VIRTIO ---help--- This is the virtual block driver for virtio. It can be used with - lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. + QEMU based VMMs (like KVM or Xen). Say Y or M. config VIRTIO_BLK_SCSI bool "SCSI passthrough request for the Virtio block driver" diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 104b71c0490d..5d9ed0616413 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -326,7 +326,11 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, struct page *page, bool is_write) { struct brd_device *brd = bdev->bd_disk->private_data; - int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector); + int err; + + if (PageTransHuge(page)) + return -ENOTSUPP; + err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector); page_endio(page, is_write, err); return err; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ef8334949b42..f321b96405f5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -221,8 +221,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) } static int -figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit, - loff_t logical_blocksize) +figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); sector_t x = (sector_t)size; @@ -234,12 +233,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit, lo->lo_offset = offset; if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; - if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) { - lo->lo_logical_blocksize = logical_blocksize; - blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize); - blk_queue_logical_block_size(lo->lo_queue, - lo->lo_logical_blocksize); - } 
set_capacity(lo->lo_disk, x); bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); /* let user-space know about the new size */ @@ -820,7 +813,6 @@ static void loop_config_discard(struct loop_device *lo) struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; struct request_queue *q = lo->lo_queue; - int lo_bits = 9; /* * We use punch hole to reclaim the free space used by the @@ -840,11 +832,9 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_granularity = inode->i_sb->s_blocksize; q->limits.discard_alignment = 0; - if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) - lo_bits = blksize_bits(lo->lo_logical_blocksize); - blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits); - blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits); + blk_queue_max_discard_sectors(q, UINT_MAX >> 9); + blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } @@ -938,7 +928,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->use_dio = false; lo->lo_blocksize = lo_blocksize; - lo->lo_logical_blocksize = 512; lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; @@ -1104,7 +1093,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; struct loop_func_table *xfer; kuid_t uid = current_uid(); - int lo_flags = lo->lo_flags; if (lo->lo_encrypt_key_size && !uid_eq(lo->lo_key_owner, uid) && @@ -1137,26 +1125,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (err) goto exit; - if (info->lo_flags & LO_FLAGS_BLOCKSIZE) { - if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE)) - lo->lo_logical_blocksize = 512; - lo->lo_flags |= LO_FLAGS_BLOCKSIZE; - if (LO_INFO_BLOCKSIZE(info) != 512 && - LO_INFO_BLOCKSIZE(info) != 1024 && - LO_INFO_BLOCKSIZE(info) != 2048 && - LO_INFO_BLOCKSIZE(info) != 4096) - return -EINVAL; - if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize) - return -EINVAL; - } - if (lo->lo_offset != 
info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit || - lo->lo_flags != lo_flags || - ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) && - lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) { - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit, - LO_INFO_BLOCKSIZE(info))) { + lo->lo_sizelimit != info->lo_sizelimit) { + if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto exit; } @@ -1348,8 +1319,7 @@ static int loop_set_capacity(struct loop_device *lo) if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit, - lo->lo_logical_blocksize); + return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } static int loop_set_dio(struct loop_device *lo, unsigned long arg) diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 2c096b9a17b8..fecd3f97ef8c 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -49,7 +49,6 @@ struct loop_device { struct file * lo_backing_file; struct block_device *lo_device; unsigned lo_blocksize; - unsigned lo_logical_blocksize; void *key_data; gfp_t old_gfp_mask; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 85c24cace973..81142ce781da 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -13,7 +13,7 @@ struct nullb_cmd { struct list_head list; struct llist_node ll_list; - struct call_single_data csd; + call_single_data_t csd; struct request *rq; struct bio *bio; unsigned int tag; diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index e0e81cacd781..6a55959cbf78 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -409,10 +409,8 @@ static int ps3vram_cache_init(struct ps3_system_bus_device *dev) priv->cache.page_size = CACHE_PAGE_SIZE; priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) * CACHE_PAGE_COUNT, GFP_KERNEL); - if (priv->cache.tags == NULL) { - dev_err(&dev->core, "Could not allocate cache tags\n"); + if (!priv->cache.tags) return 
-ENOMEM; - } dev_info(&dev->core, "Created ram cache: %d entries, %d KiB each\n", CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024); @@ -743,7 +741,11 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) goto out_unmap_reports; } - ps3vram_cache_init(dev); + error = ps3vram_cache_init(dev); + if (error < 0) { + goto out_unmap_reports; + } + ps3vram_proc_init(dev); queue = blk_alloc_queue(GFP_KERNEL); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1498b899a593..d3d5523862c2 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -381,6 +381,7 @@ static void virtblk_config_changed_work(struct work_struct *work) struct request_queue *q = vblk->disk->queue; char cap_str_2[10], cap_str_10[10]; char *envp[] = { "RESIZE=1", NULL }; + unsigned long long nblocks; u64 capacity; /* Host must always specify the capacity. */ @@ -393,16 +394,19 @@ static void virtblk_config_changed_work(struct work_struct *work) capacity = (sector_t)-1; } - string_get_size(capacity, queue_logical_block_size(q), + nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); + + string_get_size(nblocks, queue_logical_block_size(q), STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); - string_get_size(capacity, queue_logical_block_size(q), + string_get_size(nblocks, queue_logical_block_size(q), STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); dev_notice(&vdev->dev, - "new size: %llu %d-byte logical blocks (%s/%s)\n", - (unsigned long long)capacity, - queue_logical_block_size(q), - cap_str_10, cap_str_2); + "new size: %llu %d-byte logical blocks (%s/%s)\n", + nblocks, + queue_logical_block_size(q), + cap_str_10, + cap_str_2); set_capacity(vblk->disk, capacity); revalidate_disk(vblk->disk); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 792da683e70d..2adb8599be93 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -244,6 +244,7 @@ static int 
xen_blkif_disconnect(struct xen_blkif *blkif) { struct pending_req *req, *n; unsigned int j, r; + bool busy = false; for (r = 0; r < blkif->nr_rings; r++) { struct xen_blkif_ring *ring = &blkif->rings[r]; @@ -261,8 +262,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) * don't have any discard_io or other_io requests. So, checking * for inflight IO is enough. */ - if (atomic_read(&ring->inflight) > 0) - return -EBUSY; + if (atomic_read(&ring->inflight) > 0) { + busy = true; + continue; + } if (ring->irq) { unbind_from_irqhandler(ring->irq, ring); @@ -300,6 +303,9 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); ring->active = false; } + if (busy) + return -EBUSY; + blkif->nr_ring_pages = 0; /* * blkif->rings was allocated in connect_ring, so we should free it in diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 98e34e4c62b8..2468c28d4771 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2075,9 +2075,9 @@ static int blkfront_resume(struct xenbus_device *dev) /* * Get the bios in the request so we can re-queue them. */ - if (req_op(shadow[i].request) == REQ_OP_FLUSH || - req_op(shadow[i].request) == REQ_OP_DISCARD || - req_op(shadow[i].request) == REQ_OP_SECURE_ERASE || + if (req_op(shadow[j].request) == REQ_OP_FLUSH || + req_op(shadow[j].request) == REQ_OP_DISCARD || + req_op(shadow[j].request) == REQ_OP_SECURE_ERASE || shadow[j].request->cmd_flags & REQ_FUA) { /* * Flush operations don't contain bios, so diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index b8ecba6dcd3b..7cd4a8ec3c8f 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -13,3 +13,15 @@ config ZRAM disks and maybe many more. See zram.txt for more information. 
+ +config ZRAM_WRITEBACK + bool "Write back incompressible page to backing device" + depends on ZRAM + default n + help + With incompressible page, there is no memory saving to keep it + in memory. Instead, write it out to backing device. + For this feature, admin should set up backing device via + /sys/block/zramX/backing_dev. + + See zram.txt for more information. diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3b1b6340ba13..4a0438c4ef2a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -270,6 +270,349 @@ static ssize_t mem_used_max_store(struct device *dev, return len; } +#ifdef CONFIG_ZRAM_WRITEBACK +static bool zram_wb_enabled(struct zram *zram) +{ + return zram->backing_dev; +} + +static void reset_bdev(struct zram *zram) +{ + struct block_device *bdev; + + if (!zram_wb_enabled(zram)) + return; + + bdev = zram->bdev; + if (zram->old_block_size) + set_blocksize(bdev, zram->old_block_size); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + /* hope filp_close flush all of IO */ + filp_close(zram->backing_dev, NULL); + zram->backing_dev = NULL; + zram->old_block_size = 0; + zram->bdev = NULL; + + kvfree(zram->bitmap); + zram->bitmap = NULL; +} + +static ssize_t backing_dev_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + struct file *file = zram->backing_dev; + char *p; + ssize_t ret; + + down_read(&zram->init_lock); + if (!zram_wb_enabled(zram)) { + memcpy(buf, "none\n", 5); + up_read(&zram->init_lock); + return 5; + } + + p = file_path(file, buf, PAGE_SIZE - 1); + if (IS_ERR(p)) { + ret = PTR_ERR(p); + goto out; + } + + ret = strlen(p); + memmove(buf, p, ret); + buf[ret++] = '\n'; +out: + up_read(&zram->init_lock); + return ret; +} + +static ssize_t backing_dev_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + char *file_name; + struct file *backing_dev = NULL; + struct inode *inode; 
+ struct address_space *mapping; + unsigned int bitmap_sz, old_block_size = 0; + unsigned long nr_pages, *bitmap = NULL; + struct block_device *bdev = NULL; + int err; + struct zram *zram = dev_to_zram(dev); + + file_name = kmalloc(PATH_MAX, GFP_KERNEL); + if (!file_name) + return -ENOMEM; + + down_write(&zram->init_lock); + if (init_done(zram)) { + pr_info("Can't setup backing device for initialized device\n"); + err = -EBUSY; + goto out; + } + + strlcpy(file_name, buf, len); + + backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); + if (IS_ERR(backing_dev)) { + err = PTR_ERR(backing_dev); + backing_dev = NULL; + goto out; + } + + mapping = backing_dev->f_mapping; + inode = mapping->host; + + /* Support only block device in this moment */ + if (!S_ISBLK(inode->i_mode)) { + err = -ENOTBLK; + goto out; + } + + bdev = bdgrab(I_BDEV(inode)); + err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); + if (err < 0) + goto out; + + nr_pages = i_size_read(inode) >> PAGE_SHIFT; + bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); + bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); + if (!bitmap) { + err = -ENOMEM; + goto out; + } + + old_block_size = block_size(bdev); + err = set_blocksize(bdev, PAGE_SIZE); + if (err) + goto out; + + reset_bdev(zram); + spin_lock_init(&zram->bitmap_lock); + + zram->old_block_size = old_block_size; + zram->bdev = bdev; + zram->backing_dev = backing_dev; + zram->bitmap = bitmap; + zram->nr_pages = nr_pages; + up_write(&zram->init_lock); + + pr_info("setup backing device %s\n", file_name); + kfree(file_name); + + return len; +out: + if (bitmap) + kvfree(bitmap); + + if (bdev) + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + + if (backing_dev) + filp_close(backing_dev, NULL); + + up_write(&zram->init_lock); + + kfree(file_name); + + return err; +} + +static unsigned long get_entry_bdev(struct zram *zram) +{ + unsigned long entry; + + spin_lock(&zram->bitmap_lock); + /* skip 0 bit to confuse zram.handle = 0 */ + entry = 
find_next_zero_bit(zram->bitmap, zram->nr_pages, 1); + if (entry == zram->nr_pages) { + spin_unlock(&zram->bitmap_lock); + return 0; + } + + set_bit(entry, zram->bitmap); + spin_unlock(&zram->bitmap_lock); + + return entry; +} + +static void put_entry_bdev(struct zram *zram, unsigned long entry) +{ + int was_set; + + spin_lock(&zram->bitmap_lock); + was_set = test_and_clear_bit(entry, zram->bitmap); + spin_unlock(&zram->bitmap_lock); + WARN_ON_ONCE(!was_set); +} + +void zram_page_end_io(struct bio *bio) +{ + struct page *page = bio->bi_io_vec[0].bv_page; + + page_endio(page, op_is_write(bio_op(bio)), + blk_status_to_errno(bio->bi_status)); + bio_put(bio); +} + +/* + * Returns 1 if the submission is successful. + */ +static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, + unsigned long entry, struct bio *parent) +{ + struct bio *bio; + + bio = bio_alloc(GFP_ATOMIC, 1); + if (!bio) + return -ENOMEM; + + bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); + bio->bi_bdev = zram->bdev; + if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { + bio_put(bio); + return -EIO; + } + + if (!parent) { + bio->bi_opf = REQ_OP_READ; + bio->bi_end_io = zram_page_end_io; + } else { + bio->bi_opf = parent->bi_opf; + bio_chain(bio, parent); + } + + submit_bio(bio); + return 1; +} + +struct zram_work { + struct work_struct work; + struct zram *zram; + unsigned long entry; + struct bio *bio; +}; + +#if PAGE_SIZE != 4096 +static void zram_sync_read(struct work_struct *work) +{ + struct bio_vec bvec; + struct zram_work *zw = container_of(work, struct zram_work, work); + struct zram *zram = zw->zram; + unsigned long entry = zw->entry; + struct bio *bio = zw->bio; + + read_from_bdev_async(zram, &bvec, entry, bio); +} + +/* + * Block layer want one ->make_request_fn to be active at a time + * so if we use chained IO with parent IO in same context, + * it's a deadlock. To avoid, it, it uses worker thread context. 
+ */ +static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, + unsigned long entry, struct bio *bio) +{ + struct zram_work work; + + work.zram = zram; + work.entry = entry; + work.bio = bio; + + INIT_WORK_ONSTACK(&work.work, zram_sync_read); + queue_work(system_unbound_wq, &work.work); + flush_work(&work.work); + destroy_work_on_stack(&work.work); + + return 1; +} +#else +static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, + unsigned long entry, struct bio *bio) +{ + WARN_ON(1); + return -EIO; +} +#endif + +static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, + unsigned long entry, struct bio *parent, bool sync) +{ + if (sync) + return read_from_bdev_sync(zram, bvec, entry, parent); + else + return read_from_bdev_async(zram, bvec, entry, parent); +} + +static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, + u32 index, struct bio *parent, + unsigned long *pentry) +{ + struct bio *bio; + unsigned long entry; + + bio = bio_alloc(GFP_ATOMIC, 1); + if (!bio) + return -ENOMEM; + + entry = get_entry_bdev(zram); + if (!entry) { + bio_put(bio); + return -ENOSPC; + } + + bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); + bio->bi_bdev = zram->bdev; + if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, + bvec->bv_offset)) { + bio_put(bio); + put_entry_bdev(zram, entry); + return -EIO; + } + + if (!parent) { + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; + bio->bi_end_io = zram_page_end_io; + } else { + bio->bi_opf = parent->bi_opf; + bio_chain(bio, parent); + } + + submit_bio(bio); + *pentry = entry; + + return 0; +} + +static void zram_wb_clear(struct zram *zram, u32 index) +{ + unsigned long entry; + + zram_clear_flag(zram, index, ZRAM_WB); + entry = zram_get_element(zram, index); + zram_set_element(zram, index, 0); + put_entry_bdev(zram, entry); +} + +#else +static bool zram_wb_enabled(struct zram *zram) { return false; } +static inline void reset_bdev(struct zram *zram) {}; +static int write_to_bdev(struct zram 
*zram, struct bio_vec *bvec, + u32 index, struct bio *parent, + unsigned long *pentry) + +{ + return -EIO; +} + +static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, + unsigned long entry, struct bio *parent, bool sync) +{ + return -EIO; +} +static void zram_wb_clear(struct zram *zram, u32 index) {} +#endif + + /* * We switched to per-cpu streams and this attr is not needed anymore. * However, we will keep it around for some time, because: @@ -453,30 +796,6 @@ static bool zram_same_page_read(struct zram *zram, u32 index, return false; } -static bool zram_same_page_write(struct zram *zram, u32 index, - struct page *page) -{ - unsigned long element; - void *mem = kmap_atomic(page); - - if (page_same_filled(mem, &element)) { - kunmap_atomic(mem); - /* Free memory associated with this sector now. */ - zram_slot_lock(zram, index); - zram_free_page(zram, index); - zram_set_flag(zram, index, ZRAM_SAME); - zram_set_element(zram, index, element); - zram_slot_unlock(zram, index); - - atomic64_inc(&zram->stats.same_pages); - atomic64_inc(&zram->stats.pages_stored); - return true; - } - kunmap_atomic(mem); - - return false; -} - static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -515,7 +834,13 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) */ static void zram_free_page(struct zram *zram, size_t index) { - unsigned long handle = zram_get_handle(zram, index); + unsigned long handle; + + if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) { + zram_wb_clear(zram, index); + atomic64_dec(&zram->stats.pages_stored); + return; + } /* * No memory is allocated for same element filled pages. 
@@ -529,6 +854,7 @@ static void zram_free_page(struct zram *zram, size_t index) return; } + handle = zram_get_handle(zram, index); if (!handle) return; @@ -542,13 +868,31 @@ static void zram_free_page(struct zram *zram, size_t index) zram_set_obj_size(zram, index, 0); } -static int zram_decompress_page(struct zram *zram, struct page *page, u32 index) +static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, + struct bio *bio, bool partial_io) { int ret; unsigned long handle; unsigned int size; void *src, *dst; + if (zram_wb_enabled(zram)) { + zram_slot_lock(zram, index); + if (zram_test_flag(zram, index, ZRAM_WB)) { + struct bio_vec bvec; + + zram_slot_unlock(zram, index); + + bvec.bv_page = page; + bvec.bv_len = PAGE_SIZE; + bvec.bv_offset = 0; + return read_from_bdev(zram, &bvec, + zram_get_element(zram, index), + bio, partial_io); + } + zram_slot_unlock(zram, index); + } + if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE)) return 0; @@ -581,7 +925,7 @@ static int zram_decompress_page(struct zram *zram, struct page *page, u32 index) } static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset) + u32 index, int offset, struct bio *bio) { int ret; struct page *page; @@ -594,7 +938,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, return -ENOMEM; } - ret = zram_decompress_page(zram, page, index); + ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); if (unlikely(ret)) goto out; @@ -613,30 +957,57 @@ out: return ret; } -static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm, - struct page *page, - unsigned long *out_handle, unsigned int *out_comp_len) +static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, + u32 index, struct bio *bio) { - int ret; - unsigned int comp_len; - void *src; + int ret = 0; unsigned long alloced_pages; unsigned long handle = 0; + unsigned int comp_len = 0; + void *src, *dst, *mem; + struct zcomp_strm *zstrm; + 
struct page *page = bvec->bv_page; + unsigned long element = 0; + enum zram_pageflags flags = 0; + bool allow_wb = true; + + mem = kmap_atomic(page); + if (page_same_filled(mem, &element)) { + kunmap_atomic(mem); + /* Free memory associated with this sector now. */ + flags = ZRAM_SAME; + atomic64_inc(&zram->stats.same_pages); + goto out; + } + kunmap_atomic(mem); compress_again: + zstrm = zcomp_stream_get(zram->comp); src = kmap_atomic(page); - ret = zcomp_compress(*zstrm, src, &comp_len); + ret = zcomp_compress(zstrm, src, &comp_len); kunmap_atomic(src); if (unlikely(ret)) { + zcomp_stream_put(zram->comp); pr_err("Compression failed! err=%d\n", ret); - if (handle) - zs_free(zram->mem_pool, handle); + zs_free(zram->mem_pool, handle); return ret; } - if (unlikely(comp_len > max_zpage_size)) + if (unlikely(comp_len > max_zpage_size)) { + if (zram_wb_enabled(zram) && allow_wb) { + zcomp_stream_put(zram->comp); + ret = write_to_bdev(zram, bvec, index, bio, &element); + if (!ret) { + flags = ZRAM_WB; + ret = 1; + goto out; + } + allow_wb = false; + goto compress_again; + } comp_len = PAGE_SIZE; + } /* * handle allocation has 2 paths: @@ -663,7 +1034,6 @@ compress_again: handle = zs_malloc(zram->mem_pool, comp_len, GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE); - *zstrm = zcomp_stream_get(zram->comp); if (handle) goto compress_again; return -ENOMEM; @@ -673,34 +1043,11 @@ compress_again: update_used_max(zram, alloced_pages); if (zram->limit_pages && alloced_pages > zram->limit_pages) { + zcomp_stream_put(zram->comp); zs_free(zram->mem_pool, handle); return -ENOMEM; } - *out_handle = handle; - *out_comp_len = comp_len; - return 0; -} - -static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index) -{ - int ret; - unsigned long handle; - unsigned int comp_len; - void *src, *dst; - struct zcomp_strm *zstrm; - struct page *page = bvec->bv_page; - - if (zram_same_page_write(zram, index, page)) - return 0; - - zstrm = zcomp_stream_get(zram->comp); - ret = 
zram_compress(zram, &zstrm, page, &handle, &comp_len); - if (ret) { - zcomp_stream_put(zram->comp); - return ret; - } - dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); src = zstrm->buffer; @@ -712,25 +1059,31 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index) zcomp_stream_put(zram->comp); zs_unmap_object(zram->mem_pool, handle); - + atomic64_add(comp_len, &zram->stats.compr_data_size); +out: /* * Free memory associated with this sector * before overwriting unused sectors. */ zram_slot_lock(zram, index); zram_free_page(zram, index); - zram_set_handle(zram, index, handle); - zram_set_obj_size(zram, index, comp_len); + + if (flags) { + zram_set_flag(zram, index, flags); + zram_set_element(zram, index, element); + } else { + zram_set_handle(zram, index, handle); + zram_set_obj_size(zram, index, comp_len); + } zram_slot_unlock(zram, index); /* Update stats */ - atomic64_add(comp_len, &zram->stats.compr_data_size); atomic64_inc(&zram->stats.pages_stored); - return 0; + return ret; } static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset) + u32 index, int offset, struct bio *bio) { int ret; struct page *page = NULL; @@ -748,7 +1101,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (!page) return -ENOMEM; - ret = zram_decompress_page(zram, page, index); + ret = __zram_bvec_read(zram, page, index, bio, true); if (ret) goto out; @@ -763,7 +1116,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, vec.bv_offset = 0; } - ret = __zram_bvec_write(zram, &vec, index); + ret = __zram_bvec_write(zram, &vec, index, bio); out: if (is_partial_io(bvec)) __free_page(page); @@ -808,8 +1161,13 @@ static void zram_bio_discard(struct zram *zram, u32 index, } } +/* + * Returns errno if it has some problem. Otherwise return 0 or 1. + * Returns 0 if IO request was done synchronously + * Returns 1 if IO request was successfully submitted. 
+ */ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, bool is_write) + int offset, bool is_write, struct bio *bio) { unsigned long start_time = jiffies; int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ; @@ -820,16 +1178,16 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, if (!is_write) { atomic64_inc(&zram->stats.num_reads); - ret = zram_bvec_read(zram, bvec, index, offset); + ret = zram_bvec_read(zram, bvec, index, offset, bio); flush_dcache_page(bvec->bv_page); } else { atomic64_inc(&zram->stats.num_writes); - ret = zram_bvec_write(zram, bvec, index, offset); + ret = zram_bvec_write(zram, bvec, index, offset, bio); } generic_end_io_acct(rw_acct, &zram->disk->part0, start_time); - if (unlikely(ret)) { + if (unlikely(ret < 0)) { if (!is_write) atomic64_inc(&zram->stats.failed_reads); else @@ -868,7 +1226,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, unwritten); if (zram_bvec_rw(zram, &bv, index, offset, - op_is_write(bio_op(bio))) < 0) + op_is_write(bio_op(bio)), bio) < 0) goto out; bv.bv_offset += bv.bv_len; @@ -922,16 +1280,18 @@ static void zram_slot_free_notify(struct block_device *bdev, static int zram_rw_page(struct block_device *bdev, sector_t sector, struct page *page, bool is_write) { - int offset, err = -EIO; + int offset, ret; u32 index; struct zram *zram; struct bio_vec bv; + if (PageTransHuge(page)) + return -ENOTSUPP; zram = bdev->bd_disk->private_data; if (!valid_io_request(zram, sector, PAGE_SIZE)) { atomic64_inc(&zram->stats.invalid_io); - err = -EINVAL; + ret = -EINVAL; goto out; } @@ -942,7 +1302,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; - err = zram_bvec_rw(zram, &bv, index, offset, is_write); + ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL); out: /* * If I/O fails, just return error(ie, non-zero) without @@ 
-952,9 +1312,20 @@ out: * bio->bi_end_io does things to handle the error * (e.g., SetPageError, set_page_dirty and extra works). */ - if (err == 0) + if (unlikely(ret < 0)) + return ret; + + switch (ret) { + case 0: page_endio(page, is_write, 0); - return err; + break; + case 1: + ret = 0; + break; + default: + WARN_ON(1); + } + return ret; } static void zram_reset_device(struct zram *zram) @@ -983,6 +1354,7 @@ static void zram_reset_device(struct zram *zram) zram_meta_free(zram, disksize); memset(&zram->stats, 0, sizeof(zram->stats)); zcomp_destroy(comp); + reset_bdev(zram); } static ssize_t disksize_store(struct device *dev, @@ -1108,6 +1480,9 @@ static DEVICE_ATTR_WO(mem_limit); static DEVICE_ATTR_WO(mem_used_max); static DEVICE_ATTR_RW(max_comp_streams); static DEVICE_ATTR_RW(comp_algorithm); +#ifdef CONFIG_ZRAM_WRITEBACK +static DEVICE_ATTR_RW(backing_dev); +#endif static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -1118,6 +1493,9 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_mem_used_max.attr, &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, +#ifdef CONFIG_ZRAM_WRITEBACK + &dev_attr_backing_dev.attr, +#endif &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, &dev_attr_debug_stat.attr, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index e34e44d02e3e..31762db861e3 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -60,9 +60,10 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /* Flags for zram pages (table[page_no].value) */ enum zram_pageflags { - /* Page consists entirely of zeros */ + /* Page consists the same element */ ZRAM_SAME = ZRAM_FLAG_SHIFT, ZRAM_ACCESS, /* page is now accessed */ + ZRAM_WB, /* page is stored on backing_device */ __NR_ZRAM_PAGEFLAGS, }; @@ -115,5 +116,13 @@ struct zram { * zram is claimed so open request will be failed */ bool claim; /* Protected by bdev->bd_mutex */ +#ifdef CONFIG_ZRAM_WRITEBACK + struct file 
*backing_dev; + struct block_device *bdev; + unsigned int old_block_size; + unsigned long *bitmap; + unsigned long nr_pages; + spinlock_t bitmap_lock; +#endif }; #endif |