diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2022-09-12 16:51:22 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2022-09-12 16:51:22 +0200 |
commit | a791dc135325862fdf491ac088f54993710e2515 (patch) | |
tree | 3fe941c0d8b8d315ad4b2235836462e0820f98ae /fs | |
parent | d11b1e908e9a1301e43cefc37fc17dd2b1257b77 (diff) | |
parent | 80e78fcce86de0288793a0ef0f6acf37656ee4cf (diff) | |
download | linux-a791dc135325862fdf491ac088f54993710e2515.tar.gz |
Merge 6.0-rc5 into driver-core-next
We need the driver core and debugfs changes in this branch.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
55 files changed, 449 insertions, 362 deletions
diff --git a/fs/afs/flock.c b/fs/afs/flock.c index c4210a3964d8..bbcc5afd1576 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -76,7 +76,7 @@ void afs_lock_op_done(struct afs_call *call) if (call->error == 0) { spin_lock(&vnode->lock); trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0); - vnode->locked_at = call->reply_time; + vnode->locked_at = call->issue_time; afs_schedule_lock_extension(vnode); spin_unlock(&vnode->lock); } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 4943413d9c5f..7d37f63ef0f0 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -131,7 +131,7 @@ bad: static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry) { - return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry; + return ktime_divns(call->issue_time, NSEC_PER_SEC) + expiry; } static void xdr_decode_AFSCallBack(const __be32 **_bp, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 64ad55494349..723d162078a3 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -137,7 +137,6 @@ struct afs_call { bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ bool upgrade; /* T to request service upgrade */ - bool have_reply_time; /* T if have got reply_time */ bool intr; /* T if interruptible */ bool unmarshalling_error; /* T if an unmarshalling error occurred */ u16 service_id; /* Actual service ID (after upgrade) */ @@ -151,7 +150,7 @@ struct afs_call { } __attribute__((packed)); __be64 tmp64; }; - ktime_t reply_time; /* Time of first reply packet */ + ktime_t issue_time; /* Time of issue of operation */ }; struct afs_call_type { diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 933e67fcdab1..805328ca5428 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -69,6 +69,7 @@ int afs_abort_to_error(u32 abort_code) /* Unified AFS error table */ case UAEPERM: return -EPERM; case UAENOENT: return -ENOENT; + case UAEAGAIN: return -EAGAIN; case UAEACCES: return -EACCES; case UAEBUSY: return -EBUSY; case UAEEXIST: return -EEXIST; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index d5c4785c862d..eccc3cd0cb70 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -351,6 +351,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) if (call->max_lifespan) rxrpc_kernel_set_max_life(call->net->socket, rxcall, call->max_lifespan); + call->issue_time = ktime_get_real(); /* send the request */ iov[0].iov_base = call->request; @@ -501,12 +502,6 @@ static void afs_deliver_to_call(struct afs_call *call) return; } - if (!call->have_reply_time && - rxrpc_kernel_get_reply_time(call->net->socket, - call->rxcall, - &call->reply_time)) - call->have_reply_time = true; - ret = call->type->deliver(call); state = READ_ONCE(call->state); if (ret == 0 && call->unmarshalling_error) diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index fdc7d675b4b0..11571cca86c1 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -232,8 +232,7 @@ static void xdr_decode_YFSCallBack(const __be32 **_bp, struct afs_callback *cb = &scb->callback; ktime_t cb_expiry; - cb_expiry = call->reply_time; - cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100); + cb_expiry = ktime_add(call->issue_time, xdr_to_u64(x->expiration_time) * 100); cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC); scb->have_cb = true; *_bp += xdr_size(x); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 993aca2f1e18..e0375ba9d0fe 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, btrfs_put_caching_control(caching_ctl); } -int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) +static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache, + struct btrfs_caching_control *caching_ctl) +{ + wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); + return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0; +} + +static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache) { struct btrfs_caching_control *caching_ctl; - int ret = 0; + int ret; caching_ctl = btrfs_get_caching_control(cache); if (!caching_ctl) return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0; - - wait_event(caching_ctl->wait, btrfs_block_group_done(cache)); - if (cache->cached == BTRFS_CACHE_ERROR) - ret = -EIO; + ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); btrfs_put_caching_control(caching_ctl); return ret; } -static bool space_cache_v1_done(struct btrfs_block_group *cache) -{ - bool ret; - - spin_lock(&cache->lock); - ret = cache->cached != BTRFS_CACHE_FAST; - spin_unlock(&cache->lock); - - return ret; -} - -void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache, - struct btrfs_caching_control *caching_ctl) -{ - wait_event(caching_ctl->wait, space_cache_v1_done(cache)); -} - #ifdef CONFIG_BTRFS_DEBUG static void fragment_free_space(struct btrfs_block_group *block_group) { @@ -750,9 +737,8 @@ done: btrfs_put_block_group(block_group); } -int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only) +int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait) { - DEFINE_WAIT(wait); struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_caching_control *caching_ctl = NULL; int ret = 0; @@ -785,10 +771,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only } WARN_ON(cache->caching_ctl); cache->caching_ctl = caching_ctl; - if (btrfs_test_opt(fs_info, SPACE_CACHE)) - cache->cached = BTRFS_CACHE_FAST; - else - cache->cached = BTRFS_CACHE_STARTED; + cache->cached = BTRFS_CACHE_STARTED; cache->has_caching_ctl = 1; spin_unlock(&cache->lock); @@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); out: - if (load_cache_only && caching_ctl) - btrfs_wait_space_cache_v1_finished(cache, caching_ctl); + if (wait && caching_ctl) + ret = btrfs_caching_ctl_wait_done(cache, caching_ctl); if (caching_ctl) btrfs_put_caching_control(caching_ctl); @@ -3312,7 +3295,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, * space back to the block group, otherwise we will leak space. */ if (!alloc && !btrfs_block_group_done(cache)) - btrfs_cache_block_group(cache, 1); + btrfs_cache_block_group(cache, true); byte_in_group = bytenr - cache->start; WARN_ON(byte_in_group > cache->length); diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 35e0e860cc0b..6b3cdc4cbc41 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrfs_block_group *bg); void btrfs_wait_nocow_writers(struct btrfs_block_group *bg); void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache, u64 num_bytes); -int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache); -int btrfs_cache_block_group(struct btrfs_block_group *cache, - int load_cache_only); +int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait); void btrfs_put_caching_control(struct btrfs_caching_control *ctl); struct btrfs_caching_control *btrfs_get_caching_control( struct btrfs_block_group *cache); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4edb4bfb2166..df8c99c99df9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -505,7 +505,6 @@ struct btrfs_free_cluster { enum btrfs_caching_type { BTRFS_CACHE_NO, BTRFS_CACHE_STARTED, - BTRFS_CACHE_FAST, BTRFS_CACHE_FINISHED, BTRFS_CACHE_ERROR, }; @@ -1089,8 +1088,6 @@ struct btrfs_fs_info { spinlock_t zone_active_bgs_lock; struct list_head zone_active_bgs; - /* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */ - wait_queue_head_t zone_finish_wait; /* Updates are not protected by any lock */ struct btrfs_commit_stats commit_stats; diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index f43196a893ca..41cddd3ff059 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -165,7 +165,7 @@ no_valid_dev_replace_entry_found: */ if (btrfs_find_device(fs_info->fs_devices, &args)) { btrfs_err(fs_info, - "replace devid present without an active replace item"); +"replace without active item, run 'device scan --forget' on the target device"); ret = -EUCLEAN; } else { dev_replace->srcdev = NULL; @@ -1129,8 +1129,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) up_write(&dev_replace->rwsem); /* Scrub for replace must not be running in suspended state */ - ret = btrfs_scrub_cancel(fs_info); - ASSERT(ret != -ENOTCONN); + btrfs_scrub_cancel(fs_info); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 820b1f1e6b67..1af28b066b42 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3068,7 +3068,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) init_waitqueue_head(&fs_info->transaction_blocked_wait); init_waitqueue_head(&fs_info->async_submit_wait); init_waitqueue_head(&fs_info->delayed_iputs_wait); - init_waitqueue_head(&fs_info->zone_finish_wait); /* Usable values until the real ones are cached from the superblock */ fs_info->nodesize = 4096; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ab944d1f94ef..6914cd8024ba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2551,17 +2551,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, return -EINVAL; /* - * pull in the free space cache (if any) so that our pin - * removes the free space from the cache. We have load_only set - * to one because the slow code to read in the free extents does check - * the pinned extents. + * Fully cache the free space first so that our pin removes the free space + * from the cache. */ - btrfs_cache_block_group(cache, 1); - /* - * Make sure we wait until the cache is completely built in case it is - * missing or is invalid and therefore needs to be rebuilt. - */ - ret = btrfs_wait_block_group_cache_done(cache); + ret = btrfs_cache_block_group(cache, true); if (ret) goto out; @@ -2584,12 +2577,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info, if (!block_group) return -EINVAL; - btrfs_cache_block_group(block_group, 1); - /* - * Make sure we wait until the cache is completely built in case it is - * missing or is invalid and therefore needs to be rebuilt. - */ - ret = btrfs_wait_block_group_cache_done(block_group); + ret = btrfs_cache_block_group(block_group, true); if (ret) goto out; @@ -4399,7 +4387,7 @@ have_block_group: ffe_ctl->cached = btrfs_block_group_done(block_group); if (unlikely(!ffe_ctl->cached)) { ffe_ctl->have_caching_bg = true; - ret = btrfs_cache_block_group(block_group, 0); + ret = btrfs_cache_block_group(block_group, false); /* * If we get ENOMEM here or something else we want to @@ -6169,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) if (end - start >= range->minlen) { if (!btrfs_block_group_done(cache)) { - ret = btrfs_cache_block_group(cache, 0); - if (ret) { - bg_failed++; - bg_ret = ret; - continue; - } - ret = btrfs_wait_block_group_cache_done(cache); + ret = btrfs_cache_block_group(cache, true); if (ret) { bg_failed++; bg_ret = ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index eed81a7e36a4..cf4f19e80e2f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3233,7 +3233,7 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl, u32 bio_size = bio->bi_iter.bi_size; u32 real_size; const sector_t sector = disk_bytenr >> SECTOR_SHIFT; - bool contig; + bool contig = false; int ret; ASSERT(bio); @@ -3242,10 +3242,35 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl, if (bio_ctrl->compress_type != compress_type) return 0; - if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE) + + if (bio->bi_iter.bi_size == 0) { + /* We can always add a page into an empty bio. */ + contig = true; + } else if (bio_ctrl->compress_type == BTRFS_COMPRESS_NONE) { + struct bio_vec *bvec = bio_last_bvec_all(bio); + + /* + * The contig check requires the following conditions to be met: + * 1) The pages are belonging to the same inode + * This is implied by the call chain. + * + * 2) The range has adjacent logical bytenr + * + * 3) The range has adjacent file offset + * This is required for the usage of btrfs_bio->file_offset. + */ + if (bio_end_sector(bio) == sector && + page_offset(bvec->bv_page) + bvec->bv_offset + + bvec->bv_len == page_offset(page) + pg_offset) + contig = true; + } else { + /* + * For compression, all IO should have its logical bytenr + * set to the starting bytenr of the compressed extent. + */ contig = bio->bi_iter.bi_sector == sector; - else - contig = bio_end_sector(bio) == sector; + } + if (!contig) return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 66c822182ecc..5a3f6e0d9688 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2482,6 +2482,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_offset(leaf, fi, 0); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_mark_buffer_dirty(leaf); goto out; } @@ -2498,6 +2499,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_offset(leaf, fi, 0); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_mark_buffer_dirty(leaf); goto out; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f0c97d25b4a0..1372210869b1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1644,10 +1644,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode, done_offset = end; if (done_offset == start) { - struct btrfs_fs_info *info = inode->root->fs_info; - - wait_var_event(&info->zone_finish_wait, - !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags)); + wait_on_bit_io(&inode->root->fs_info->flags, + BTRFS_FS_NEED_ZONE_FINISH, + TASK_UNINTERRUPTIBLE); continue; } @@ -7694,6 +7693,20 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, bool unlock_extents = false; /* + * We could potentially fault if we have a buffer > PAGE_SIZE, and if + * we're NOWAIT we may submit a bio for a partial range and return + * EIOCBQUEUED, which would result in an errant short read. + * + * The best way to handle this would be to allow for partial completions + * of iocb's, so we could submit the partial bio, return and fault in + * the rest of the pages, and then submit the io for the rest of the + * range. However we don't have that currently, so simply return + * -EAGAIN at this point so that the normal path is used. + */ + if (!write && (flags & IOMAP_NOWAIT) && length > PAGE_SIZE) + return -EAGAIN; + + /* * Cap the size of reads to that usually seen in buffered I/O as we need * to allocate a contiguous array for the checksums. */ diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a64b26b16904..d647cb2938c0 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, key.offset = ref_id; again: ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); - if (ret < 0) + if (ret < 0) { + err = ret; goto out; - if (ret == 0) { + } else if (ret == 0) { leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index d0cbeb7ae81c..435559ba94fa 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -199,7 +199,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags) ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK); if (flags & BTRFS_BLOCK_GROUP_DATA) - return SZ_1G; + return BTRFS_MAX_DATA_CHUNK_SIZE; else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) return SZ_32M; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 272901514b0c..f63ff91e2883 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2345,8 +2345,11 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0, &bdev, &disk_super); - if (ret) + if (ret) { + btrfs_put_dev_args_from_path(args); return ret; + } + args->devid = btrfs_stack_device_id(&disk_super->dev_item); memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); if (btrfs_fs_incompat(fs_info, METADATA_UUID)) @@ -5264,6 +5267,9 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl, ctl->stripe_size); } + /* Stripe size should not go beyond 1G. */ + ctl->stripe_size = min_t(u64, ctl->stripe_size, SZ_1G); + /* Align to BTRFS_STRIPE_LEN */ ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN); ctl->chunk_size = ctl->stripe_size * data_stripes; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 7421abcf325a..5bb8d8c86311 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler, const char *name, const void *buffer, size_t size, int flags) { + if (btrfs_root_readonly(BTRFS_I(inode)->root)) + return -EROFS; + name = xattr_full_name(handler, name); return btrfs_setxattr_trans(inode, name, buffer, size, flags); } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index b150b07ba1a7..62e7007a7e46 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -421,10 +421,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) * since btrfs adds the pages one by one to a bio, and btrfs cannot * increase the metadata reservation even if it increases the number of * extents, it is safe to stick with the limit. + * + * With the zoned emulation, we can have non-zoned device on the zoned + * mode. In this case, we don't have a valid max zone append size. So, + * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size. */ - zone_info->max_zone_append_size = - min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT, - (u64)bdev_max_segments(bdev) << PAGE_SHIFT); + if (bdev_is_zoned(bdev)) { + zone_info->max_zone_append_size = min_t(u64, + (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT, + (u64)bdev_max_segments(bdev) << PAGE_SHIFT); + } else { + zone_info->max_zone_append_size = + (u64)bdev_max_segments(bdev) << PAGE_SHIFT; + } if (!IS_ALIGNED(nr_sectors, zone_sectors)) zone_info->nr_zones++; @@ -1178,7 +1187,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size) * offset. */ static int calculate_alloc_pointer(struct btrfs_block_group *cache, - u64 *offset_ret) + u64 *offset_ret, bool new) { struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_root *root; @@ -1188,6 +1197,21 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache, int ret; u64 length; + /* + * Avoid tree lookups for a new block group, there's no use for it. + * It must always be 0. + * + * Also, we have a lock chain of extent buffer lock -> chunk mutex. + * For new a block group, this function is called from + * btrfs_make_block_group() which is already taking the chunk mutex. + * Thus, we cannot call calculate_alloc_pointer() which takes extent + * buffer locks to avoid deadlock. + */ + if (new) { + *offset_ret = 0; + return 0; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1323,6 +1347,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) else num_conventional++; + /* + * Consider a zone as active if we can allow any number of + * active zones. + */ + if (!device->zone_info->max_active_zones) + __set_bit(i, active); + if (!is_sequential) { alloc_offsets[i] = WP_CONVENTIONAL; continue; @@ -1389,45 +1420,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) __set_bit(i, active); break; } - - /* - * Consider a zone as active if we can allow any number of - * active zones. - */ - if (!device->zone_info->max_active_zones) - __set_bit(i, active); } if (num_sequential > 0) cache->seq_zone = true; if (num_conventional > 0) { - /* - * Avoid calling calculate_alloc_pointer() for new BG. It - * is no use for new BG. It must be always 0. - * - * Also, we have a lock chain of extent buffer lock -> - * chunk mutex. For new BG, this function is called from - * btrfs_make_block_group() which is already taking the - * chunk mutex. Thus, we cannot call - * calculate_alloc_pointer() which takes extent buffer - * locks to avoid deadlock. - */ - /* Zone capacity is always zone size in emulation */ cache->zone_capacity = cache->length; - if (new) { - cache->alloc_offset = 0; - goto out; - } - ret = calculate_alloc_pointer(cache, &last_alloc); - if (ret || map->num_stripes == num_conventional) { - if (!ret) - cache->alloc_offset = last_alloc; - else - btrfs_err(fs_info, + ret = calculate_alloc_pointer(cache, &last_alloc, new); + if (ret) { + btrfs_err(fs_info, "zoned: failed to determine allocation offset of bg %llu", - cache->start); + cache->start); + goto out; + } else if (map->num_stripes == num_conventional) { + cache->alloc_offset = last_alloc; + cache->zone_is_active = 1; goto out; } } @@ -1495,13 +1504,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } - if (cache->zone_is_active) { - btrfs_get_block_group(cache); - spin_lock(&fs_info->zone_active_bgs_lock); - list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs); - spin_unlock(&fs_info->zone_active_bgs_lock); - } - out: if (cache->alloc_offset > fs_info->zone_size) { btrfs_err(fs_info, @@ -1526,10 +1528,16 @@ out: ret = -EIO; } - if (!ret) + if (!ret) { cache->meta_write_pointer = cache->alloc_offset + cache->start; - - if (ret) { + if (cache->zone_is_active) { + btrfs_get_block_group(cache); + spin_lock(&fs_info->zone_active_bgs_lock); + list_add_tail(&cache->active_bg_list, + &fs_info->zone_active_bgs); + spin_unlock(&fs_info->zone_active_bgs_lock); + } + } else { kfree(cache->physical_map); cache->physical_map = NULL; } @@ -2007,8 +2015,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ /* For active_bg_list */ btrfs_put_block_group(block_group); - clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); - wake_up_all(&fs_info->zone_finish_wait); + clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); return 0; } diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 6cba2c6de2f9..2ad58c465208 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -111,6 +111,7 @@ struct cachefiles_cache { char *tag; /* cache binding tag */ refcount_t unbind_pincount;/* refcount to do daemon unbind */ struct xarray reqs; /* xarray of pending on-demand requests */ + unsigned long req_id_next; struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ u32 ondemand_id_next; }; diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 1fee702d5529..0254ed39f68c 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -158,9 +158,13 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) /* fail OPEN request if daemon reports an error */ if (size < 0) { - if (!IS_ERR_VALUE(size)) - size = -EINVAL; - req->error = size; + if (!IS_ERR_VALUE(size)) { + req->error = -EINVAL; + ret = -EINVAL; + } else { + req->error = size; + ret = 0; + } goto out; } @@ -238,14 +242,19 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, unsigned long id = 0; size_t n; int ret = 0; - XA_STATE(xas, &cache->reqs, 0); + XA_STATE(xas, &cache->reqs, cache->req_id_next); /* - * Search for a request that has not ever been processed, to prevent - * requests from being processed repeatedly. + * Cyclically search for a request that has not ever been processed, + * to prevent requests from being processed repeatedly, and make + * request distribution fair. */ xa_lock(&cache->reqs); req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW); + if (!req && cache->req_id_next > 0) { + xas_set(&xas, 0); + req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW); + } if (!req) { xa_unlock(&cache->reqs); return 0; @@ -260,6 +269,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, } xas_clear_mark(&xas, CACHEFILES_REQ_NEW); + cache->req_id_next = xas.xa_index + 1; xa_unlock(&cache->reqs); id = xas.xa_index; diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 8f7835ccbca1..46f5718754f9 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -32,10 +32,9 @@ int __cifs_calc_signature(struct smb_rqst *rqst, int rc; struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; - int is_smb2 = server->vals->header_preamble_size == 0; /* iov[0] is actual data and not the rfc1002 length for SMB2+ */ - if (is_smb2) { + if (!is_smb1(server)) { if (iov[0].iov_len <= 4) return -EIO; i = 0; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f54d8bf2732a..8042d7280dec 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1248,6 +1248,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, lock_two_nondirectories(target_inode, src_inode); cifs_dbg(FYI, "about to flush pages\n"); + + rc = filemap_write_and_wait_range(src_inode->i_mapping, off, + off + len - 1); + if (rc) + goto out; + /* should we flush first and last page first */ truncate_inode_pages(&target_inode->i_data, 0); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f15d7b0c123d..ae7f571a7dba 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -557,6 +557,8 @@ struct smb_version_values { #define HEADER_SIZE(server) (server->vals->header_size) #define MAX_HEADER_SIZE(server) (server->vals->max_header_size) +#define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size) +#define MID_HEADER_SIZE(server) (HEADER_SIZE(server) - 1 - HEADER_PREAMBLE_SIZE(server)) /** * CIFS superblock mount flags (mnt_cifs_flags) to consider when @@ -750,6 +752,11 @@ struct TCP_Server_Info { #endif }; +static inline bool is_smb1(struct TCP_Server_Info *server) +{ + return HEADER_PREAMBLE_SIZE(server) != 0; +} + static inline void cifs_server_lock(struct TCP_Server_Info *server) { unsigned int nofs_flag = memalloc_nofs_save(); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 3da5da9f16b0..a0a06b6f252b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -871,7 +871,7 @@ smb2_get_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) /* * SMB1 does not use credits. */ - if (server->vals->header_preamble_size) + if (is_smb1(server)) return 0; return le16_to_cpu(shdr->CreditRequest); @@ -1050,7 +1050,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* make sure this will fit in a large buffer */ if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) - - server->vals->header_preamble_size) { + HEADER_PREAMBLE_SIZE(server)) { cifs_server_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length); cifs_reconnect(server, true); return -ECONNABORTED; @@ -1065,8 +1065,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* now read the rest */ length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1, - pdu_length - HEADER_SIZE(server) + 1 - + server->vals->header_preamble_size); + pdu_length - MID_HEADER_SIZE(server)); if (length < 0) return length; @@ -1122,7 +1121,7 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server) /* * SMB1 does not use credits. */ - if (server->vals->header_preamble_size) + if (is_smb1(server)) return; if (shdr->CreditRequest) { @@ -1180,10 +1179,10 @@ cifs_demultiplex_thread(void *p) if (length < 0) continue; - if (server->vals->header_preamble_size == 0) - server->total_read = 0; - else + if (is_smb1(server)) server->total_read = length; + else + server->total_read = 0; /* * The right amount was read from socket - 4 bytes, @@ -1198,8 +1197,7 @@ next_pdu: server->pdu_size = pdu_length; /* make sure we have enough to get to the MID */ - if (server->pdu_size < HEADER_SIZE(server) - 1 - - server->vals->header_preamble_size) { + if (server->pdu_size < MID_HEADER_SIZE(server)) { cifs_server_dbg(VFS, "SMB response too short (%u bytes)\n", server->pdu_size); cifs_reconnect(server, true); @@ -1208,9 +1206,8 @@ next_pdu: /* read down to the MID */ length = cifs_read_from_socket(server, - buf + server->vals->header_preamble_size, - HEADER_SIZE(server) - 1 - - server->vals->header_preamble_size); + buf + HEADER_PREAMBLE_SIZE(server), + MID_HEADER_SIZE(server)); if (length < 0) continue; server->total_read += length; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 96f3b0573606..421be43af425 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1600,17 +1600,8 @@ smb2_copychunk_range(const unsigned int xid, int chunks_copied = 0; bool chunk_sizes_updated = false; ssize_t bytes_written, total_bytes_written = 0; - struct inode *inode; pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); - - /* - * We need to flush all unwritten data before we can send the - * copychunk ioctl to the server. - */ - inode = d_inode(trgtfile->dentry); - filemap_write_and_wait(inode->i_mapping); - if (pcchunk == NULL) return -ENOMEM; @@ -3307,26 +3298,43 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb, return pntsd; } +static long smb3_zero_data(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len, unsigned int xid) +{ + struct cifsFileInfo *cfile = file->private_data; + struct file_zero_data_information fsctl_buf; + + cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); + + fsctl_buf.FileOffset = cpu_to_le64(offset); + fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); + + return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, + (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), + 0, NULL, NULL); +} + static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, loff_t offset, loff_t len, bool keep_size) { struct cifs_ses *ses = tcon->ses; - struct inode *inode; - struct cifsInodeInfo *cifsi; + struct inode *inode = file_inode(file); + struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsFileInfo *cfile = file->private_data; - struct file_zero_data_information fsctl_buf; long rc; unsigned int xid; __le64 eof; xid = get_xid(); - inode = d_inode(cfile->dentry); - cifsi = CIFS_I(inode); - trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid, ses->Suid, offset, len); + inode_lock(inode); + filemap_invalidate_lock(inode->i_mapping); + /* * We zero the range through ioctl, so we need remove the page caches * first, otherwise the data may be inconsistent with the server. @@ -3334,26 +3342,12 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, truncate_pagecache_range(inode, offset, offset + len - 1); /* if file not oplocked can't be sure whether asking to extend size */ - if (!CIFS_CACHE_READ(cifsi)) - if (keep_size == false) { - rc = -EOPNOTSUPP; - trace_smb3_zero_err(xid, cfile->fid.persistent_fid, - tcon->tid, ses->Suid, offset, len, rc); - free_xid(xid); - return rc; - } - - cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); - - fsctl_buf.FileOffset = cpu_to_le64(offset); - fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); + rc = -EOPNOTSUPP; + if (keep_size == false && !CIFS_CACHE_READ(cifsi)) + goto zero_range_exit; - rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, - cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, - (char *)&fsctl_buf, - sizeof(struct file_zero_data_information), - 0, NULL, NULL); - if (rc) + rc = smb3_zero_data(file, tcon, offset, len, xid); + if (rc < 0) goto zero_range_exit; /* @@ -3366,6 +3360,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, } zero_range_exit: + filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); free_xid(xid); if (rc) trace_smb3_zero_err(xid, cfile->fid.persistent_fid, tcon->tid, @@ -3379,7 +3375,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, loff_t offset, loff_t len) { - struct inode *inode; + struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; struct file_zero_data_information fsctl_buf; long rc; @@ -3388,14 +3384,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, xid = get_xid(); - inode = d_inode(cfile->dentry); - + inode_lock(inode); /* Need to make file sparse, if not already, before freeing range. */ /* Consider adding equivalent for compressed since it could also work */ if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) { rc = -EOPNOTSUPP; - free_xid(xid); - return rc; + goto out; } filemap_invalidate_lock(inode->i_mapping); @@ -3415,8 +3409,10 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, (char *)&fsctl_buf, sizeof(struct file_zero_data_information), CIFSMaxBufSize, NULL, NULL); - free_xid(xid); filemap_invalidate_unlock(inode->i_mapping); +out: + inode_unlock(inode); + free_xid(xid); return rc; } @@ -3673,39 +3669,50 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, { int rc; unsigned int xid; - struct inode *inode; + struct inode *inode = file_inode(file); struct cifsFileInfo *cfile = file->private_data; - struct cifsInodeInfo *cifsi; + struct cifsInodeInfo *cifsi = CIFS_I(inode); __le64 eof; + loff_t old_eof; xid = get_xid(); - inode = d_inode(cfile->dentry); - cifsi = CIFS_I(inode); + inode_lock(inode); - if (off >= i_size_read(inode) || - off + len >= i_size_read(inode)) { + old_eof = i_size_read(inode); + if ((off >= old_eof) || + off + len >= old_eof) { rc = -EINVAL; goto out; } + filemap_invalidate_lock(inode->i_mapping); + rc = filemap_write_and_wait_range(inode->i_mapping, off, old_eof - 1); + if (rc < 0) + goto out_2; + + truncate_pagecache_range(inode, off, old_eof); + rc = smb2_copychunk_range(xid, cfile, cfile, off + len, - i_size_read(inode) - off - len, off); + old_eof - off - len, off); if (rc < 0) - goto out; + goto out_2; - eof = cpu_to_le64(i_size_read(inode) - len); + eof = cpu_to_le64(old_eof - len); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof); if (rc < 0) - goto out; + goto out_2; rc = 0; cifsi->server_eof = i_size_read(inode) - len; truncate_setsize(inode, cifsi->server_eof); fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof); +out_2: + filemap_invalidate_unlock(inode->i_mapping); out: + inode_unlock(inode); free_xid(xid); return rc; } @@ -3716,34 +3723,47 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, int rc; unsigned int xid; struct cifsFileInfo *cfile = file->private_data; + struct inode *inode = file_inode(file); __le64 eof; - __u64 count; + __u64 count, old_eof; xid = get_xid(); - if (off >= i_size_read(file->f_inode)) { + inode_lock(inode); + + old_eof = i_size_read(inode); + if (off >= old_eof) { rc = -EINVAL; goto out; } - count = i_size_read(file->f_inode) - off; - eof = cpu_to_le64(i_size_read(file->f_inode) + len); + count = old_eof - off; + eof = cpu_to_le64(old_eof + len); + + filemap_invalidate_lock(inode->i_mapping); + rc = filemap_write_and_wait_range(inode->i_mapping, off, old_eof + len - 1); + if (rc < 0) + goto out_2; + truncate_pagecache_range(inode, off, old_eof); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof); if (rc < 0) - goto out; + goto out_2; rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len); if (rc < 0) - goto out; + goto out_2; - rc = smb3_zero_range(file, tcon, off, len, 1); + rc = smb3_zero_data(file, tcon, off, len, xid); if (rc < 0) - goto out; + goto out_2; rc = 0; +out_2: + filemap_invalidate_unlock(inode->i_mapping); out: + inode_unlock(inode); free_xid(xid); return rc; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 91cfc5b47ac7..6352ab32c7e7 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -965,16 +965,17 @@ SMB2_negotiate(const unsigned int xid, } else if (rc != 0) goto neg_exit; + rc = -EIO; if (strcmp(server->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { cifs_server_dbg(VFS, "SMB2 dialect returned but not requested\n"); - return -EIO; + goto neg_exit; } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { cifs_server_dbg(VFS, "SMB2.1 dialect returned but not requested\n"); - return -EIO; + goto neg_exit; } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { /* ops set to 3.0 by default for default so update */ server->ops = &smb311_operations; @@ -985,7 +986,7 @@ SMB2_negotiate(const unsigned int xid, if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { cifs_server_dbg(VFS, "SMB2 dialect returned but not requested\n"); - return -EIO; + goto neg_exit; } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { /* ops set to 3.0 by default for default so update */ server->ops = &smb21_operations; @@ -999,7 +1000,7 @@ SMB2_negotiate(const unsigned int xid, /* if requested single dialect ensure returned dialect matched */ cifs_server_dbg(VFS, "Invalid 0x%x dialect returned: not requested\n", le16_to_cpu(rsp->DialectRevision)); - return -EIO; + goto neg_exit; } cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode); @@ -1017,9 +1018,10 @@ SMB2_negotiate(const unsigned int xid, else { cifs_server_dbg(VFS, "Invalid dialect returned by server 0x%x\n", le16_to_cpu(rsp->DialectRevision)); - rc = -EIO; goto neg_exit; } + + rc = 0; server->dialect = le16_to_cpu(rsp->DialectRevision); /* @@ -2572,19 +2574,15 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, path_len = UniStrnlen((wchar_t *)path, PATH_MAX); - /* - * make room for one path separator between the treename and - * path - */ - *out_len = treename_len + 1 + path_len; + /* make room for one path separator only if @path isn't empty */ + *out_len = treename_len + (path[0] ? 1 : 0) + path_len; /* - * final path needs to be null-terminated UTF16 with a - * size aligned to 8 + * final path needs to be 8-byte aligned as specified in + * MS-SMB2 2.2.13 SMB2 CREATE Request. */ - - *out_size = roundup((*out_len+1)*2, 8); - *out_path = kzalloc(*out_size, GFP_KERNEL); + *out_size = roundup(*out_len * sizeof(__le16), 8); + *out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL); if (!*out_path) return -ENOMEM; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index de7aeced7e16..c2fe035e573b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -261,8 +261,8 @@ smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) int nvec; unsigned long buflen = 0; - if (server->vals->header_preamble_size == 0 && - rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) { + if (!is_smb1(server) && rqst->rq_nvec >= 2 && + rqst->rq_iov[0].iov_len == 4) { iov = &rqst->rq_iov[1]; nvec = rqst->rq_nvec - 1; } else { @@ -346,7 +346,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, sigprocmask(SIG_BLOCK, &mask, &oldmask); /* Generate a rfc1002 marker for SMB2+ */ - if (server->vals->header_preamble_size == 0) { + if (!is_smb1(server)) { struct kvec hiov = { .iov_base = &rfc1002_marker, .iov_len = 4 @@ -1238,7 +1238,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, buf = (char *)midQ[i]->resp_buf; resp_iov[i].iov_base = buf; resp_iov[i].iov_len = midQ[i]->resp_buf_size + - server->vals->header_preamble_size; + HEADER_PREAMBLE_SIZE(server); if (midQ[i]->large_buf) resp_buf_type[i] = CIFS_LARGE_BUFFER; @@ -1643,7 +1643,7 @@ int cifs_discard_remaining_data(struct TCP_Server_Info *server) { unsigned int rfclen = server->pdu_size; - int remaining = rfclen + server->vals->header_preamble_size - + int remaining = rfclen + HEADER_PREAMBLE_SIZE(server) - server->total_read; while (remaining > 0) { @@ -1689,8 +1689,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) unsigned int data_offset, data_len; struct cifs_readdata *rdata = mid->callback_data; char *buf = server->smallbuf; - unsigned int buflen = server->pdu_size + - server->vals->header_preamble_size; + unsigned int buflen = server->pdu_size + HEADER_PREAMBLE_SIZE(server); bool use_rdma_mr = false; cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n", @@ -1724,10 +1723,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) /* set up first two iov for signature check and to get credits */ rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = server->vals->header_preamble_size; - rdata->iov[1].iov_base = buf + server->vals->header_preamble_size; + rdata->iov[0].iov_len = HEADER_PREAMBLE_SIZE(server); + rdata->iov[1].iov_base = buf + HEADER_PREAMBLE_SIZE(server); rdata->iov[1].iov_len = - server->total_read - server->vals->header_preamble_size; + server->total_read - HEADER_PREAMBLE_SIZE(server); cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", rdata->iov[0].iov_base, rdata->iov[0].iov_len); cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n", @@ -1752,7 +1751,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) } data_offset = server->ops->read_data_offset(buf) + - server->vals->header_preamble_size; + HEADER_PREAMBLE_SIZE(server); if (data_offset < server->total_read) { /* * win2k8 sometimes sends an offset of 0 when the read diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 3dcf0b8b4e93..232cfdf095ae 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -745,6 +745,28 @@ void debugfs_remove(struct dentry *dentry) EXPORT_SYMBOL_GPL(debugfs_remove); /** + * debugfs_lookup_and_remove - lookup a directory or file and recursively remove it + * @name: a pointer to a string containing the name of the item to look up. + * @parent: a pointer to the parent dentry of the item. + * + * This is the equlivant of doing something like + * debugfs_remove(debugfs_lookup(..)) but with the proper reference counting + * handled for the directory being looked up. + */ +void debugfs_lookup_and_remove(const char *name, struct dentry *parent) +{ + struct dentry *dentry; + + dentry = debugfs_lookup(name, parent); + if (!dentry) + return; + + debugfs_remove(dentry); + dput(dentry); +} +EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove); + +/** * debugfs_rename - rename a file/directory in the debugfs filesystem * @old_dir: a pointer to the parent dentry for the renamed object. This * should be a directory dentry. diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 8e01d89c3319..b5fd9d71e67f 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -222,8 +222,10 @@ static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio) rreq = erofs_fscache_alloc_request(folio_mapping(folio), folio_pos(folio), folio_size(folio)); - if (IS_ERR(rreq)) + if (IS_ERR(rreq)) { + ret = PTR_ERR(rreq); goto out; + } return erofs_fscache_read_folios_async(mdev.m_fscache->cookie, rreq, mdev.m_pa); @@ -301,8 +303,10 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio) rreq = erofs_fscache_alloc_request(folio_mapping(folio), folio_pos(folio), folio_size(folio)); - if (IS_ERR(rreq)) + if (IS_ERR(rreq)) { + ret = PTR_ERR(rreq); goto out_unlock; + } pstart = mdev.m_pa + (pos - map.m_la); return erofs_fscache_read_folios_async(mdev.m_fscache->cookie, diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index cfee49d33b95..a01cc82795a2 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -195,7 +195,6 @@ struct erofs_workgroup { atomic_t refcount; }; -#if defined(CONFIG_SMP) static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp, int val) { @@ -224,34 +223,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) return atomic_cond_read_relaxed(&grp->refcount, VAL != EROFS_LOCKED_MAGIC); } -#else -static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp, - int val) -{ - preempt_disable(); - /* no need to spin on UP platforms, let's just disable preemption. */ - if (val != atomic_read(&grp->refcount)) { - preempt_enable(); - return false; - } - return true; -} - -static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp, - int orig_val) -{ - preempt_enable(); -} - -static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) -{ - int v = atomic_read(&grp->refcount); - - /* workgroup is never freezed on uniprocessor systems */ - DBG_BUGON(v == EROFS_LOCKED_MAGIC); - return v; -} -#endif /* !CONFIG_SMP */ #endif /* !CONFIG_EROFS_FS_ZIP */ /* we strictly follow PAGE_SIZE and no buffer head yet */ diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 572f0b8151ba..d58549ca1df9 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -141,7 +141,7 @@ struct z_erofs_maprecorder { u8 type, headtype; u16 clusterofs; u16 delta[2]; - erofs_blk_t pblk, compressedlcs; + erofs_blk_t pblk, compressedblks; erofs_off_t nextpackoff; }; @@ -192,7 +192,7 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, DBG_BUGON(1); return -EFSCORRUPTED; } - m->compressedlcs = m->delta[0] & + m->compressedblks = m->delta[0] & ~Z_EROFS_VLE_DI_D0_CBLKCNT; m->delta[0] = 1; } @@ -293,7 +293,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, DBG_BUGON(1); return -EFSCORRUPTED; } - m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; + m->compressedblks = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; m->delta[0] = 1; return 0; } else if (i + 1 != (int)vcnt) { @@ -497,7 +497,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, return 0; } lcn = m->lcn + 1; - if (m->compressedlcs) + if (m->compressedblks) goto out; err = z_erofs_load_cluster_from_disk(m, lcn, false); @@ -506,7 +506,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, /* * If the 1st NONHEAD lcluster has already been handled initially w/o - * valid compressedlcs, which means at least it mustn't be CBLKCNT, or + * valid compressedblks, which means at least it mustn't be CBLKCNT, or * an internal implemenatation error is detected. * * The following code can also handle it properly anyway, but let's @@ -523,12 +523,12 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type * rather than CBLKCNT, it's a 1 lcluster-sized pcluster. */ - m->compressedlcs = 1; + m->compressedblks = 1 << (lclusterbits - LOG_BLOCK_SIZE); break; case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: if (m->delta[0] != 1) goto err_bonus_cblkcnt; - if (m->compressedlcs) + if (m->compressedblks) break; fallthrough; default: @@ -539,7 +539,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, return -EFSCORRUPTED; } out: - map->m_plen = (u64)m->compressedlcs << lclusterbits; + map->m_plen = (u64)m->compressedblks << LOG_BLOCK_SIZE; return 0; err_bonus_cblkcnt: erofs_err(m->inode->i_sb, diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 05221366a16d..08a1993ab7fd 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(struct inode *inode, static void wb_wakeup(struct bdi_writeback *wb) { - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) mod_delayed_work(bdi_wq, &wb->dwork, 0); - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); } static void finish_writeback_work(struct bdi_writeback *wb, @@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_writeback *wb, if (work->done) atomic_inc(&work->done->cnt); - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (test_bit(WB_registered, &wb->state)) { list_add_tail(&work->list, &wb->work_list); @@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_writeback *wb, } else finish_writeback_work(wb, work); - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); } /** @@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb) { struct wb_writeback_work *work = NULL; - spin_lock_bh(&wb->work_lock); + spin_lock_irq(&wb->work_lock); if (!list_empty(&wb->work_list)) { work = list_entry(wb->work_list.next, struct wb_writeback_work, list); list_del_init(&work->list); } - spin_unlock_bh(&wb->work_lock); + spin_unlock_irq(&wb->work_lock); return work; } diff --git a/fs/inode.c b/fs/inode.c index 6462276dfdf0..ba1de23c13c1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2018,23 +2018,25 @@ static int __file_remove_privs(struct file *file, unsigned int flags) { struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); - int error; + int error = 0; int kill; if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode)) return 0; kill = dentry_needs_remove_privs(dentry); - if (kill <= 0) + if (kill < 0) return kill; - if (flags & IOCB_NOWAIT) - return -EAGAIN; + if (kill) { + if (flags & IOCB_NOWAIT) + return -EAGAIN; + + error = __remove_privs(file_mnt_user_ns(file), dentry, kill); + } - error = __remove_privs(file_mnt_user_ns(file), dentry, kill); if (!error) inode_has_no_xattr(inode); - return error; } diff --git a/fs/locks.c b/fs/locks.c index c266cfdc3291..607f94a0e789 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2129,6 +2129,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) else error = locks_lock_file_wait(f.file, &fl); + locks_release_private(&fl); out_putf: fdput(f); diff --git a/fs/namespace.c b/fs/namespace.c index 68789f896f08..df137ba19d37 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4238,6 +4238,13 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, err = -EPERM; goto out_fput; } + + /* We're not controlling the target namespace. */ + if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) { + err = -EPERM; + goto out_fput; + } + kattr->mnt_userns = get_user_ns(mnt_userns); out_fput: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dbab3caa15ed..5d6c2ddc7ea6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2382,7 +2382,8 @@ static void nfs_dentry_remove_handle_error(struct inode *dir, { switch (error) { case -ENOENT: - d_delete(dentry); + if (d_really_is_positive(dentry)) + d_delete(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; case 0: @@ -2484,8 +2485,10 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) */ error = -ETXTBSY; if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) || - WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) + WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) { + spin_unlock(&dentry->d_lock); goto out; + } if (dentry->d_fsdata) /* old devname */ kfree(dentry->d_fsdata); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d2bcd4834c0e..e032fe201a36 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -221,8 +221,10 @@ nfs_file_fsync_commit(struct file *file, int datasync) int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); + struct nfs_inode *nfsi = NFS_I(inode); + long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages); + long nredirtied; int ret; trace_nfs_fsync_enter(inode); @@ -237,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) ret = pnfs_sync_inode(inode, !!datasync); if (ret != 0) break; - if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) + nredirtied = atomic_long_read(&nfsi->redirtied_pages); + if (nredirtied == save_nredirtied) break; - /* - * If nfs_file_fsync_commit detected a server reboot, then - * resend all dirty pages that might have been covered by - * the NFS_CONTEXT_RESEND_WRITES flag - */ - start = 0; - end = LLONG_MAX; + save_nredirtied = nredirtied; } trace_nfs_fsync_exit(inode, ret); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b4e46b0ffa2d..bea7c005119c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -426,6 +426,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) static void nfs_inode_init_regular(struct nfs_inode *nfsi) { atomic_long_set(&nfsi->nrequests, 0); + atomic_long_set(&nfsi->redirtied_pages, 0); INIT_LIST_HEAD(&nfsi->commit_info.list); atomic_long_set(&nfsi->commit_info.ncommit, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e88f6b18445e..9eb181287879 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -340,6 +340,11 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out; } + if (!S_ISREG(fattr->mode)) { + res = ERR_PTR(-EBADF); + goto out; + } + res = ERR_PTR(-ENOMEM); len = strlen(SSC_READ_NAME_BODY) + 16; read_name = kzalloc(len, GFP_KERNEL); @@ -357,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, r_ino->i_fop); if (IS_ERR(filep)) { res = ERR_CAST(filep); + iput(r_ino); goto out_free_name; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 41a9b6b58fb9..2613b7e36eb9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2817,7 +2817,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, hdr->completion_ops); - set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 51a7e202d6e5..1843fa235d9b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1420,10 +1420,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, */ static void nfs_redirty_request(struct nfs_page *req) { + struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host); + /* Bump the transmission count */ req->wb_nio++; nfs_mark_request_dirty(req); - set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); + atomic_long_inc(&nfsi->redirtied_pages); nfs_end_page_writeback(req); nfs_release_request(req); } @@ -1904,7 +1906,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* We have a mismatch. Write the page again */ dprintk_cont(" mismatch\n"); nfs_mark_request_dirty(req); - set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); + atomic_long_inc(&NFS_I(data->inode)->redirtied_pages); next: nfs_unlock_and_release_request(req); /* Latency breaker */ diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 5bdff12a1232..6ae1f56b7358 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -483,8 +483,7 @@ out: } #ifdef CONFIG_NTFS3_FS_POSIX_ACL -static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, - struct inode *inode, int type, +static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type, int locked) { struct ntfs_inode *ni = ntfs_i(inode); @@ -519,7 +518,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, /* Translate extended attribute to acl. */ if (err >= 0) { - acl = posix_acl_from_xattr(mnt_userns, buf, err); + acl = posix_acl_from_xattr(&init_user_ns, buf, err); } else if (err == -ENODATA) { acl = NULL; } else { @@ -542,8 +541,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu) if (rcu) return ERR_PTR(-ECHILD); - /* TODO: init_user_ns? */ - return ntfs_get_acl_ex(&init_user_ns, inode, type, 0); + return ntfs_get_acl_ex(inode, type, 0); } static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, @@ -595,7 +593,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, value = kmalloc(size, GFP_NOFS); if (!value) return -ENOMEM; - err = posix_acl_to_xattr(mnt_userns, acl, value, size); + err = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (err < 0) goto out; flags = 0; @@ -646,7 +644,7 @@ static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns, if (!acl) return -ENODATA; - err = posix_acl_to_xattr(mnt_userns, acl, buffer, size); + err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); posix_acl_release(acl); return err; @@ -670,12 +668,12 @@ static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns, if (!value) { acl = NULL; } else { - acl = posix_acl_from_xattr(mnt_userns, value, size); + acl = posix_acl_from_xattr(&init_user_ns, value, size); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { - err = posix_acl_valid(mnt_userns, acl); + err = posix_acl_valid(&init_user_ns, acl); if (err) goto release_and_out; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 801e60bab955..c28bc983a7b1 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb, ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres); ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres); - ocfs2_cluster_disconnect(osb->cconn, hangup_pending); - osb->cconn = NULL; + if (osb->cconn) { + ocfs2_cluster_disconnect(osb->cconn, hangup_pending); + osb->cconn = NULL; - ocfs2_dlm_shutdown_debug(osb); + ocfs2_dlm_shutdown_debug(osb); + } } static int ocfs2_drop_lock(struct ocfs2_super *osb, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 013a727bd7c8..e2cc9eec287c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1914,8 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) !ocfs2_is_hard_readonly(osb)) hangup_needed = 1; - if (osb->cconn) - ocfs2_dlm_shutdown(osb, hangup_needed); + ocfs2_dlm_shutdown(osb, hangup_needed); ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats); debugfs_remove_recursive(osb->osb_debug_root); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b45fea69fff3..0fbcb590af84 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -460,9 +460,12 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) * of the POSIX ACLs retrieved from the lower layer to this function to not * alter the POSIX ACLs for the underlying filesystem. */ -static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns, +static void ovl_idmap_posix_acl(struct inode *realinode, + struct user_namespace *mnt_userns, struct posix_acl *acl) { + struct user_namespace *fs_userns = i_user_ns(realinode); + for (unsigned int i = 0; i < acl->a_count; i++) { vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -470,11 +473,11 @@ static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns, struct posix_acl_entry *e = &acl->a_entries[i]; switch (e->e_tag) { case ACL_USER: - vfsuid = make_vfsuid(mnt_userns, &init_user_ns, e->e_uid); + vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid); e->e_uid = vfsuid_into_kuid(vfsuid); break; case ACL_GROUP: - vfsgid = make_vfsgid(mnt_userns, &init_user_ns, e->e_gid); + vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid); e->e_gid = vfsgid_into_kgid(vfsgid); break; } @@ -536,7 +539,7 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) if (!clone) clone = ERR_PTR(-ENOMEM); else - ovl_idmap_posix_acl(mnt_user_ns(realpath.mnt), clone); + ovl_idmap_posix_acl(realinode, mnt_user_ns(realpath.mnt), clone); /* * Since we're not in RCU path walk we always need to release the * original ACLs. diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 1d17d7b13dcd..5af33800743e 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -361,6 +361,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, const struct posix_acl *acl, int want) { const struct posix_acl_entry *pa, *pe, *mask_obj; + struct user_namespace *fs_userns = i_user_ns(inode); int found = 0; vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -376,7 +377,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, goto check_perm; break; case ACL_USER: - vfsuid = make_vfsuid(mnt_userns, &init_user_ns, + vfsuid = make_vfsuid(mnt_userns, fs_userns, pa->e_uid); if (vfsuid_eq_kuid(vfsuid, current_fsuid())) goto mask; @@ -390,7 +391,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, } break; case ACL_GROUP: - vfsgid = make_vfsgid(mnt_userns, &init_user_ns, + vfsgid = make_vfsgid(mnt_userns, fs_userns, pa->e_gid); if (vfsgid_in_group_p(vfsgid)) { found = 1; @@ -736,6 +737,7 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, { struct posix_acl_xattr_header *header = value; struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; + struct user_namespace *fs_userns = i_user_ns(inode); int count; vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -753,13 +755,13 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, switch (le16_to_cpu(entry->e_tag)) { case ACL_USER: uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); - vfsuid = make_vfsuid(mnt_userns, &init_user_ns, uid); + vfsuid = make_vfsuid(mnt_userns, fs_userns, uid); entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, vfsuid_into_kuid(vfsuid))); break; case ACL_GROUP: gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); - vfsgid = make_vfsgid(mnt_userns, &init_user_ns, gid); + vfsgid = make_vfsgid(mnt_userns, fs_userns, gid); entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, vfsgid_into_kgid(vfsgid))); break; @@ -775,6 +777,7 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, { struct posix_acl_xattr_header *header = value; struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; + struct user_namespace *fs_userns = i_user_ns(inode); int count; vfsuid_t vfsuid; vfsgid_t vfsgid; @@ -793,13 +796,13 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns, case ACL_USER: uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); vfsuid = VFSUIDT_INIT(uid); - uid = from_vfsuid(mnt_userns, &init_user_ns, vfsuid); + uid = from_vfsuid(mnt_userns, fs_userns, vfsuid); entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid)); break; case ACL_GROUP: gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id)); vfsgid = VFSGIDT_INIT(gid); - gid = from_vfsgid(mnt_userns, &init_user_ns, vfsgid); + gid = from_vfsgid(mnt_userns, fs_userns, vfsgid); entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid)); break; default: diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a3398d0f1927..4e0023643f8b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -527,10 +527,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; - bool migration = false; + bool migration = false, young = false, dirty = false; if (pte_present(*pte)) { page = vm_normal_page(vma, addr, *pte); + young = pte_young(*pte); + dirty = pte_dirty(*pte); } else if (is_swap_pte(*pte)) { swp_entry_t swpent = pte_to_swp_entry(*pte); @@ -560,8 +562,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), - locked, migration); + smaps_account(mss, page, false, young, dirty, locked, migration); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 98e64fec75b7..e56510964b22 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -593,7 +593,7 @@ static void squashfs_readahead(struct readahead_control *ractl) res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); - kfree(actor); + squashfs_page_actor_free(actor); if (res == expected) { int bytes; diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index be4b12d31e0c..f1ccad519e28 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -74,7 +74,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize, /* Decompress directly into the page cache buffers */ res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); - kfree(actor); + squashfs_page_actor_free(actor); if (res < 0) goto mark_errored; diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index b23b780d8f42..54b93bf4a25c 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -52,6 +52,7 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, actor->buffer = buffer; actor->pages = pages; actor->next_page = 0; + actor->tmp_buffer = NULL; actor->squashfs_first_page = cache_first_page; actor->squashfs_next_page = cache_next_page; actor->squashfs_finish_page = cache_finish_page; @@ -68,20 +69,9 @@ static void *handle_next_page(struct squashfs_page_actor *actor) if ((actor->next_page == actor->pages) || (actor->next_index != actor->page[actor->next_page]->index)) { - if (actor->alloc_buffer) { - void *tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); - - if (tmp_buffer) { - actor->tmp_buffer = tmp_buffer; - actor->next_index++; - actor->returned_pages++; - return tmp_buffer; - } - } - actor->next_index++; actor->returned_pages++; - return ERR_PTR(-ENOMEM); + return actor->alloc_buffer ? actor->tmp_buffer : ERR_PTR(-ENOMEM); } actor->next_index++; @@ -96,11 +86,10 @@ static void *direct_first_page(struct squashfs_page_actor *actor) static void *direct_next_page(struct squashfs_page_actor *actor) { - if (actor->pageaddr) + if (actor->pageaddr) { kunmap_local(actor->pageaddr); - - kfree(actor->tmp_buffer); - actor->pageaddr = actor->tmp_buffer = NULL; + actor->pageaddr = NULL; + } return handle_next_page(actor); } @@ -109,8 +98,6 @@ static void direct_finish_page(struct squashfs_page_actor *actor) { if (actor->pageaddr) kunmap_local(actor->pageaddr); - - kfree(actor->tmp_buffer); } struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_info *msblk, @@ -121,6 +108,16 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_ if (actor == NULL) return NULL; + if (msblk->decompressor->alloc_buffer) { + actor->tmp_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + + if (actor->tmp_buffer == NULL) { + kfree(actor); + return NULL; + } + } else + actor->tmp_buffer = NULL; + actor->length = length ? : pages * PAGE_SIZE; actor->page = page; actor->pages = pages; @@ -128,7 +125,6 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_ actor->returned_pages = 0; actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1); actor->pageaddr = NULL; - actor->tmp_buffer = NULL; actor->alloc_buffer = msblk->decompressor->alloc_buffer; actor->squashfs_first_page = direct_first_page; actor->squashfs_next_page = direct_next_page; diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 24841d28bc0f..95ffbb543d91 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -29,6 +29,11 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, extern struct squashfs_page_actor *squashfs_page_actor_init_special( struct squashfs_sb_info *msblk, struct page **page, int pages, int length); +static inline void squashfs_page_actor_free(struct squashfs_page_actor *actor) +{ + kfree(actor->tmp_buffer); + kfree(actor); +} static inline void *squashfs_first_page(struct squashfs_page_actor *actor) { return actor->squashfs_first_page(actor); diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 81d26abf486f..da85b3979195 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -141,6 +141,8 @@ struct tracefs_mount_opts { kuid_t uid; kgid_t gid; umode_t mode; + /* Opt_* bitfield. */ + unsigned int opts; }; enum { @@ -241,6 +243,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) kgid_t gid; char *p; + opts->opts = 0; opts->mode = TRACEFS_DEFAULT_MODE; while ((p = strsep(&data, ",")) != NULL) { @@ -275,24 +278,36 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) * but traditionally tracefs has ignored all mount options */ } + + opts->opts |= BIT(token); } return 0; } -static int tracefs_apply_options(struct super_block *sb) +static int tracefs_apply_options(struct super_block *sb, bool remount) { struct tracefs_fs_info *fsi = sb->s_fs_info; struct inode *inode = d_inode(sb->s_root); struct tracefs_mount_opts *opts = &fsi->mount_opts; - inode->i_mode &= ~S_IALLUGO; - inode->i_mode |= opts->mode; + /* + * On remount, only reset mode/uid/gid if they were provided as mount + * options. + */ + + if (!remount || opts->opts & BIT(Opt_mode)) { + inode->i_mode &= ~S_IALLUGO; + inode->i_mode |= opts->mode; + } - inode->i_uid = opts->uid; + if (!remount || opts->opts & BIT(Opt_uid)) + inode->i_uid = opts->uid; - /* Set all the group ids to the mount option */ - set_gid(sb->s_root, opts->gid); + if (!remount || opts->opts & BIT(Opt_gid)) { + /* Set all the group ids to the mount option */ + set_gid(sb->s_root, opts->gid); + } return 0; } @@ -307,7 +322,7 @@ static int tracefs_remount(struct super_block *sb, int *flags, char *data) if (err) goto fail; - tracefs_apply_options(sb); + tracefs_apply_options(sb, true); fail: return err; @@ -359,7 +374,7 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &tracefs_super_operations; - tracefs_apply_options(sb); + tracefs_apply_options(sb, false); return 0; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 1c44bf75f916..175de70e3adf 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1601,6 +1601,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range); } + /* Reset ptes for the whole vma range if wr-protected */ + if (userfaultfd_wp(vma)) + uffd_wp_range(mm, vma, start, vma_end - start, false); + new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; prev = vma_merge(mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, |