From 1c2150283cae895526d0db3953d13d139f4e7a03 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 29 Aug 2014 20:52:15 -0400 Subject: ext4: convert ext4_bread() to use the ERR_PTR convention Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0b28b36e7915..896e452b739d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5305,7 +5305,6 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, { struct inode *inode = sb_dqopt(sb)->files[type]; ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); - int err = 0; int offset = off & (sb->s_blocksize - 1); int tocopy; size_t toread; @@ -5320,9 +5319,9 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, while (toread > 0) { tocopy = sb->s_blocksize - offset < toread ? sb->s_blocksize - offset : toread; - bh = ext4_bread(NULL, inode, blk, 0, &err); - if (err) - return err; + bh = ext4_bread(NULL, inode, blk, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); if (!bh) /* A hole? */ memset(data, 0, tocopy); else @@ -5343,8 +5342,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, { struct inode *inode = sb_dqopt(sb)->files[type]; ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); - int err = 0; - int offset = off & (sb->s_blocksize - 1); + int err, offset = off & (sb->s_blocksize - 1); struct buffer_head *bh; handle_t *handle = journal_current_handle(); @@ -5365,14 +5363,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, return -EIO; } - bh = ext4_bread(handle, inode, blk, 1, &err); + bh = ext4_bread(handle, inode, blk, 1); + if (IS_ERR(bh)) + return PTR_ERR(bh); if (!bh) goto out; BUFFER_TRACE(bh, "get write access"); err = ext4_journal_get_write_access(handle, bh); if (err) { brelse(bh); - goto out; + return err; } lock_buffer(bh); memcpy(bh->b_data+offset, data, len); @@ -5381,8 +5381,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, err = ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); out: - if (err) - return err; if (inode->i_size < off + len) { i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; -- cgit From 45f1a9c3f63db3d4562c16062a51740801fbd88c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 1 Sep 2014 21:34:09 -0400 Subject: ext4: enable block_validity by default Enable by default the block_validity feature, which checks for collisions between newly allocated blocks and critical system metadata. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 896e452b739d..7194a51eb217 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3519,8 +3519,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sb, ERRORS_CONT); else set_opt(sb, ERRORS_RO); - if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) - set_opt(sb, BLOCK_VALIDITY); + /* block_validity enabled by default; disable with noblock_validity */ + set_opt(sb, BLOCK_VALIDITY); if (def_mount_opts & EXT4_DEFM_DISCARD) set_opt(sb, DISCARD); -- cgit From eb68d0e2fc5a4e5c06324ea5f485fccbae626d05 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 1 Sep 2014 22:26:49 -0400 Subject: ext4: track extent status tree shrinker delay statictics This commit adds some statictics in extent status tree shrinker. The purpose to add these is that we want to collect more details when we encounter a stall caused by extent status tree shrinker. Here we count the following statictics: stats: the number of all objects on all extent status trees the number of reclaimable objects on lru list cache hits/misses the last sorted interval the number of inodes on lru list average: scan time for shrinking some objects the number of shrunk objects maximum: the inode that has max nr. of objects on lru list the maximum scan time for shrinking some objects The output looks like below: $ cat /proc/fs/ext4/sda1/es_shrinker_info stats: 28228 objects 6341 reclaimable objects 5281/631 cache hits/misses 586 ms last sorted interval 250 inodes on lru list average: 153 us scan time 128 shrunk objects maximum: 255 inode (255 objects, 198 reclaimable) 125723 us max scan time If the lru list has never been sorted, the following line will not be printed: 586ms last sorted interval If there is an empty lru list, the following lines also will not be printed: 250 inodes on lru list ... maximum: 255 inode (255 objects, 198 reclaimable) 0 us max scan time Meanwhile in this commit a new trace point is defined to print some details in __ext4_es_shrink(). Cc: Andreas Dilger Cc: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Zheng Liu Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7194a51eb217..487c65b8cff0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -820,7 +820,6 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_counter_destroy(&sbi->s_extent_cache_cnt); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -885,6 +884,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ext4_es_init_tree(&ei->i_es_tree); rwlock_init(&ei->i_es_lock); INIT_LIST_HEAD(&ei->i_es_lru); + ei->i_es_all_nr = 0; ei->i_es_lru_nr = 0; ei->i_touch_when = 0; ei->i_reserved_data_blocks = 0; @@ -3890,12 +3890,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_err_report.data = (unsigned long) sb; /* Register extent status tree shrinker */ - ext4_es_register_shrinker(sbi); - - if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) { - ext4_msg(sb, KERN_ERR, "insufficient memory"); + if (ext4_es_register_shrinker(sbi)) goto failed_mount3; - } sbi->s_stripe = ext4_get_stripe_size(sbi); sbi->s_extent_max_zeroout_kb = 32; @@ -4225,10 +4221,9 @@ failed_mount_wq: jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; } -failed_mount3: ext4_es_unregister_shrinker(sbi); +failed_mount3: del_timer_sync(&sbi->s_err_report); - percpu_counter_destroy(&sbi->s_extent_cache_cnt); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: -- cgit From a8ac900b8163703340a2fdad11c32f96b8fe686d Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Thu, 4 Sep 2014 22:36:15 -0400 Subject: ext4: use non-movable memory for the ext4 superblock Since the ext4 superblock is not released until the file system is unmounted, allocate the buffer cache entry for the ext4 superblock out of the non-moveable are to allow page migrations and thus CMA allocations to more easily succeed if the CMA area is limited. Signed-off-by: Gioh Kim Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 487c65b8cff0..4b81747b3a80 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3436,7 +3436,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) logical_sb_block = sb_block; } - if (!(bh = sb_bread(sb, logical_sb_block))) { + if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) { ext4_msg(sb, KERN_ERR, "unable to read superblock"); goto out_fail; } @@ -3646,7 +3646,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) brelse(bh); logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; offset = do_div(logical_sb_block, blocksize); - bh = sb_bread(sb, logical_sb_block); + bh = sb_bread_unmovable(sb, logical_sb_block); if (!bh) { ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try"); @@ -3868,7 +3868,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread(sb, block); + sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); if (!sbi->s_group_desc[i]) { ext4_msg(sb, KERN_ERR, "can't read group descriptor %d", i); -- cgit From a2d4a646e619541e803fb52636964df39aed94b7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 11 Sep 2014 11:15:15 -0400 Subject: ext4: don't use MAXQUOTAS value MAXQUOTAS value defines maximum number of quota types VFS supports. This isn't necessarily the number of types ext4 supports. Although ext4 will support project quotas, use ext4 private definition for consistency with other filesystems. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4b81747b3a80..a318a2dfc79e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -822,7 +822,7 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_dirtyclusters_counter); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); #endif @@ -2207,7 +2207,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, /* Needed for iput() to work correctly and not trash data */ sb->s_flags |= MS_ACTIVE; /* Turn on quotas so that they are updated correctly */ - for (i = 0; i < MAXQUOTAS; i++) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (EXT4_SB(sb)->s_qf_names[i]) { int ret = ext4_quota_on_mount(sb, i); if (ret < 0) @@ -2263,7 +2263,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA /* Turn quotas off */ - for (i = 0; i < MAXQUOTAS; i++) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (sb_dqopt(sb)->files[i]) dquot_quota_off(sb, i); } @@ -4238,7 +4238,7 @@ failed_mount: remove_proc_entry(sb->s_id, ext4_proc_root); } #ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); #endif ext4_blkdev_remove(sbi); @@ -4765,7 +4765,7 @@ struct ext4_mount_options { u32 s_min_batch_time, s_max_batch_time; #ifdef CONFIG_QUOTA int s_jquota_fmt; - char *s_qf_names[MAXQUOTAS]; + char *s_qf_names[EXT4_MAXQUOTAS]; #endif }; @@ -4795,7 +4795,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) old_opts.s_max_batch_time = sbi->s_max_batch_time; #ifdef CONFIG_QUOTA old_opts.s_jquota_fmt = sbi->s_jquota_fmt; - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) if (sbi->s_qf_names[i]) { old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], GFP_KERNEL); @@ -4956,7 +4956,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) #ifdef CONFIG_QUOTA /* Release old quota file names */ - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(old_opts.s_qf_names[i]); if (enable_quota) { if (sb_any_quota_suspended(sb)) @@ -4985,7 +4985,7 @@ restore_opts: sbi->s_max_batch_time = old_opts.s_max_batch_time; #ifdef CONFIG_QUOTA sbi->s_jquota_fmt = old_opts.s_jquota_fmt; - for (i = 0; i < MAXQUOTAS; i++) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { kfree(sbi->s_qf_names[i]); sbi->s_qf_names[i] = old_opts.s_qf_names[i]; } @@ -5188,7 +5188,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, { int err; struct inode *qf_inode; - unsigned long qf_inums[MAXQUOTAS] = { + unsigned long qf_inums[EXT4_MAXQUOTAS] = { le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) }; @@ -5216,13 +5216,13 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, static int ext4_enable_quotas(struct super_block *sb) { int type, err = 0; - unsigned long qf_inums[MAXQUOTAS] = { + unsigned long qf_inums[EXT4_MAXQUOTAS] = { le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) }; sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; - for (type = 0; type < MAXQUOTAS; type++) { + for (type = 0; type < EXT4_MAXQUOTAS; type++) { if (qf_inums[type]) { err = ext4_quota_enable(sb, type, QFMT_VFS_V1, DQUOT_USAGE_ENABLED); -- cgit From 52c198c6820f68b6fbe1d83f76e34a82bf736024 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Thu, 11 Sep 2014 11:18:13 -0400 Subject: ext4: add sysfs entry showing whether the fs contains errors Currently there is no easy way to tell that the mounted file system contains errors other than checking for log messages, or reading the information directly from superblock. This patch adds new sysfs entries: errors_count (number of fs errors we encounter) first_error_time (unix timestamp for the first error we see) last_error_time (unix timestamp for the last error we see) If the file system is not marked as containing errors then any of the file will return 0. Otherwise it will contain valid information. More details about the errors should as always be found in the logs. Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a318a2dfc79e..ff889e1f11d5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2548,6 +2548,16 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, return count; } +static ssize_t es_ui_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + + unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + + a->u.offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + static ssize_t reserved_clusters_show(struct ext4_attr *a, struct ext4_sb_info *sbi, char *buf) { @@ -2601,14 +2611,29 @@ static struct ext4_attr ext4_attr_##_name = { \ .offset = offsetof(struct ext4_sb_info, _elname),\ }, \ } + +#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ +static struct ext4_attr ext4_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .u = { \ + .offset = offsetof(struct ext4_super_block, _elname), \ + }, \ +} + #define EXT4_ATTR(name, mode, show, store) \ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) + +#define EXT4_RO_ATTR_ES_UI(name, elname) \ + EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) #define EXT4_RW_ATTR_SBI_UI(name, elname) \ EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) + #define ATTR_LIST(name) &ext4_attr_##name.attr #define EXT4_DEPRECATED_ATTR(_name, _val) \ static struct ext4_attr ext4_attr_##_name = { \ @@ -2641,6 +2666,9 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); +EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); +EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); +EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), @@ -2664,6 +2692,9 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(warning_ratelimit_burst), ATTR_LIST(msg_ratelimit_interval_ms), ATTR_LIST(msg_ratelimit_burst), + ATTR_LIST(errors_count), + ATTR_LIST(first_error_time), + ATTR_LIST(last_error_time), NULL, }; -- cgit From c7f725435adcf2ade4b9152ee33339d28f4cc330 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Thu, 11 Sep 2014 11:27:58 -0400 Subject: ext4: provide separate operations for sysfs feature files Currently sysfs feature files uses ext4_attr_ops as the file operations to show/store data. However the feature files is not supposed to contain any data at all, the sole existence of the file means that the module support the feature. Moreover, none of the sysfs feature attributes actually register show/store functions so that would not be a problem. However if a sysfs feature attribute register a show or store function we might be in trouble because the kobject in this case is _not_ embedded in the ext4_sb_info structure as ext4_attr_show/store expect. So just to be safe, provide separate empty sysfs_ops to use in ext4_feat_ktype. This might safe us from potential problems in the future. As a bonus we can "store" something more descriptive than nothing in the files, so let it contain "enabled" to make it clear that the feature is really present in the module. Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ff889e1f11d5..2766b8eba121 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2754,9 +2754,25 @@ static void ext4_feat_release(struct kobject *kobj) complete(&ext4_feat->f_kobj_unregister); } +static ssize_t ext4_feat_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "supported\n"); +} + +/* + * We can not use ext4_attr_show/store because it relies on the kobject + * being embedded in the ext4_sb_info structure which is definitely not + * true in this case. + */ +static const struct sysfs_ops ext4_feat_ops = { + .show = ext4_feat_show, + .store = NULL, +}; + static struct kobj_type ext4_feat_ktype = { .default_attrs = ext4_feat_attrs, - .sysfs_ops = &ext4_attr_ops, + .sysfs_ops = &ext4_feat_ops, .release = ext4_feat_release, }; -- cgit From feb8c6d3dd0f2cc0e1c3376d099cf298c5f2c2c8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 11 Sep 2014 11:38:21 -0400 Subject: jbd2: fix journal checksum feature flag handling Clear all three journal checksum feature flags before turning on whichever journal checksum options we want. Rearrange the error checking so that newer flags get complained about first. Reported-by: TR Reardon Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2766b8eba121..fb219b95f8d2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3237,6 +3237,10 @@ static int set_journal_csum_feature_set(struct super_block *sb) incompat = 0; } + jbd2_journal_clear_features(sbi->s_journal, + JBD2_FEATURE_COMPAT_CHECKSUM, 0, + JBD2_FEATURE_INCOMPAT_CSUM_V3 | + JBD2_FEATURE_INCOMPAT_CSUM_V2); if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { ret = jbd2_journal_set_features(sbi->s_journal, compat, 0, @@ -3249,11 +3253,8 @@ static int set_journal_csum_feature_set(struct super_block *sb) jbd2_journal_clear_features(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); } else { - jbd2_journal_clear_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | - JBD2_FEATURE_INCOMPAT_CSUM_V3 | - JBD2_FEATURE_INCOMPAT_CSUM_V2); + jbd2_journal_clear_features(sbi->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); } return ret; -- cgit From df4763bea5b04d8eed941cfe3df51f22cfe95570 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 11 Sep 2014 11:44:36 -0400 Subject: ext4: validate external journal superblock checksum If the external journal device has metadata_csum enabled, verify that the superblock checksum matches the block before we try to mount. Signed-off-by: Darrick J. Wong Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fb219b95f8d2..263201793c65 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4414,6 +4414,15 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, goto out_bdev; } + if ((le32_to_cpu(es->s_feature_ro_compat) & + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + es->s_checksum != ext4_superblock_csum(sb, es)) { + ext4_msg(sb, KERN_ERR, "external journal has " + "corrupt superblock"); + brelse(bh); + goto out_bdev; + } + if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { ext4_msg(sb, KERN_ERR, "journal UUID does not match"); brelse(bh); -- cgit From 844749764b416ee2c4ba2da328c04eaad7388242 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 16 Sep 2014 14:52:03 -0400 Subject: ext4: explicitly inform user about orphan list cleanup Production fs likely compiled/mounted w/o jbd debugging, so orphan list clearing will be silent. Signed-off-by: Dmitry Monakhov Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 263201793c65..028935fc2760 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2191,7 +2191,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { /* don't clear list on RO mount w/ errors */ if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { - jbd_debug(1, "Errors on filesystem, " + ext4_msg(sb, KERN_INFO, "Errors on filesystem, " "clearing orphan list.\n"); es->s_last_orphan = 0; } -- cgit From 279bf6d390933d5353ab298fcc306c391a961469 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 Sep 2014 01:12:15 -0400 Subject: ext4: don't check quota format when there are no quota files The check whether quota format is set even though there are no quota files with journalled quota is pointless and it actually makes it impossible to turn off journalled quotas (as there's no way to unset journalled quota format). Just remove the check. CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 028935fc2760..115e27d855ef 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1712,13 +1712,6 @@ static int parse_options(char *options, struct super_block *sb, "not specified"); return 0; } - } else { - if (sbi->s_jquota_fmt) { - ext4_msg(sb, KERN_ERR, "journaled quota format " - "specified with no journaling " - "enabled"); - return 0; - } } #endif if (test_opt(sb, DIOREAD_NOLOCK)) { -- cgit From bda3253043c54a705c8352096194ab6216e2e5c1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 18 Sep 2014 16:12:37 -0400 Subject: ext4: fold ext4_sync_fs_nojournal() into ext4_sync_fs() This allows us to eliminate duplicate code, and eventually allow us to also fold ext4_sops and ext4_nojournal_sops together. Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 115e27d855ef..4770c98bdb61 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -70,7 +70,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb, static void ext4_clear_journal_err(struct super_block *sb, struct ext4_super_block *es); static int ext4_sync_fs(struct super_block *sb, int wait); -static int ext4_sync_fs_nojournal(struct super_block *sb, int wait); static int ext4_remount(struct super_block *sb, int *flags, char *data); static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); @@ -1131,7 +1130,7 @@ static const struct super_operations ext4_nojournal_sops = { .dirty_inode = ext4_dirty_inode, .drop_inode = ext4_drop_inode, .evict_inode = ext4_evict_inode, - .sync_fs = ext4_sync_fs_nojournal, + .sync_fs = ext4_sync_fs, .put_super = ext4_put_super, .statfs = ext4_statfs, .remount_fs = ext4_remount, @@ -4718,15 +4717,19 @@ static int ext4_sync_fs(struct super_block *sb, int wait) * being sent at the end of the function. But we can skip it if * transaction_commit will do it for us. */ - target = jbd2_get_latest_transaction(sbi->s_journal); - if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && - !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) + if (sbi->s_journal) { + target = jbd2_get_latest_transaction(sbi->s_journal); + if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && + !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) + needs_barrier = true; + + if (jbd2_journal_start_commit(sbi->s_journal, &target)) { + if (wait) + ret = jbd2_log_wait_commit(sbi->s_journal, + target); + } + } else if (wait && test_opt(sb, BARRIER)) needs_barrier = true; - - if (jbd2_journal_start_commit(sbi->s_journal, &target)) { - if (wait) - ret = jbd2_log_wait_commit(sbi->s_journal, target); - } if (needs_barrier) { int err; err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); @@ -4737,19 +4740,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait) return ret; } -static int ext4_sync_fs_nojournal(struct super_block *sb, int wait) -{ - int ret = 0; - - trace_ext4_sync_fs(sb, wait); - flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq); - dquot_writeback_dquots(sb, -1); - if (wait && test_opt(sb, BARRIER)) - ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); - - return ret; -} - /* * LVM calls this function before a (read-only) snapshot is created. This * gives us a chance to flush the journal completely and mark the fs clean. -- cgit From bb0445765866e5b1607af81e2f48ca5a8efbeed8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 18 Sep 2014 17:12:02 -0400 Subject: ext4: support freezing ext2 (nojournal) file systems Through an oversight, when we added nojournal support to ext4, we didn't add support to allow file system freezing. This is relatively easy to add, so let's do it. Signed-off-by: Theodore Ts'o Reported-by: Dexuan Cui --- fs/ext4/super.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4770c98bdb61..4db537b3a162 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1131,6 +1131,8 @@ static const struct super_operations ext4_nojournal_sops = { .drop_inode = ext4_drop_inode, .evict_inode = ext4_evict_inode, .sync_fs = ext4_sync_fs, + .freeze_fs = ext4_freeze, + .unfreeze_fs = ext4_unfreeze, .put_super = ext4_put_super, .statfs = ext4_statfs, .remount_fs = ext4_remount, @@ -4758,23 +4760,26 @@ static int ext4_freeze(struct super_block *sb) journal = EXT4_SB(sb)->s_journal; - /* Now we set up the journal barrier. */ - jbd2_journal_lock_updates(journal); + if (journal) { + /* Now we set up the journal barrier. */ + jbd2_journal_lock_updates(journal); - /* - * Don't clear the needs_recovery flag if we failed to flush - * the journal. - */ - error = jbd2_journal_flush(journal); - if (error < 0) - goto out; + /* + * Don't clear the needs_recovery flag if we failed to + * flush the journal. + */ + error = jbd2_journal_flush(journal); + if (error < 0) + goto out; + } /* Journal blocked and flushed, clear needs_recovery flag. */ EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); error = ext4_commit_super(sb, 1); out: - /* we rely on upper layer to stop further updates */ - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + if (journal) + /* we rely on upper layer to stop further updates */ + jbd2_journal_unlock_updates(journal); return error; } -- cgit From f6e63f90809946d410c42045577cb159fedabf8c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 18 Sep 2014 17:12:30 -0400 Subject: ext4: fold ext4_nojournal_sops into ext4_sops There's no longer any need to have a separate set of super_operations for nojournal mode. Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4db537b3a162..1070d6e521c6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1123,27 +1123,6 @@ static const struct super_operations ext4_sops = { .bdev_try_to_free_page = bdev_try_to_free_page, }; -static const struct super_operations ext4_nojournal_sops = { - .alloc_inode = ext4_alloc_inode, - .destroy_inode = ext4_destroy_inode, - .write_inode = ext4_write_inode, - .dirty_inode = ext4_dirty_inode, - .drop_inode = ext4_drop_inode, - .evict_inode = ext4_evict_inode, - .sync_fs = ext4_sync_fs, - .freeze_fs = ext4_freeze, - .unfreeze_fs = ext4_unfreeze, - .put_super = ext4_put_super, - .statfs = ext4_statfs, - .remount_fs = ext4_remount, - .show_options = ext4_show_options, -#ifdef CONFIG_QUOTA - .quota_read = ext4_quota_read, - .quota_write = ext4_quota_write, -#endif - .bdev_try_to_free_page = bdev_try_to_free_page, -}; - static const struct export_operations ext4_export_ops = { .fh_to_dentry = ext4_fh_to_dentry, .fh_to_parent = ext4_fh_to_parent, @@ -3941,11 +3920,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* * set up enough so that it can read an inode */ - if (!test_opt(sb, NOLOAD) && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) - sb->s_op = &ext4_sops; - else - sb->s_op = &ext4_nojournal_sops; + sb->s_op = &ext4_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; #ifdef CONFIG_QUOTA -- cgit From f4bb2981024fc91b23b4d09a8817c415396dbabb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 5 Oct 2014 22:56:00 -0400 Subject: ext4: add ext4_iget_normal() which is to be used for dir tree lookups If there is a corrupted file system which has directory entries that point at reserved, metadata inodes, prohibit them from being used by treating them the same way we treat Boot Loader inodes --- that is, mark them to be bad inodes. This prohibits them from being opened, deleted, or modified via chmod, chown, utimes, etc. In particular, this prevents a corrupted file system which has a directory entry which points at the journal inode from being deleted and its blocks released, after which point Much Hilarity Ensues. Reported-by: Sami Liedes Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1070d6e521c6..a0811cc00c91 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1001,7 +1001,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any" */ - inode = ext4_iget(sb, ino); + inode = ext4_iget_normal(sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); if (generation && inode->i_generation != generation) { -- cgit From 9aa5d32ba269bec0e7eaba2697a986a7b0bc8528 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 13 Oct 2014 03:36:16 -0400 Subject: ext4: Replace open coded mdata csum feature to helper function Besides the fact that this replacement improves code readability it also protects from errors caused direct EXT4_S(sb)->s_es manipulation which may result attempt to use uninitialized csum machinery. #Testcase_BEGIN IMG=/dev/ram0 MNT=/mnt mkfs.ext4 $IMG mount $IMG $MNT #Enable feature directly on disk, on mounted fs tune2fs -O metadata_csum $IMG # Provoke metadata update, likey result in OOPS touch $MNT/test umount $MNT #Testcase_END # Replacement script @@ expression E; @@ - EXT4_HAS_RO_COMPAT_FEATURE(E, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) + ext4_has_metadata_csum(E) https://bugzilla.kernel.org/show_bug.cgi?id=82201 Signed-off-by: Dmitry Monakhov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/super.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a0811cc00c91..5afe42db303c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -140,8 +140,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb, static int ext4_superblock_csum_verify(struct super_block *sb, struct ext4_super_block *es) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 1; return es->s_checksum == ext4_superblock_csum(sb, es); @@ -151,8 +150,7 @@ void ext4_superblock_csum_set(struct super_block *sb) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return; es->s_checksum = ext4_superblock_csum(sb, es); @@ -1989,8 +1987,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, __u16 crc = 0; __le32 le_group = cpu_to_le32(block_group); - if ((sbi->s_es->s_feature_ro_compat & - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { + if (ext4_has_metadata_csum(sbi->s_sb)) { /* Use new metadata_csum algorithm */ __le16 save_csum; __u32 csum32; @@ -3199,8 +3196,7 @@ static int set_journal_csum_feature_set(struct super_block *sb) int compat, incompat; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_metadata_csum(sb)) { /* journal checksum v3 */ compat = 0; incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; @@ -3508,8 +3504,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } /* Precompute checksum seed for all metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(sb)) sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, sizeof(es->s_uuid)); -- cgit From 813d32f91333e4c33d5a19b67167c4bae42dae75 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 14 Oct 2014 02:35:49 -0400 Subject: ext4: check s_chksum_driver when looking for bg csum presence Convert the ext4_has_group_desc_csum predicate to look for a checksum driver instead of the metadata_csum flag and change the bg checksum calculation function to look for GDT_CSUM before taking the crc16 path. Without this patch, if we mount with ^uninit_bg,^metadata_csum and later metadata_csum gets turned on by accident, the block group checksum functions will incorrectly assume that checksumming is enabled (metadata_csum) but that crc16 should be used (!s_chksum_driver). This is totally wrong, so fix the predicate and the checksum formula selection. (Granted, if the metadata_csum feature bit gets enabled on a live FS then something underhanded is going on, but we could at least avoid writing garbage into the on-disk fields.) Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Reviewed-by: Dmitry Monakhov Cc: stable@vger.kernel.org --- fs/ext4/super.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5afe42db303c..e96b6ecc8d34 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2005,6 +2005,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, } /* old crc16 code */ + if (!(sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) + return 0; + offset = offsetof(struct ext4_group_desc, bg_checksum); crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); -- cgit