From 66e61a9e9504f61b9a928c9055368c81da613a50 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 27 Jul 2010 11:56:04 -0400 Subject: [PATCH] ext4: Once a day, printk file system error information to dmesg This allows us to grab any file system error messages by scraping /var/log/messages. This will make it easy for us to do error analysis across the very large number of machines as we deploy ext4 across the fleet. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ fs/ext4/super.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6b96125..5d3d768 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1166,6 +1166,9 @@ struct ext4_sb_info { /* workqueue for dio unwritten */ struct workqueue_struct *dio_unwritten_wq; + + /* timer for periodic error stats printing */ + struct timer_list s_err_report; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a94d3f5..ed00c14 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -325,6 +325,12 @@ static void __save_error_info(struct super_block *sb, const char *func, es->s_first_error_ino = es->s_last_error_ino; es->s_first_error_block = es->s_last_error_block; } + /* + * Start the daily error reporting function if it hasn't been + * started already + */ + if (!es->s_error_count) + mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); } @@ -2480,6 +2486,53 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 1; } +/* + * This function is called once a day if we have errors logged + * on the file system + */ +static void print_daily_error_info(unsigned long arg) +{ + struct super_block *sb = (struct super_block *) arg; + struct ext4_sb_info *sbi; + struct ext4_super_block *es; + + sbi = EXT4_SB(sb); + es = sbi->s_es; + + if (es->s_error_count) + ext4_msg(sb, KERN_NOTICE, "error count: %u", + le32_to_cpu(es->s_error_count)); + if (es->s_first_error_time) { + printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d", + sb->s_id, le32_to_cpu(es->s_first_error_time), + (int) sizeof(es->s_first_error_func), + es->s_first_error_func, + le32_to_cpu(es->s_first_error_line)); + if (es->s_first_error_ino) + printk(": inode %u", + le32_to_cpu(es->s_first_error_ino)); + if (es->s_first_error_block) + printk(": block %llu", (unsigned long long) + le64_to_cpu(es->s_first_error_block)); + printk("\n"); + } + if (es->s_last_error_time) { + printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d", + sb->s_id, le32_to_cpu(es->s_last_error_time), + (int) sizeof(es->s_last_error_func), + es->s_last_error_func, + le32_to_cpu(es->s_last_error_line)); + if (es->s_last_error_ino) + printk(": inode %u", + le32_to_cpu(es->s_last_error_ino)); + if (es->s_last_error_block) + printk(": block %llu", (unsigned long long) + le64_to_cpu(es->s_last_error_block)); + printk("\n"); + } + mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) __releases(kernel_lock) __acquires(kernel_lock) @@ -3083,6 +3136,12 @@ no_journal: ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " "Opts: %s", descr, orig_data); + init_timer(&sbi->s_err_report); + sbi->s_err_report.function = print_daily_error_info; + sbi->s_err_report.data = (unsigned long) sb; + if (es->s_error_count) + mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ + lock_kernel(); kfree(orig_data); return 0; -- 2.7.4