f2fs: add sysfs node to control ra_pages for fadvise seq file
author Daeho Jeong <daehojeong@google.com>
Tue, 3 Aug 2021 04:22:45 +0000 (21:22 -0700)
committer Jaegeuk Kim <jaegeuk@kernel.org>
Tue, 3 Aug 2021 18:16:22 +0000 (11:16 -0700)
Currently, fadvise() with POSIX_FADV_SEQUENTIAL lets the user double the
readahead window. In some use cases that is not sufficient, so the window
needs to be enlarged further, but only within a restricted range. Add a
sysfs node, seq_file_ra_mul, that sets the multiplier applied to the bdi
device readahead size for the POSIX_FADV_SEQUENTIAL advice, between 2
(default) and 256.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Documentation/ABI/testing/sysfs-fs-f2fs
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/super.c
fs/f2fs/sysfs.c

index 845c4be..73211f7 100644 (file)
@@ -507,3 +507,9 @@ Date:               July 2021
 Contact:       "Daeho Jeong" <daehojeong@google.com>
 Description:   You can control for which gc mode the "gc_reclaimed_segments" node shows.
                Refer to the description of the modes in "gc_reclaimed_segments".
+
+What:          /sys/fs/f2fs/<disk>/seq_file_ra_mul
+Date:          July 2021
+Contact:       "Daeho Jeong" <daehojeong@google.com>
+Description:   You can control the multiplier applied to the bdi device readahead window
+               size, between 2 (default) and 256, for the POSIX_FADV_SEQUENTIAL advice option.
index 8d4665a..1b4c482 100644 (file)
@@ -1749,6 +1749,8 @@ struct f2fs_sb_info {
        unsigned int gc_segment_mode;           /* GC state for reclaimed segments */
        unsigned int gc_reclaimed_segs[MAX_GC_MODE];    /* Reclaimed segs for each mode */
 
+       unsigned long seq_file_ra_mul;          /* multiplier for ra_pages of seq. files in fadvise */
+
 #ifdef CONFIG_F2FS_FS_COMPRESSION
        struct kmem_cache *page_array_slab;     /* page array entry */
        unsigned int page_array_slab_size;      /* default page array slab size */
@@ -4003,6 +4005,9 @@ void f2fs_destroy_extent_cache(void);
 /*
  * sysfs.c
  */
+#define MIN_RA_MUL     2 /* lowest seq_file_ra_mul accepted via sysfs; also its initial value */
+#define MAX_RA_MUL     256 /* highest seq_file_ra_mul accepted via sysfs */
+
 int __init f2fs_init_sysfs(void);
 void f2fs_exit_sysfs(void);
 int f2fs_register_sysfs(struct f2fs_sb_info *sbi);
index e931782..7d8ee60 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/nls.h>
 #include <linux/sched/signal.h>
 #include <linux/fileattr.h>
+#include <linux/fadvise.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -4344,6 +4345,34 @@ out:
        return ret;
 }
 
+static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
+               int advice)
+{
+       struct inode *inode;
+       struct address_space *mapping;
+       struct backing_dev_info *bdi;
+
+       if (advice == POSIX_FADV_SEQUENTIAL) { /* the only advice handled here; the rest goes to generic_fadvise() */
+               inode = file_inode(filp);
+               if (S_ISFIFO(inode->i_mode))
+                       return -ESPIPE; /* FIFOs are rejected */
+
+               mapping = filp->f_mapping;
+               if (!mapping || len < 0)
+                       return -EINVAL; /* no mapping, or a negative length */
+
+               bdi = inode_to_bdi(mapping->host);
+               filp->f_ra.ra_pages = bdi->ra_pages * /* per-file readahead = device readahead size ... */
+                       F2FS_I_SB(inode)->seq_file_ra_mul; /* ... times the per-superblock multiplier */
+               spin_lock(&filp->f_lock); /* f_mode is modified under f_lock */
+               filp->f_mode &= ~FMODE_RANDOM; /* clear the FMODE_RANDOM flag on this file */
+               spin_unlock(&filp->f_lock);
+               return 0;
+       }
+
+       return generic_fadvise(filp, offset, len, advice); /* every other advice value is delegated unchanged */
+}
+
 #ifdef CONFIG_COMPAT
 struct compat_f2fs_gc_range {
        u32 sync;
@@ -4472,4 +4501,5 @@ const struct file_operations f2fs_file_operations = {
 #endif
        .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
+       .fadvise        = f2fs_file_fadvise,
 };
index a4fed18..84cd085 100644 (file)
@@ -3466,6 +3466,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
        sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
        sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
        sbi->migration_granularity = sbi->segs_per_sec;
+       sbi->seq_file_ra_mul = MIN_RA_MUL;
 
        sbi->dir_level = DEF_DIR_LEVEL;
        sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
index f98afd9..0954761 100644 (file)
@@ -540,6 +540,14 @@ out:
                return count;
        }
 
+       if (!strcmp(a->attr.name, "seq_file_ra_mul")) {
+               if (t >= MIN_RA_MUL && t <= MAX_RA_MUL)
+                       sbi->seq_file_ra_mul = t;
+               else
+                       return -EINVAL;
+               return count;
+       }
+
        *ui = (unsigned int)t;
 
        return count;
@@ -765,6 +773,7 @@ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_candidate_count, max_candidate_cou
 F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_weight, age_weight);
 F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold);
 
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
 
@@ -840,6 +849,7 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(atgc_candidate_count),
        ATTR_LIST(atgc_age_weight),
        ATTR_LIST(atgc_age_threshold),
+       ATTR_LIST(seq_file_ra_mul),
        ATTR_LIST(gc_segment_mode),
        ATTR_LIST(gc_reclaimed_segments),
        NULL,