f2fs: let fstrim issue discard commands in lower priority
authorJaegeuk Kim <jaegeuk@kernel.org>
Thu, 24 May 2018 20:57:26 +0000 (13:57 -0700)
committerJaegeuk Kim <jaegeuk@kernel.org>
Tue, 29 May 2018 19:04:08 +0000 (12:04 -0700)
The fstrim gathers huge number of large discard commands, and tries to issue
without IO awareness, which results in long user-perceive IO latencies on
READ, WRITE, and FLUSH in UFS. We've observed some of commands take several
seconds due to long discard latency.

This patch limits the maximum size to 2MB per candidate, and check IO congestion
when issuing them to disk.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/f2fs.h
fs/f2fs/segment.c

index f32a0c7..cbfe50d 100644 (file)
@@ -183,6 +183,7 @@ enum {
                (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
 #define MAX_DISCARD_BLOCKS(sbi)                BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST                8       /* issue 8 discards per round */
+#define DEF_MAX_DISCARD_LEN            512     /* Max. 2MB per discard */
 #define DEF_MIN_DISCARD_ISSUE_TIME     50      /* 50 ms, if exists */
 #define DEF_MAX_DISCARD_ISSUE_TIME     60000   /* 60 s, if no candidates */
 #define DEF_CP_INTERVAL                        60      /* 60 secs */
@@ -694,7 +695,8 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
 static inline bool __is_discard_mergeable(struct discard_info *back,
                                                struct discard_info *front)
 {
-       return back->lstart + back->len == front->lstart;
+       return (back->lstart + back->len == front->lstart) &&
+               (back->len + front->len < DEF_MAX_DISCARD_LEN);
 }
 
 static inline bool __is_discard_back_mergeable(struct discard_info *cur,
index 1e365e9..6e40e53 100644 (file)
@@ -1130,68 +1130,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
        return 0;
 }
 
-static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
-                                       struct discard_policy *dpolicy,
-                                       unsigned int start, unsigned int end)
-{
-       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
-       struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
-       struct rb_node **insert_p = NULL, *insert_parent = NULL;
-       struct discard_cmd *dc;
-       struct blk_plug plug;
-       int issued;
-
-next:
-       issued = 0;
-
-       mutex_lock(&dcc->cmd_lock);
-       f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
-
-       dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
-                                       NULL, start,
-                                       (struct rb_entry **)&prev_dc,
-                                       (struct rb_entry **)&next_dc,
-                                       &insert_p, &insert_parent, true);
-       if (!dc)
-               dc = next_dc;
-
-       blk_start_plug(&plug);
-
-       while (dc && dc->lstart <= end) {
-               struct rb_node *node;
-
-               if (dc->len < dpolicy->granularity)
-                       goto skip;
-
-               if (dc->state != D_PREP) {
-                       list_move_tail(&dc->list, &dcc->fstrim_list);
-                       goto skip;
-               }
-
-               __submit_discard_cmd(sbi, dpolicy, dc);
-
-               if (++issued >= dpolicy->max_requests) {
-                       start = dc->lstart + dc->len;
-
-                       blk_finish_plug(&plug);
-                       mutex_unlock(&dcc->cmd_lock);
-
-                       schedule();
-
-                       goto next;
-               }
-skip:
-               node = rb_next(&dc->rb_node);
-               dc = rb_entry_safe(node, struct discard_cmd, rb_node);
-
-               if (fatal_signal_pending(current))
-                       break;
-       }
-
-       blk_finish_plug(&plug);
-       mutex_unlock(&dcc->cmd_lock);
-}
-
 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
                                        struct discard_policy *dpolicy)
 {
@@ -1332,7 +1270,18 @@ next:
 static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
                                                struct discard_policy *dpolicy)
 {
-       __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
+       struct discard_policy dp;
+
+       if (dpolicy) {
+               __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
+               return;
+       }
+
+       /* wait all */
+       init_discard_policy(&dp, DPOLICY_FSTRIM, 1);
+       __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+       init_discard_policy(&dp, DPOLICY_UMOUNT, 1);
+       __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
 }
 
 /* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1380,8 +1329,9 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
        init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity);
        __issue_discard_cmd(sbi, &dpolicy);
        dropped = __drop_discard_cmd(sbi);
-       __wait_all_discard_cmd(sbi, &dpolicy);
 
+       /* just to make sure there is no pending discard commands */
+       __wait_all_discard_cmd(sbi, NULL);
        return dropped;
 }
 
@@ -2373,6 +2323,67 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        return has_candidate;
 }
 
+static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+                                       struct discard_policy *dpolicy,
+                                       unsigned int start, unsigned int end)
+{
+       struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+       struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+       struct rb_node **insert_p = NULL, *insert_parent = NULL;
+       struct discard_cmd *dc;
+       struct blk_plug plug;
+       int issued;
+
+next:
+       issued = 0;
+
+       mutex_lock(&dcc->cmd_lock);
+       f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+
+       dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+                                       NULL, start,
+                                       (struct rb_entry **)&prev_dc,
+                                       (struct rb_entry **)&next_dc,
+                                       &insert_p, &insert_parent, true);
+       if (!dc)
+               dc = next_dc;
+
+       blk_start_plug(&plug);
+
+       while (dc && dc->lstart <= end) {
+               struct rb_node *node;
+
+               if (dc->len < dpolicy->granularity)
+                       goto skip;
+
+               if (dc->state != D_PREP) {
+                       list_move_tail(&dc->list, &dcc->fstrim_list);
+                       goto skip;
+               }
+
+               __submit_discard_cmd(sbi, dpolicy, dc);
+
+               if (++issued >= dpolicy->max_requests) {
+                       start = dc->lstart + dc->len;
+
+                       blk_finish_plug(&plug);
+                       mutex_unlock(&dcc->cmd_lock);
+                       __wait_all_discard_cmd(sbi, NULL);
+                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       goto next;
+               }
+skip:
+               node = rb_next(&dc->rb_node);
+               dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+
+               if (fatal_signal_pending(current))
+                       break;
+       }
+
+       blk_finish_plug(&plug);
+       mutex_unlock(&dcc->cmd_lock);
+}
+
 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 {
        __u64 start = F2FS_BYTES_TO_BLK(range->start);