GFS2: Limit number of transaction blocks requested for truncates
authorBob Peterson <rpeterso@redhat.com>
Fri, 16 Dec 2016 14:01:28 +0000 (08:01 -0600)
committerBob Peterson <rpeterso@redhat.com>
Thu, 5 Jan 2017 19:47:36 +0000 (14:47 -0500)
This patch limits the number of transaction blocks requested during
file truncates. If we have very large multi-terabyte files, and want
to delete or truncate them, they might span so many resource groups
that we overflow the journal blocks, and cause an assert failure.
By limiting the number of blocks in the transaction, we prevent this
overflow and give other running processes time to do transactions.

The limiting factor I chose is sd_log_thresh2 which is currently
set to 4/5ths of the journal. This same ratio is used in function
gfs2_ail_flush_reqd to determine when a log flush is required.
If we make the maximum value less than this, we can get into a
infinite hang whereby the log stops moving because the number of
used blocks is less than the threshold and the iterative loop
needs more, but since we're under the threshold, the log daemon
never starts any IO on the log.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
fs/gfs2/bmap.c

index 645721f..cbad0ef 100644 (file)
@@ -720,6 +720,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrp_list rlist;
+       struct gfs2_trans *tr;
        u64 bn, bstart;
        u32 blen, btotal;
        __be64 *p;
@@ -728,6 +729,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        unsigned int revokes = 0;
        int x;
        int error;
+       int jblocks_rqsted;
 
        error = gfs2_rindex_update(sdp);
        if (error)
@@ -791,12 +793,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */
                gfs2_rs_deltree(&ip->i_res);
 
-       error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
-                                RES_INDIRECT + RES_STATFS + RES_QUOTA,
-                                revokes);
+restart:
+       jblocks_rqsted = rg_blocks + RES_DINODE +
+               RES_INDIRECT + RES_STATFS + RES_QUOTA +
+               gfs2_struct2blk(sdp, revokes, sizeof(u64));
+       if (jblocks_rqsted > atomic_read(&sdp->sd_log_thresh2))
+               jblocks_rqsted = atomic_read(&sdp->sd_log_thresh2);
+       error = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
        if (error)
                goto out_rg_gunlock;
 
+       tr = current->journal_info;
        down_write(&ip->i_rw_mutex);
 
        gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -810,6 +817,16 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
                if (!*p)
                        continue;
 
+               /* check for max reasonable journal transaction blocks */
+               if (tr->tr_num_buf_new + RES_STATFS +
+                   RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
+                       if (rg_blocks >= tr->tr_num_buf_new)
+                               rg_blocks -= tr->tr_num_buf_new;
+                       else
+                               rg_blocks = 0;
+                       break;
+               }
+
                bn = be64_to_cpu(*p);
 
                if (bstart + blen == bn)
@@ -827,6 +844,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
                *p = 0;
                gfs2_add_inode_blocks(&ip->i_inode, -1);
        }
+       if (p == bottom)
+               rg_blocks = 0;
+
        if (bstart) {
                __gfs2_free_blocks(ip, bstart, blen, metadata);
                btotal += blen;
@@ -844,6 +864,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 
        gfs2_trans_end(sdp);
 
+       if (rg_blocks)
+               goto restart;
+
 out_rg_gunlock:
        gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
 out_rlist: