mkfs_ext2: another update by Vladimir
authorDenys Vlasenko <vda.linux@googlemail.com>
Tue, 20 Oct 2009 20:12:11 +0000 (22:12 +0200)
committerDenys Vlasenko <vda.linux@googlemail.com>
Tue, 20 Oct 2009 20:12:11 +0000 (22:12 +0200)
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
util-linux/mkfs_ext2.c
util-linux/mkfs_ext2_test.sh

index 3f4f5c6..be9e363 100644 (file)
@@ -75,8 +75,8 @@ static unsigned int_log2(unsigned arg)
 }
 
 // taken from mkfs_minix.c. libbb candidate?
-// why "uint64_t size"? we never use it for anything >32 bits
-static uint32_t div_roundup(uint64_t size, uint32_t n)
+// "uint32_t size", since we never use it for anything >32 bits
+static uint32_t div_roundup(uint32_t size, uint32_t n)
 {
        // Overflow-resistant
        uint32_t res = size / n;
@@ -176,12 +176,15 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
        unsigned blocksize, blocksize_log2;
        unsigned reserved_percent = 5;
        unsigned long long kilobytes;
-       uint32_t nblocks, nblocks_full, nreserved;
+       uint32_t nblocks, nblocks_full;
+       uint32_t nreserved;
        uint32_t ngroups;
        uint32_t bytes_per_inode;
        uint32_t first_block;
        uint32_t inodes_per_group;
-       uint32_t gdtsz, itsz;
+       uint32_t group_desc_blocks;
+       uint32_t inode_table_blocks;
+       uint32_t lost_and_found_blocks;
        time_t timestamp;
        unsigned opts;
        const char *label = "";
@@ -245,6 +248,8 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
        ) {
                bb_error_msg_and_die("blocksize %u is bad", blocksize);
        }
+       if ((int32_t)bytes_per_inode < blocksize)
+               bb_error_msg_and_die("-%c is bad", 'i');
        // number of bits in one block, i.e. 8*blocksize
 #define blocks_per_group (8 * blocksize)
        first_block = (EXT2_MIN_BLOCK_SIZE == blocksize);
@@ -266,7 +271,6 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
        // How many reserved blocks?
        if (reserved_percent > 50)
                bb_error_msg_and_die("-%c is bad", 'm');
-       //nreserved = div_roundup((uint64_t) nblocks * reserved_percent, 100);
        nreserved = (uint64_t)nblocks * reserved_percent / 100;
 
        // N.B. killing e2fsprogs feature! Unused blocks don't account in calculations
@@ -279,10 +283,8 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
  retry:
        // N.B. a block group can have no more than blocks_per_group blocks
        ngroups = div_roundup(nblocks - first_block, blocks_per_group);
-       if (0 == ngroups)
-               bb_error_msg_and_die("ngroups");
 
-       gdtsz = div_roundup(ngroups, blocksize / sizeof(*gd));
+       group_desc_blocks = div_roundup(ngroups, blocksize / sizeof(*gd));
        // TODO: reserved blocks must be marked as such in the bitmaps,
        // or resulting filesystem is corrupt
        if (ENABLE_FEATURE_MKFS_EXT2_RESERVED_GDT) {
@@ -294,15 +296,15 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
                 * We set it at 1024x the current filesystem size, or
                 * the upper block count limit (2^32), whichever is lower.
                 */
-               uint32_t rgdtsz = 0xFFFFFFFF; // maximum block number
-               if (nblocks < rgdtsz / 1024)
-                       rgdtsz = nblocks * 1024;
-               rgdtsz = div_roundup(rgdtsz - first_block, blocks_per_group);
-               rgdtsz = div_roundup(rgdtsz, blocksize / sizeof(*gd)) - gdtsz;
-               if (rgdtsz > blocksize / sizeof(uint32_t))
-                       rgdtsz = blocksize / sizeof(uint32_t);
-               //TODO: STORE_LE(sb->s_reserved_gdt_blocks, rgdtsz);
-               gdtsz += rgdtsz;
+               uint32_t reserved_group_desc_blocks = 0xFFFFFFFF; // maximum block number
+               if (nblocks < reserved_group_desc_blocks / 1024)
+                       reserved_group_desc_blocks = nblocks * 1024;
+               reserved_group_desc_blocks = div_roundup(reserved_group_desc_blocks - first_block, blocks_per_group);
+               reserved_group_desc_blocks = div_roundup(reserved_group_desc_blocks, blocksize / sizeof(*gd)) - group_desc_blocks;
+               if (reserved_group_desc_blocks > blocksize / sizeof(uint32_t))
+                       reserved_group_desc_blocks = blocksize / sizeof(uint32_t);
+               //TODO: STORE_LE(sb->s_reserved_gdt_blocks, reserved_group_desc_blocks);
+               group_desc_blocks += reserved_group_desc_blocks;
        }
 
        {
@@ -324,13 +326,25 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
                inodes_per_group = (div_roundup(inodes_per_group * sizeof(*inode), blocksize) * blocksize) / sizeof(*inode);
                // make sure the number of inodes per group is a multiple of 8
                inodes_per_group &= ~7;
-               itsz = div_roundup(inodes_per_group * sizeof(*inode), blocksize);
+               inode_table_blocks = div_roundup(inodes_per_group * sizeof(*inode), blocksize);
+
+               // to be useful, lost+found should occupy at least 2 blocks (but not exceeding 16*1024 bytes),
+               // and at most EXT2_NDIR_BLOCKS. So reserve these blocks right now
+               /* Or e2fsprogs comment verbatim (what does it mean?):
+                * Ensure that lost+found is at least 2 blocks, so we always
+                * test large empty blocks for big-block filesystems. */
+               lost_and_found_blocks = MIN(EXT2_NDIR_BLOCKS, 16 >> (blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE));
 
                // the last group needs more attention: isn't it too small for possible overhead?
-               overhead = (has_super(ngroups - 1) ? (1/*sb*/ + gdtsz) : 0) + 1/*bbmp*/ + 1/*ibmp*/ + itsz;
+               overhead = (has_super(ngroups - 1) ? (1/*sb*/ + group_desc_blocks) : 0) + 1/*bbmp*/ + 1/*ibmp*/ + inode_table_blocks;
                remainder = (nblocks - first_block) % blocks_per_group;
-               if ((1 == ngroups) && remainder && (remainder < overhead))
-                       bb_error_msg_and_die("way small device");
+               ////can't happen, nblocks >= 60 guarantees this
+               ////if ((1 == ngroups)
+               //// && remainder
+               //// && (remainder < overhead + 1/* "/" */ + lost_and_found_blocks)
+               ////) {
+               ////    bb_error_msg_and_die("way small device");
+               ////}
                // Standard mke2fs uses 50. Looks like a bug in our calculation
                // of "remainder" or "overhead" - we don't match standard mke2fs
                // when we transition from one group to two groups
@@ -363,7 +377,7 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
                , inodes_per_group * ngroups, nblocks
                , nreserved, reserved_percent
                , first_block
-               , gdtsz * (blocksize / sizeof(*gd)) * blocks_per_group
+               , group_desc_blocks * (blocksize / sizeof(*gd)) * blocks_per_group
                , ngroups
                , blocks_per_group, blocks_per_group
                , inodes_per_group
@@ -382,9 +396,11 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
        }
        bb_putchar('\n');
 
-       // dry run? -> we are done
-       if (opts & OPT_n)
-               goto done;
+       if (opts & OPT_n) {
+               if (ENABLE_FEATURE_CLEAN_UP)
+                       close(fd);
+               return EXIT_SUCCESS;
+       }
 
        // TODO: 3/5 refuse if mounted
        // TODO: 4/5 compat options
@@ -395,7 +411,7 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
        // TODO: 3/5 dir_index?
 
        // fill the superblock
-       sb = xzalloc(blocksize);
+       sb = xzalloc(1024);
        STORE_LE(sb->s_rev_level, 1); // revision 1 filesystem
        STORE_LE(sb->s_magic, EXT2_SUPER_MAGIC);
        STORE_LE(sb->s_inode_size, sizeof(*inode));
@@ -444,49 +460,61 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
         * don't check all the filesystems at the same time.  We use a
         * kludgy hack of using the UUID to derive a random jitter value.
         */
-       STORE_LE(sb->s_max_mnt_count, EXT2_DFL_MAX_MNT_COUNT
+       STORE_LE(sb->s_max_mnt_count,
+               EXT2_DFL_MAX_MNT_COUNT
                + (sb->s_uuid[ARRAY_SIZE(sb->s_uuid)-1] % EXT2_DFL_MAX_MNT_COUNT));
 
        // write the label
        safe_strncpy((char *)sb->s_volume_name, label, sizeof(sb->s_volume_name));
 
-       // fill group descriptors
-       gd = xzalloc(gdtsz * blocksize);
+       // calculate filesystem skeleton structures
+       gd = xzalloc(group_desc_blocks * blocksize);
        buf = xmalloc(blocksize);
        sb->s_free_blocks_count = 0;
        for (i = 0, pos = first_block, n = nblocks - first_block;
                i < ngroups;
                i++, pos += blocks_per_group, n -= blocks_per_group
        ) {
-               uint32_t overhead = pos + (has_super(i) ? (1/*sb*/ + gdtsz) : 0);
-               uint32_t fb;
+               uint32_t overhead = pos + (has_super(i) ? (1/*sb*/ + group_desc_blocks) : 0);
+               uint32_t free_blocks;
+               // fill group descriptors
                STORE_LE(gd[i].bg_block_bitmap, overhead + 0);
                STORE_LE(gd[i].bg_inode_bitmap, overhead + 1);
                STORE_LE(gd[i].bg_inode_table, overhead + 2);
-               overhead = overhead - pos + 1/*bbmp*/ + 1/*ibmp*/ + itsz;
+               overhead = overhead - pos + 1/*bbmp*/ + 1/*ibmp*/ + inode_table_blocks;
                gd[i].bg_free_inodes_count = inodes_per_group;
                //STORE_LE(gd[i].bg_used_dirs_count, 0);
-               // N.B. both root and lost+found dirs are within the first block group, thus +2
+               // N.B. both "/" and "/lost+found" are within the first block group
+               // "/" occupies 1 block, "/lost+found" occupies lost_and_found_blocks...
                if (0 == i) {
-                       overhead += 2;
+                       // ... thus increased overhead for the first block group ...
+                       overhead += 1 + lost_and_found_blocks;
+                       // ... and 2 used directories
                        STORE_LE(gd[i].bg_used_dirs_count, 2);
+                       // well known reserved inodes belong to the first block too
                        gd[i].bg_free_inodes_count -= EXT2_GOOD_OLD_FIRST_INO;
                }
 
+               // cache free block count of the group
+               free_blocks = (n < blocks_per_group ? n : blocks_per_group) - overhead;
+
                // mark preallocated blocks as allocated
-               fb = (n < blocks_per_group ? n : blocks_per_group) - overhead;
-//bb_info_msg("ALLOC: [%u][%u][%u]", blocksize, overhead, blocks_per_group - (fb + overhead));
+//bb_info_msg("ALLOC: [%u][%u][%u]", blocksize, overhead, blocks_per_group - (free_blocks + overhead));
                allocate(buf, blocksize,
+                       // reserve "overhead" blocks
                        overhead,
-                       blocks_per_group - (fb + overhead)
+                       // mark unused trailing blocks
+                       blocks_per_group - (free_blocks + overhead)
                );
                // dump block bitmap
                PUT((uint64_t)(FETCH_LE32(gd[i].bg_block_bitmap)) * blocksize, buf, blocksize);
-               STORE_LE(gd[i].bg_free_blocks_count, fb);
+               STORE_LE(gd[i].bg_free_blocks_count, free_blocks);
 
                // mark preallocated inodes as allocated
                allocate(buf, blocksize,
+                       // mark reserved inodes
                        inodes_per_group - gd[i].bg_free_inodes_count,
+                       // mark unused trailing inodes
                        blocks_per_group - inodes_per_group
                );
                // dump inode bitmap
@@ -496,7 +524,7 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
                STORE_LE(gd[i].bg_free_inodes_count, gd[i].bg_free_inodes_count);
 
                // count overall free blocks
-               sb->s_free_blocks_count += fb;
+               sb->s_free_blocks_count += free_blocks;
        }
        STORE_LE(sb->s_free_blocks_count, sb->s_free_blocks_count);
 
@@ -506,8 +534,10 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
                // dump superblock and group descriptors and their backups
                if (has_super(i)) {
                        // N.B. 1024 byte blocks are special
-                       PUT(((uint64_t)pos * blocksize) + ((0 == i && 0 == first_block) ? 1024 : 0), sb, 1024);//blocksize);
-                       PUT(((uint64_t)pos * blocksize) + blocksize, gd, gdtsz * blocksize);
+                       PUT(((uint64_t)pos * blocksize) + ((0 == i && 1024 != blocksize) ? 1024 : 0),
+                                       sb, 1024);
+                       PUT(((uint64_t)pos * blocksize) + blocksize,
+                                       gd, group_desc_blocks * blocksize);
                }
        }
 
@@ -516,8 +546,9 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
        PUT(0, buf, 1024); // N.B. 1024 <= blocksize, so buf[0..1023] contains zeros
        // zero inode tables
        for (i = 0; i < ngroups; ++i)
-               for (n = 0; n < itsz; ++n)
-                       PUT((uint64_t)(FETCH_LE32(gd[i].bg_inode_table) + n) * blocksize, buf, blocksize);
+               for (n = 0; n < inode_table_blocks; ++n)
+                       PUT((uint64_t)(FETCH_LE32(gd[i].bg_inode_table) + n) * blocksize,
+                               buf, blocksize);
 
        // prepare directory inode
        inode = (struct ext2_inode *)buf;
@@ -526,21 +557,34 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
        STORE_LE(inode->i_atime, timestamp);
        STORE_LE(inode->i_ctime, timestamp);
        STORE_LE(inode->i_size, blocksize);
-       // N.B. inode->i_blocks stores the number of 512 byte data blocks. Why on Earth?!
+       // inode->i_blocks stores the number of 512 byte data blocks
+       // (512, because it goes directly to struct stat without scaling)
        STORE_LE(inode->i_blocks, blocksize / 512);
 
        // dump root dir inode
        STORE_LE(inode->i_links_count, 3); // "/.", "/..", "/lost+found/.." point to this inode
-       STORE_LE(inode->i_block[0], FETCH_LE32(gd[0].bg_inode_table) + itsz);
-       PUT(((uint64_t)FETCH_LE32(gd[0].bg_inode_table) * blocksize) + (EXT2_ROOT_INO-1) * sizeof(*inode), buf, sizeof(*inode));
+       STORE_LE(inode->i_block[0], FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks);
+       PUT(((uint64_t)FETCH_LE32(gd[0].bg_inode_table) * blocksize) + (EXT2_ROOT_INO-1) * sizeof(*inode),
+                               buf, sizeof(*inode));
 
        // dump lost+found dir inode
        STORE_LE(inode->i_links_count, 2); // both "/lost+found" and "/lost+found/." point to this inode
-       STORE_LE(inode->i_block[0], inode->i_block[0]+1); // use next block
-       PUT(((uint64_t)FETCH_LE32(gd[0].bg_inode_table) * blocksize) + (EXT2_GOOD_OLD_FIRST_INO-1) * sizeof(*inode), buf, sizeof(*inode));
+       STORE_LE(inode->i_size, lost_and_found_blocks * blocksize);
+       STORE_LE(inode->i_blocks, (lost_and_found_blocks * blocksize) / 512);
+       n = FETCH_LE32(inode->i_block[0]) + 1;
+       for (i = 0; i < lost_and_found_blocks; ++i)
+               STORE_LE(inode->i_block[i], i + n); // use next block
+//bb_info_msg("LAST BLOCK USED[%u]", i + n);
+       PUT(((uint64_t)FETCH_LE32(gd[0].bg_inode_table) * blocksize) + (EXT2_GOOD_OLD_FIRST_INO-1) * sizeof(*inode),
+                               buf, sizeof(*inode));
+
+       // zero the blocks for "/lost+found"
+       memset(buf, 0, blocksize);
+       for (i = 1; i < lost_and_found_blocks; ++i)
+               PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks + 1+i) * blocksize,
+                               buf, blocksize);
 
        // dump directories
-       memset(buf, 0, blocksize);
        dir = (struct ext2_dir *)buf;
 
        // dump lost+found dir block
@@ -554,7 +598,7 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
        STORE_LE(dir->name_len2, 2);
        STORE_LE(dir->file_type2, EXT2_FT_DIR);
        dir->name2[0] = '.'; dir->name2[1] = '.';
-       PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + itsz + 1) * blocksize, buf, blocksize);
+       PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks + 1) * blocksize, buf, blocksize);
 
        // dump root dir block
        STORE_LE(dir->inode1, EXT2_ROOT_INO);
@@ -564,9 +608,8 @@ int mkfs_ext2_main(int argc UNUSED_PARAM, char **argv)
        STORE_LE(dir->name_len3, 10);
        STORE_LE(dir->file_type3, EXT2_FT_DIR);
        strcpy(dir->name3, "lost+found");
-       PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + itsz + 0) * blocksize, buf, blocksize);
+       PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks + 0) * blocksize, buf, blocksize);
 
- done:
        // cleanup
        if (ENABLE_FEATURE_CLEAN_UP) {
                free(buf);
index a636de5..0aa9818 100755 (executable)
@@ -7,9 +7,6 @@ gen_image() { # params: mke2fs_invocation image_name
     >$2
     dd seek=$((kilobytes-1)) bs=1K count=1 </dev/zero of=$2 >/dev/null 2>&1 || exit 1
     $1 -F $2 $kilobytes >$2.raw_out 2>&1 || return 1
-
-#off    | sed 's/inodes, [0-9]* blocks/inodes, N blocks/' \
-
     cat $2.raw_out \
     | grep -v '^mke2fs [0-9]*\.[0-9]*\.[0-9]* ' \
     | grep -v '^Maximum filesystem' \
@@ -55,54 +52,51 @@ done
 
 # Transition from one block group to two
 # fails in [8378..8410] range
-kilobytes=$((8*1024 - 20))
+kilobytes=$((1 * 8*1024 - 50))
 while true; do
     test_mke2fs #|| exit 1
     : $((kilobytes++))
-    test $kilobytes = $((8*1024 + 300)) && break
+    test $kilobytes = $((1 * 8*1024 + 300)) && break
 done
 
-# Transition from two block groups to three
+# Transition from 2 block groups to 3
 # works
-kilobytes=$((16*1024 - 40))
+kilobytes=$((2 * 8*1024 - 50))
 while true; do
     test_mke2fs || exit 1
     : $((kilobytes++))
-    test $kilobytes = $((16*1024 + 500)) && break
+    test $kilobytes = $((2 * 8*1024 + 400)) && break
 done
-exit
-
-# Specific sizes with known differences:
-
-# -6240 inodes, 24908 blocks
-# +6240 inodes, 24577 blocks
-# -4 block group
-# +3 block group
-# -1560 inodes per group
-# +2080 inodes per group
-kilobytes=24908 test_mke2fs
 
-# -304 inodes, N blocks
-# +152 inodes, N blocks
-# -304 inodes per group
-# +152 inodes per group
-kilobytes=1218 test_mke2fs
-
-# -14464 inodes, N blocks
-# +14448 inodes, N blocks
-# -8 block group
-# +7 block group
-# -1808 inodes per group
-# +2064 inodes per group
-kilobytes=57696 test_mke2fs
+# Transition from 3 block groups to 4
+# fails in [24825..24922] range
+kilobytes=$((3 * 8*1024 - 50))
+while true; do
+    test_mke2fs #|| exit 1
+    : $((kilobytes++))
+    test $kilobytes = $((3 * 8*1024 + 500)) && break
+done
 
-# -warning: 239 blocks unused.
-# +warning: 242 blocks unused.
-kilobytes=49395 test_mke2fs
+# Transition from 4 block groups to 5
+# works
+kilobytes=$((4 * 8*1024 - 50))
+while true; do
+    test_mke2fs || exit 1
+    : $((kilobytes++))
+    test $kilobytes = $((4 * 8*1024 + 600)) && break
+done
 
-## This size results in "warning: 75 blocks unused"
-#kilobytes=98380 test_mke2fs
+# Transition from 5 block groups to 6
+# fails in [41230..41391] range
+kilobytes=$((5 * 8*1024 - 50))
+while true; do
+    test_mke2fs #|| exit 1
+    : $((kilobytes++))
+    test $kilobytes = $((5 * 8*1024 + 700)) && break
+done
+exit
 
+# Random sizes
 while true; do
     kilobytes=$(( (RANDOM*RANDOM) % 1000000 + 60))
     test_mke2fs || exit 1