btrfs: tree-checker: Verify dev item
author Qu Wenruo <wqu@suse.com>
Fri, 8 Mar 2019 06:20:03 +0000 (14:20 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 29 Apr 2019 17:02:32 +0000 (19:02 +0200)
[BUG]
For a fuzzed image whose DEV_ITEM has an invalid total_bytes of 0, the
kernel will just panic:
  BUG: unable to handle kernel NULL pointer dereference at 0000000000000098
  #PF error: [normal kernel read fault]
  PGD 800000022b2bd067 P4D 800000022b2bd067 PUD 22b2bc067 PMD 0
  Oops: 0000 [#1] SMP PTI
  CPU: 0 PID: 1106 Comm: mount Not tainted 5.0.0-rc8+ #9
  RIP: 0010:btrfs_verify_dev_extents+0x2a5/0x5a0
  Call Trace:
   open_ctree+0x160d/0x2149
   btrfs_mount_root+0x5b2/0x680

[CAUSE]
If device extent verification finds a device with 0 total_bytes, it
assumes the device is a seed dummy and then searches for seed devices.

But in this case there is no seed device at all, causing a NULL pointer
dereference.

[FIX]
Since this is caused by a fuzzed image, let's go the tree-checker way and
add a new verification for the device item.

Reported-by: Yoon Jungyeon <jungyeon@gatech.edu>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=202691
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/tree-checker.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h

index 33d04fc..00ab4e7 100644 (file)
@@ -600,6 +600,77 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
+__printf(4, 5)         /* printf-style: fmt is parameter 4, its args start at 5 */
+__cold
+static void dev_item_err(const struct btrfs_fs_info *fs_info,
+                        const struct extent_buffer *eb, int slot,
+                        const char *fmt, ...)
+{      /* Log a critical "corrupt leaf/node" message for the dev item at @slot */
+       struct btrfs_key key;
+       struct va_format vaf;
+       va_list args;
+
+       btrfs_item_key_to_cpu(eb, &key, slot);
+       va_start(args, fmt);
+
+       vaf.fmt = fmt;          /* caller's message is appended through %pV */
+       vaf.va = &args;
+
+       btrfs_crit(fs_info,
+       "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
+               btrfs_header_level(eb) == 0 ? "leaf" : "node",
+               btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+               key.offset, &vaf);      /* DEV_ITEM keys carry the devid in offset */
+       va_end(args);
+}
+
+static int check_dev_item(struct btrfs_fs_info *fs_info,
+                         struct extent_buffer *leaf,
+                         struct btrfs_key *key, int slot)
+{      /* Validate one DEV_ITEM: 0 if sane, -EUCLEAN on first failed check */
+       struct btrfs_dev_item *ditem;
+       u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK);
+
+       if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) {
+               dev_item_err(fs_info, leaf, slot,
+                            "invalid objectid: has=%llu expect=%llu",
+                            key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
+               return -EUCLEAN;
+       }
+       if (key->offset > max_devid) {  /* NOTE(review): confirm a valid devid can never exceed this bound */
+               dev_item_err(fs_info, leaf, slot,
+                            "invalid devid: has=%llu expect=[0, %llu]",
+                            key->offset, max_devid);
+               return -EUCLEAN;
+       }
+       ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
+       if (btrfs_device_id(leaf, ditem) != key->offset) {
+               dev_item_err(fs_info, leaf, slot,
+                            "devid mismatch: key has=%llu item has=%llu",
+                            key->offset, btrfs_device_id(leaf, ditem));
+               return -EUCLEAN;
+       }
+
+       /*
+        * total_bytes cannot be validated reliably on its own, as it can be 0
+        * for device removal; the device size can only be verified by the dev
+        * extents check.  Here only require that bytes_used fits within it.
+        */
+       if (btrfs_device_bytes_used(leaf, ditem) >
+           btrfs_device_total_bytes(leaf, ditem)) {
+               dev_item_err(fs_info, leaf, slot,
+                            "invalid bytes used: have %llu expect [0, %llu]",
+                            btrfs_device_bytes_used(leaf, ditem),
+                            btrfs_device_total_bytes(leaf, ditem));
+               return -EUCLEAN;
+       }
+       /*
+        * Remaining members like io_align/type/gen/dev_group aren't really
+        * utilized, so leave them unchecked to keep later use of them easy.
+        */
+       return 0;
+}
+
 /*
  * Common point to switch the item-specific validation.
  */
@@ -630,6 +701,9 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info,
                ret = btrfs_check_chunk_valid(fs_info, leaf, chunk,
                                              key->offset);
                break;
+       case BTRFS_DEV_ITEM_KEY:
+               ret = check_dev_item(fs_info, leaf, key, slot);
+               break;
        }
        return ret;
 }
index 97dbd7f..77bca3a 100644 (file)
@@ -4958,15 +4958,6 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
        btrfs_set_fs_incompat(info, RAID56);
 }
 
-#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info)       \
-                       - sizeof(struct btrfs_chunk))           \
-                       / sizeof(struct btrfs_stripe) + 1)
-
-#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE       \
-                               - 2 * sizeof(struct btrfs_disk_key)     \
-                               - 2 * sizeof(struct btrfs_chunk))       \
-                               / sizeof(struct btrfs_stripe) + 1)
-
 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                               u64 start, u64 type)
 {
index 3ad9d58..38ed94b 100644 (file)
@@ -258,6 +258,15 @@ struct btrfs_fs_devices {
 
 #define BTRFS_BIO_INLINE_CSUM_SIZE     64
 
+#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info)       \
+                       - sizeof(struct btrfs_chunk))           \
+                       / sizeof(struct btrfs_stripe) + 1)
+
+#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE       \
+                               - 2 * sizeof(struct btrfs_disk_key)     \
+                               - 2 * sizeof(struct btrfs_chunk))       \
+                               / sizeof(struct btrfs_stripe) + 1)
+
 /*
  * we need the mirror number and stripe index to be passed around
  * the call chain while we are processing end_io (especially errors).