Merge branch '2020-09-14-btrfs-rewrite' into next
author Tom Rini <trini@konsulko.com>
Tue, 8 Sep 2020 01:00:47 +0000 (21:00 -0400)
committer Tom Rini <trini@konsulko.com>
Tue, 8 Sep 2020 01:00:47 +0000 (21:00 -0400)
- Bring in the update to btrfs support that rewrites it based on
  btrfs-progs.

30 files changed:
MAINTAINERS
fs/btrfs/Makefile
fs/btrfs/btrfs.c
fs/btrfs/btrfs.h
fs/btrfs/btrfs_tree.h [deleted file]
fs/btrfs/chunk-map.c [deleted file]
fs/btrfs/common/rbtree-utils.c [new file with mode: 0644]
fs/btrfs/common/rbtree-utils.h [new file with mode: 0644]
fs/btrfs/compat.h [new file with mode: 0644]
fs/btrfs/compression.c
fs/btrfs/crypto/hash.c [new file with mode: 0644]
fs/btrfs/crypto/hash.h [new file with mode: 0644]
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c [new file with mode: 0644]
fs/btrfs/disk-io.h [new file with mode: 0644]
fs/btrfs/extent-cache.c [new file with mode: 0644]
fs/btrfs/extent-cache.h [new file with mode: 0644]
fs/btrfs/extent-io.c
fs/btrfs/extent-io.h [new file with mode: 0644]
fs/btrfs/hash.c [deleted file]
fs/btrfs/inode.c
fs/btrfs/kernel-shared/btrfs_tree.h [new file with mode: 0644]
fs/btrfs/root-tree.c [new file with mode: 0644]
fs/btrfs/root.c [deleted file]
fs/btrfs/subvolume.c
fs/btrfs/super.c [deleted file]
fs/btrfs/volumes.c [new file with mode: 0644]
fs/btrfs/volumes.h [new file with mode: 0644]

diff --git a/MAINTAINERS b/MAINTAINERS
index 101f4e1..332112f 100644 (file)
@@ -599,6 +599,8 @@ F:  tools/binman/
 
 BTRFS
 M:     Marek Behun <marek.behun@nic.cz>
+R:     Qu Wenruo <wqu@suse.com>
+L:     linux-btrfs@vger.kernel.org
 S:     Maintained
 F:     cmd/btrfs.c
 F:     fs/btrfs/
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b31592..fc074c8 100644 (file)
@@ -2,5 +2,6 @@
 #
 # 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
 
-obj-y := btrfs.o chunk-map.o compression.o ctree.o dev.o dir-item.o \
-       extent-io.o hash.o inode.o root.o subvolume.o super.o
+obj-y := btrfs.o compression.o ctree.o dev.o dir-item.o \
+       extent-io.o inode.o subvolume.o crypto/hash.o disk-io.o \
+       common/rbtree-utils.o extent-cache.o volumes.o root-tree.o
diff --git a/fs/btrfs/btrfs.c b/fs/btrfs/btrfs.c
index de16217..cbf9dcf 100644 (file)
  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
  */
 
-#include "btrfs.h"
 #include <config.h>
 #include <malloc.h>
 #include <uuid.h>
 #include <linux/time.h>
+#include "btrfs.h"
+#include "crypto/hash.h"
+#include "disk-io.h"
 
-struct btrfs_info btrfs_info;
+struct btrfs_fs_info *current_fs_info;
 
-static int readdir_callback(const struct btrfs_root *root,
-                           struct btrfs_dir_item *item)
+static int show_dir(struct btrfs_root *root, struct extent_buffer *eb,
+                   struct btrfs_dir_item *di)
 {
-       static const char typestr[BTRFS_FT_MAX][4] = {
-               [BTRFS_FT_UNKNOWN]  = " ? ",
-               [BTRFS_FT_REG_FILE] = "   ",
-               [BTRFS_FT_DIR]      = "DIR",
-               [BTRFS_FT_CHRDEV]   = "CHR",
-               [BTRFS_FT_BLKDEV]   = "BLK",
-               [BTRFS_FT_FIFO]     = "FIF",
-               [BTRFS_FT_SOCK]     = "SCK",
-               [BTRFS_FT_SYMLINK]  = "SYM",
-               [BTRFS_FT_XATTR]    = " ? ",
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_inode_item ii;
+       struct btrfs_key key;
+       static const char* dir_item_str[] = {
+               [BTRFS_FT_REG_FILE]     = "FILE",
+               [BTRFS_FT_DIR]          = "DIR",
+               [BTRFS_FT_CHRDEV]       = "CHRDEV",
+               [BTRFS_FT_BLKDEV]       = "BLKDEV",
+               [BTRFS_FT_FIFO]         = "FIFO",
+               [BTRFS_FT_SOCK]         = "SOCK",
+               [BTRFS_FT_SYMLINK]      = "SYMLINK",
+               [BTRFS_FT_XATTR]        = "XATTR"
        };
-       struct btrfs_inode_item inode;
-       const char *name = (const char *) (item + 1);
-       char filetime[32], *target = NULL;
+       u8 type = btrfs_dir_type(eb, di);
+       char namebuf[BTRFS_NAME_LEN];
+       char *target = NULL;
+       char filetime[32];
        time_t mtime;
+       int ret;
 
-       if (btrfs_lookup_inode(root, &item->location, &inode, NULL)) {
-               printf("%s: Cannot find inode item for directory entry %.*s!\n",
-                      __func__, item->name_len, name);
-               return 0;
-       }
-
-       mtime = inode.mtime.sec;
-       ctime_r(&mtime, filetime);
+       btrfs_dir_item_key_to_cpu(eb, di, &key);
 
-       if (item->type == BTRFS_FT_SYMLINK) {
-               target = malloc(min(inode.size + 1,
-                                   (u64) btrfs_info.sb.sectorsize));
+       if (key.type == BTRFS_ROOT_ITEM_KEY) {
+               struct btrfs_root *subvol;
 
-               if (target && btrfs_readlink(root, item->location.objectid,
-                                            target)) {
-                       free(target);
-                       target = NULL;
+               /* It's a subvolume, get its mtime from root item */
+               subvol = btrfs_read_fs_root(fs_info, &key);
+               if (IS_ERR(subvol)) {
+                       ret = PTR_ERR(subvol);
+                       error("Can't find root %llu", key.objectid);
+                       return ret;
                }
+               mtime = btrfs_stack_timespec_sec(&subvol->root_item.otime);
+       } else {
+               struct btrfs_path path;
+
+               /* It's regular inode, get its mtime from inode item */
+               btrfs_init_path(&path);
+               ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+               if (ret > 0)
+                       ret = -ENOENT;
+               if (ret < 0) {
+                       error("Can't find inode %llu", key.objectid);
+                       btrfs_release_path(&path);
+                       return ret;
+               }
+               read_extent_buffer(path.nodes[0], &ii,
+                       btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
+                       sizeof(ii));
+               btrfs_release_path(&path);
+               mtime = btrfs_stack_timespec_sec(&ii.mtime);
+       }
+       ctime_r(&mtime, filetime);
 
-               if (!target)
-                       printf("%s: Cannot read symlink target!\n", __func__);
+       if (type == BTRFS_FT_SYMLINK) {
+               target = malloc(fs_info->sectorsize);
+               if (!target) {
+                       error("Can't alloc memory for symlink %llu",
+                               key.objectid);
+                       return -ENOMEM;
+               }
+               ret = btrfs_readlink(root, key.objectid, target);
+               if (ret < 0) {
+                       error("Failed to read symlink %llu", key.objectid);
+                       goto out;
+               }
+               target[ret] = '\0';
        }
 
-       printf("<%s> ", typestr[item->type]);
-       if (item->type == BTRFS_FT_CHRDEV || item->type == BTRFS_FT_BLKDEV)
-               printf("%4u,%5u  ", (unsigned int) (inode.rdev >> 20),
-                       (unsigned int) (inode.rdev & 0xfffff));
+       if (type < ARRAY_SIZE(dir_item_str) && dir_item_str[type])
+               printf("<%s> ", dir_item_str[type]);
        else
-               printf("%10llu  ", inode.size);
-
-       printf("%24.24s  %.*s", filetime, item->name_len, name);
-
-       if (item->type == BTRFS_FT_SYMLINK) {
-               printf(" -> %s", target ? target : "?");
-               if (target)
-                       free(target);
+               printf("DIR_ITEM.%u", type);
+       if (type == BTRFS_FT_CHRDEV || type == BTRFS_FT_BLKDEV) {
+               ASSERT(key.type == BTRFS_INODE_ITEM_KEY);
+               printf("%4llu,%5llu  ", btrfs_stack_inode_rdev(&ii) >> 20,
+                               btrfs_stack_inode_rdev(&ii) & 0xfffff);
+       } else {
+               if (key.type == BTRFS_INODE_ITEM_KEY)
+                       printf("%10llu  ", btrfs_stack_inode_size(&ii));
+               else
+                       printf("%10llu  ", 0ULL);
        }
 
+       read_extent_buffer(eb, namebuf, (unsigned long)(di + 1),
+                          btrfs_dir_name_len(eb, di));
+       printf("%24.24s  %.*s", filetime, btrfs_dir_name_len(eb, di), namebuf);
+       if (type == BTRFS_FT_SYMLINK)
+               printf(" -> %s", target ? target : "?");
        printf("\n");
-
-       return 0;
+out:
+       free(target);
+       return ret;
 }
 
 int btrfs_probe(struct blk_desc *fs_dev_desc,
                struct disk_partition *fs_partition)
 {
-       btrfs_blk_desc = fs_dev_desc;
-       btrfs_part_info = fs_partition;
-
-       memset(&btrfs_info, 0, sizeof(btrfs_info));
+       struct btrfs_fs_info *fs_info;
+       int ret = -1;
 
        btrfs_hash_init();
-       if (btrfs_read_superblock())
-               return -1;
-
-       if (btrfs_chunk_map_init()) {
-               printf("%s: failed to init chunk map\n", __func__);
-               return -1;
+       fs_info = open_ctree_fs_info(fs_dev_desc, fs_partition);
+       if (fs_info) {
+               current_fs_info = fs_info;
+               ret = 0;
        }
-
-       btrfs_info.tree_root.objectid = 0;
-       btrfs_info.tree_root.bytenr = btrfs_info.sb.root;
-       btrfs_info.chunk_root.objectid = 0;
-       btrfs_info.chunk_root.bytenr = btrfs_info.sb.chunk_root;
-
-       if (btrfs_read_chunk_tree()) {
-               printf("%s: failed to read chunk tree\n", __func__);
-               return -1;
-       }
-
-       if (btrfs_find_root(btrfs_get_default_subvol_objectid(),
-                           &btrfs_info.fs_root, NULL)) {
-               printf("%s: failed to find default subvolume\n", __func__);
-               return -1;
-       }
-
-       return 0;
+       return ret;
 }
 
 int btrfs_ls(const char *path)
 {
-       struct btrfs_root root = btrfs_info.fs_root;
-       u64 inr;
+       struct btrfs_fs_info *fs_info = current_fs_info;
+       struct btrfs_root *root = fs_info->fs_root;
+       u64 ino = BTRFS_FIRST_FREE_OBJECTID;
        u8 type;
+       int ret;
 
-       inr = btrfs_lookup_path(&root, root.root_dirid, path, &type, NULL, 40);
-
-       if (inr == -1ULL) {
+       ASSERT(fs_info);
+       ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+                               path, &root, &ino, &type, 40);
+       if (ret < 0) {
                printf("Cannot lookup path %s\n", path);
-               return -1;
+               return ret;
        }
 
        if (type != BTRFS_FT_DIR) {
-               printf("Not a directory: %s\n", path);
-               return -1;
+               error("Not a directory: %s", path);
+               return -ENOENT;
        }
-
-       if (btrfs_readdir(&root, inr, readdir_callback)) {
-               printf("An error occured while listing directory %s\n", path);
-               return -1;
+       ret = btrfs_iter_dir(root, ino, show_dir);
+       if (ret < 0) {
+               error("An error occured while listing directory %s", path);
+               return ret;
        }
-
        return 0;
 }
 
 int btrfs_exists(const char *file)
 {
-       struct btrfs_root root = btrfs_info.fs_root;
-       u64 inr;
+       struct btrfs_fs_info *fs_info = current_fs_info;
+       struct btrfs_root *root;
+       u64 ino;
        u8 type;
+       int ret;
 
-       inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, NULL, 40);
+       ASSERT(fs_info);
 
-       return (inr != -1ULL && type == BTRFS_FT_REG_FILE);
+       ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+                               file, &root, &ino, &type, 40);
+       if (ret < 0)
+               return 0;
+
+       if (type == BTRFS_FT_REG_FILE)
+               return 1;
+       return 0;
 }
 
 int btrfs_size(const char *file, loff_t *size)
 {
-       struct btrfs_root root = btrfs_info.fs_root;
-       struct btrfs_inode_item inode;
-       u64 inr;
+       struct btrfs_fs_info *fs_info = current_fs_info;
+       struct btrfs_inode_item *ii;
+       struct btrfs_root *root;
+       struct btrfs_path path;
+       struct btrfs_key key;
+       u64 ino;
        u8 type;
+       int ret;
 
-       inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, &inode,
-                               40);
-
-       if (inr == -1ULL) {
+       ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+                               file, &root, &ino, &type, 40);
+       if (ret < 0) {
                printf("Cannot lookup file %s\n", file);
-               return -1;
+               return ret;
        }
-
        if (type != BTRFS_FT_REG_FILE) {
                printf("Not a regular file: %s\n", file);
-               return -1;
+               return -ENOENT;
        }
-
-       *size = inode.size;
-       return 0;
+       btrfs_init_path(&path);
+       key.objectid = ino;
+       key.type = BTRFS_INODE_ITEM_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+       if (ret < 0) {
+               printf("Cannot lookup ino %llu\n", ino);
+               return ret;
+       }
+       if (ret > 0) {
+               printf("Ino %llu does not exist\n", ino);
+               ret = -ENOENT;
+               goto out;
+       }
+       ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
+                           struct btrfs_inode_item);
+       *size = btrfs_inode_size(path.nodes[0], ii);
+out:
+       btrfs_release_path(&path);
+       return ret;
 }
 
 int btrfs_read(const char *file, void *buf, loff_t offset, loff_t len,
               loff_t *actread)
 {
-       struct btrfs_root root = btrfs_info.fs_root;
-       struct btrfs_inode_item inode;
-       u64 inr, rd;
+       struct btrfs_fs_info *fs_info = current_fs_info;
+       struct btrfs_root *root;
+       loff_t real_size = 0;
+       u64 ino;
        u8 type;
-
-       inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, &inode,
-                               40);
-
-       if (inr == -1ULL) {
-               printf("Cannot lookup file %s\n", file);
-               return -1;
+       int ret;
+
+       ASSERT(fs_info);
+       ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+                               file, &root, &ino, &type, 40);
+       if (ret < 0) {
+               error("Cannot lookup file %s", file);
+               return ret;
        }
 
        if (type != BTRFS_FT_REG_FILE) {
-               printf("Not a regular file: %s\n", file);
-               return -1;
+               error("Not a regular file: %s", file);
+               return -EINVAL;
        }
 
-       if (!len)
-               len = inode.size;
+       if (!len) {
+               ret = btrfs_size(file, &real_size);
+               if (ret < 0) {
+                       error("Failed to get inode size: %s", file);
+                       return ret;
+               }
+               len = real_size;
+       }
 
-       if (len > inode.size - offset)
-               len = inode.size - offset;
+       if (len > real_size - offset)
+               len = real_size - offset;
 
-       rd = btrfs_file_read(&root, inr, offset, len, buf);
-       if (rd == -1ULL) {
-               printf("An error occured while reading file %s\n", file);
-               return -1;
+       ret = btrfs_file_read(root, ino, offset, len, buf);
+       if (ret < 0) {
+               error("An error occured while reading file %s", file);
+               return ret;
        }
 
-       *actread = rd;
+       *actread = len;
        return 0;
 }
 
 void btrfs_close(void)
 {
-       btrfs_chunk_map_exit();
+       if (current_fs_info) {
+               close_ctree_fs_info(current_fs_info);
+               current_fs_info = NULL;
+       }
 }
 
 int btrfs_uuid(char *uuid_str)
 {
 #ifdef CONFIG_LIB_UUID
-       uuid_bin_to_str(btrfs_info.sb.fsid, uuid_str, UUID_STR_FORMAT_STD);
+       if (current_fs_info)
+               uuid_bin_to_str(current_fs_info->super_copy->fsid, uuid_str,
+                               UUID_STR_FORMAT_STD);
        return 0;
 #endif
        return -ENOSYS;
diff --git a/fs/btrfs/btrfs.h b/fs/btrfs/btrfs.h
index 25a8cf6..7d8b395 100644 (file)
 #include <linux/rbtree.h>
 #include "conv-funcs.h"
 
-struct btrfs_info {
-       struct btrfs_super_block sb;
-
-       struct btrfs_root tree_root;
-       struct btrfs_root fs_root;
-       struct btrfs_root chunk_root;
-
-       struct rb_root chunks_root;
-};
-
 extern struct btrfs_info btrfs_info;
-
-/* hash.c */
-void btrfs_hash_init(void);
-u32 btrfs_crc32c(u32, const void *, size_t);
-u32 btrfs_csum_data(char *, u32, size_t);
-void btrfs_csum_final(u32, void *);
-
-static inline u64 btrfs_name_hash(const char *name, int len)
-{
-       return btrfs_crc32c((u32) ~1, name, len);
-}
-
-/* dev.c */
-extern struct blk_desc *btrfs_blk_desc;
-extern struct disk_partition *btrfs_part_info;
-
-int btrfs_devread(u64, int, void *);
-
-/* chunk-map.c */
-u64 btrfs_map_logical_to_physical(u64);
-int btrfs_chunk_map_init(void);
-void btrfs_chunk_map_exit(void);
-int btrfs_read_chunk_tree(void);
+extern struct btrfs_fs_info *current_fs_info;
 
 /* compression.c */
 u32 btrfs_decompress(u8 type, const char *, u32, char *, u32);
 
-/* super.c */
-int btrfs_read_superblock(void);
-
-/* dir-item.c */
-typedef int (*btrfs_readdir_callback_t)(const struct btrfs_root *,
-                                       struct btrfs_dir_item *);
-
-int btrfs_lookup_dir_item(const struct btrfs_root *, u64, const char *, int,
-                          struct btrfs_dir_item *);
-int btrfs_readdir(const struct btrfs_root *, u64, btrfs_readdir_callback_t);
-
-/* root.c */
-int btrfs_find_root(u64, struct btrfs_root *, struct btrfs_root_item *);
-u64 btrfs_lookup_root_ref(u64, struct btrfs_root_ref *, char *);
-
 /* inode.c */
-u64 btrfs_lookup_inode_ref(struct btrfs_root *, u64, struct btrfs_inode_ref *,
-                           char *);
-int btrfs_lookup_inode(const struct btrfs_root *, struct btrfs_key *,
-                       struct btrfs_inode_item *, struct btrfs_root *);
-int btrfs_readlink(const struct btrfs_root *, u64, char *);
-u64 btrfs_lookup_path(struct btrfs_root *, u64, const char *, u8 *,
-                      struct btrfs_inode_item *, int);
-u64 btrfs_file_read(const struct btrfs_root *, u64, u64, u64, char *);
+int btrfs_readlink(struct btrfs_root *root, u64 ino, char *target);
+int btrfs_file_read(struct btrfs_root *root, u64 ino, u64 file_offset, u64 len,
+                   char *dest);
 
 /* subvolume.c */
 u64 btrfs_get_default_subvol_objectid(void);
 
-/* extent-io.c */
-u64 btrfs_read_extent_inline(struct btrfs_path *,
-                             struct btrfs_file_extent_item *, u64, u64,
-                             char *);
-u64 btrfs_read_extent_reg(struct btrfs_path *, struct btrfs_file_extent_item *,
-                          u64, u64, char *);
-
 #endif /* !__BTRFS_BTRFS_H__ */
diff --git a/fs/btrfs/btrfs_tree.h b/fs/btrfs/btrfs_tree.h
deleted file mode 100644 (file)
index aa0f3d6..0000000
+++ /dev/null
@@ -1,766 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * From linux/include/uapi/linux/btrfs_tree.h
- */
-
-#ifndef __BTRFS_BTRFS_TREE_H__
-#define __BTRFS_BTRFS_TREE_H__
-
-#include <common.h>
-
-#define BTRFS_VOL_NAME_MAX 255
-#define BTRFS_NAME_MAX 255
-#define BTRFS_LABEL_SIZE 256
-#define BTRFS_FSID_SIZE 16
-#define BTRFS_UUID_SIZE 16
-
-/*
- * This header contains the structure definitions and constants used
- * by file system objects that can be retrieved using
- * the BTRFS_IOC_SEARCH_TREE ioctl.  That means basically anything that
- * is needed to describe a leaf node's key or item contents.
- */
-
-/* holds pointers to all of the tree roots */
-#define BTRFS_ROOT_TREE_OBJECTID 1ULL
-
-/* stores information about which extents are in use, and reference counts */
-#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
-
-/*
- * chunk tree stores translations from logical -> physical block numbering
- * the super block points to the chunk tree
- */
-#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
-
-/*
- * stores information about which areas of a given device are in use.
- * one per device.  The tree of tree roots points to the device tree
- */
-#define BTRFS_DEV_TREE_OBJECTID 4ULL
-
-/* one per subvolume, storing files and directories */
-#define BTRFS_FS_TREE_OBJECTID 5ULL
-
-/* directory objectid inside the root tree */
-#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
-
-/* holds checksums of all the data extents */
-#define BTRFS_CSUM_TREE_OBJECTID 7ULL
-
-/* holds quota configuration and tracking */
-#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
-
-/* for storing items that use the BTRFS_UUID_KEY* types */
-#define BTRFS_UUID_TREE_OBJECTID 9ULL
-
-/* tracks free space in block groups. */
-#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
-
-/* device stats in the device tree */
-#define BTRFS_DEV_STATS_OBJECTID 0ULL
-
-/* for storing balance parameters in the root tree */
-#define BTRFS_BALANCE_OBJECTID -4ULL
-
-/* orhpan objectid for tracking unlinked/truncated files */
-#define BTRFS_ORPHAN_OBJECTID -5ULL
-
-/* does write ahead logging to speed up fsyncs */
-#define BTRFS_TREE_LOG_OBJECTID -6ULL
-#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
-
-/* for space balancing */
-#define BTRFS_TREE_RELOC_OBJECTID -8ULL
-#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
-
-/*
- * extent checksums all have this objectid
- * this allows them to share the logging tree
- * for fsyncs
- */
-#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
-
-/* For storing free space cache */
-#define BTRFS_FREE_SPACE_OBJECTID -11ULL
-
-/*
- * The inode number assigned to the special inode for storing
- * free ino cache
- */
-#define BTRFS_FREE_INO_OBJECTID -12ULL
-
-/* dummy objectid represents multiple objectids */
-#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
-
-/*
- * All files have objectids in this range.
- */
-#define BTRFS_FIRST_FREE_OBJECTID 256ULL
-#define BTRFS_LAST_FREE_OBJECTID -256ULL
-#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
-
-
-/*
- * the device items go into the chunk tree.  The key is in the form
- * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
- */
-#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
-
-#define BTRFS_BTREE_INODE_OBJECTID 1
-
-#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
-
-#define BTRFS_DEV_REPLACE_DEVID 0ULL
-
-/*
- * inode items have the data typically returned from stat and store other
- * info about object characteristics.  There is one for every file and dir in
- * the FS
- */
-#define BTRFS_INODE_ITEM_KEY           1
-#define BTRFS_INODE_REF_KEY            12
-#define BTRFS_INODE_EXTREF_KEY         13
-#define BTRFS_XATTR_ITEM_KEY           24
-#define BTRFS_ORPHAN_ITEM_KEY          48
-/* reserve 2-15 close to the inode for later flexibility */
-
-/*
- * dir items are the name -> inode pointers in a directory.  There is one
- * for every name in a directory.
- */
-#define BTRFS_DIR_LOG_ITEM_KEY  60
-#define BTRFS_DIR_LOG_INDEX_KEY 72
-#define BTRFS_DIR_ITEM_KEY     84
-#define BTRFS_DIR_INDEX_KEY    96
-/*
- * extent data is for file data
- */
-#define BTRFS_EXTENT_DATA_KEY  108
-
-/*
- * extent csums are stored in a separate tree and hold csums for
- * an entire extent on disk.
- */
-#define BTRFS_EXTENT_CSUM_KEY  128
-
-/*
- * root items point to tree roots.  They are typically in the root
- * tree used by the super block to find all the other trees
- */
-#define BTRFS_ROOT_ITEM_KEY    132
-
-/*
- * root backrefs tie subvols and snapshots to the directory entries that
- * reference them
- */
-#define BTRFS_ROOT_BACKREF_KEY 144
-
-/*
- * root refs make a fast index for listing all of the snapshots and
- * subvolumes referenced by a given root.  They point directly to the
- * directory item in the root that references the subvol
- */
-#define BTRFS_ROOT_REF_KEY     156
-
-/*
- * extent items are in the extent map tree.  These record which blocks
- * are used, and how many references there are to each block
- */
-#define BTRFS_EXTENT_ITEM_KEY  168
-
-/*
- * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
- * the length, so we save the level in key->offset instead of the length.
- */
-#define BTRFS_METADATA_ITEM_KEY        169
-
-#define BTRFS_TREE_BLOCK_REF_KEY       176
-
-#define BTRFS_EXTENT_DATA_REF_KEY      178
-
-#define BTRFS_EXTENT_REF_V0_KEY                180
-
-#define BTRFS_SHARED_BLOCK_REF_KEY     182
-
-#define BTRFS_SHARED_DATA_REF_KEY      184
-
-/*
- * block groups give us hints into the extent allocation trees.  Which
- * blocks are free etc etc
- */
-#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
-
-/*
- * Every block group is represented in the free space tree by a free space info
- * item, which stores some accounting information. It is keyed on
- * (block_group_start, FREE_SPACE_INFO, block_group_length).
- */
-#define BTRFS_FREE_SPACE_INFO_KEY 198
-
-/*
- * A free space extent tracks an extent of space that is free in a block group.
- * It is keyed on (start, FREE_SPACE_EXTENT, length).
- */
-#define BTRFS_FREE_SPACE_EXTENT_KEY 199
-
-/*
- * When a block group becomes very fragmented, we convert it to use bitmaps
- * instead of extents. A free space bitmap is keyed on
- * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
- * (length / sectorsize) bits.
- */
-#define BTRFS_FREE_SPACE_BITMAP_KEY 200
-
-#define BTRFS_DEV_EXTENT_KEY   204
-#define BTRFS_DEV_ITEM_KEY     216
-#define BTRFS_CHUNK_ITEM_KEY   228
-
-/*
- * Records the overall state of the qgroups.
- * There's only one instance of this key present,
- * (0, BTRFS_QGROUP_STATUS_KEY, 0)
- */
-#define BTRFS_QGROUP_STATUS_KEY         240
-/*
- * Records the currently used space of the qgroup.
- * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
- */
-#define BTRFS_QGROUP_INFO_KEY           242
-/*
- * Contains the user configured limits for the qgroup.
- * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
- */
-#define BTRFS_QGROUP_LIMIT_KEY          244
-/*
- * Records the child-parent relationship of qgroups. For
- * each relation, 2 keys are present:
- * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
- * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
- */
-#define BTRFS_QGROUP_RELATION_KEY       246
-
-/*
- * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
- */
-#define BTRFS_BALANCE_ITEM_KEY 248
-
-/*
- * The key type for tree items that are stored persistently, but do not need to
- * exist for extended period of time. The items can exist in any tree.
- *
- * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
- *
- * Existing items:
- *
- * - balance status item
- *   (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
- */
-#define BTRFS_TEMPORARY_ITEM_KEY       248
-
-/*
- * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY
- */
-#define BTRFS_DEV_STATS_KEY            249
-
-/*
- * The key type for tree items that are stored persistently and usually exist
- * for a long period, eg. filesystem lifetime. The item kinds can be status
- * information, stats or preference values. The item can exist in any tree.
- *
- * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
- *
- * Existing items:
- *
- * - device statistics, store IO stats in the device tree, one key for all
- *   stats
- *   (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
- */
-#define BTRFS_PERSISTENT_ITEM_KEY      249
-
-/*
- * Persistantly stores the device replace state in the device tree.
- * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
- */
-#define BTRFS_DEV_REPLACE_KEY  250
-
-/*
- * Stores items that allow to quickly map UUIDs to something else.
- * These items are part of the filesystem UUID tree.
- * The key is built like this:
- * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
- */
-#if BTRFS_UUID_SIZE != 16
-#error "UUID items require BTRFS_UUID_SIZE == 16!"
-#endif
-#define BTRFS_UUID_KEY_SUBVOL  251     /* for UUIDs assigned to subvols */
-#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252     /* for UUIDs assigned to
-                                                * received subvols */
-
-/*
- * string items are for debugging.  They just store a short string of
- * data in the FS
- */
-#define BTRFS_STRING_ITEM_KEY  253
-
-
-
-/* 32 bytes in various csum fields */
-#define BTRFS_CSUM_SIZE 32
-
-/* csum types */
-#define BTRFS_CSUM_TYPE_CRC32  0
-
-/*
- * flags definitions for directory entry item type
- *
- * Used by:
- * struct btrfs_dir_item.type
- */
-#define BTRFS_FT_UNKNOWN       0
-#define BTRFS_FT_REG_FILE      1
-#define BTRFS_FT_DIR           2
-#define BTRFS_FT_CHRDEV                3
-#define BTRFS_FT_BLKDEV                4
-#define BTRFS_FT_FIFO          5
-#define BTRFS_FT_SOCK          6
-#define BTRFS_FT_SYMLINK       7
-#define BTRFS_FT_XATTR         8
-#define BTRFS_FT_MAX           9
-
-/*
- * The key defines the order in the tree, and so it also defines (optimal)
- * block layout.
- *
- * objectid corresponds to the inode number.
- *
- * type tells us things about the object, and is a kind of stream selector.
- * so for a given inode, keys with type of 1 might refer to the inode data,
- * type of 2 may point to file data in the btree and type == 3 may point to
- * extents.
- *
- * offset is the starting byte offset for this key in the stream.
- */
-
-struct btrfs_key {
-       __u64 objectid;
-       __u8 type;
-       __u64 offset;
-} __attribute__ ((__packed__));
-
-struct btrfs_dev_item {
-       /* the internal btrfs device id */
-       __u64 devid;
-
-       /* size of the device */
-       __u64 total_bytes;
-
-       /* bytes used */
-       __u64 bytes_used;
-
-       /* optimal io alignment for this device */
-       __u32 io_align;
-
-       /* optimal io width for this device */
-       __u32 io_width;
-
-       /* minimal io size for this device */
-       __u32 sector_size;
-
-       /* type and info about this device */
-       __u64 type;
-
-       /* expected generation for this device */
-       __u64 generation;
-
-       /*
-        * starting byte of this partition on the device,
-        * to allow for stripe alignment in the future
-        */
-       __u64 start_offset;
-
-       /* grouping information for allocation decisions */
-       __u32 dev_group;
-
-       /* seek speed 0-100 where 100 is fastest */
-       __u8 seek_speed;
-
-       /* bandwidth 0-100 where 100 is fastest */
-       __u8 bandwidth;
-
-       /* btrfs generated uuid for this device */
-       __u8 uuid[BTRFS_UUID_SIZE];
-
-       /* uuid of FS who owns this device */
-       __u8 fsid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_stripe {
-       __u64 devid;
-       __u64 offset;
-       __u8 dev_uuid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_chunk {
-       /* size of this chunk in bytes */
-       __u64 length;
-
-       /* objectid of the root referencing this chunk */
-       __u64 owner;
-
-       __u64 stripe_len;
-       __u64 type;
-
-       /* optimal io alignment for this chunk */
-       __u32 io_align;
-
-       /* optimal io width for this chunk */
-       __u32 io_width;
-
-       /* minimal io size for this chunk */
-       __u32 sector_size;
-
-       /* 2^16 stripes is quite a lot, a second limit is the size of a single
-        * item in the btree
-        */
-       __u16 num_stripes;
-
-       /* sub stripes only matter for raid10 */
-       __u16 sub_stripes;
-       struct btrfs_stripe stripe;
-       /* additional stripes go here */
-} __attribute__ ((__packed__));
-
-#define BTRFS_FREE_SPACE_EXTENT        1
-#define BTRFS_FREE_SPACE_BITMAP        2
-
-struct btrfs_free_space_entry {
-       __u64 offset;
-       __u64 bytes;
-       __u8 type;
-} __attribute__ ((__packed__));
-
-struct btrfs_free_space_header {
-       struct btrfs_key location;
-       __u64 generation;
-       __u64 num_entries;
-       __u64 num_bitmaps;
-} __attribute__ ((__packed__));
-
-#define BTRFS_HEADER_FLAG_WRITTEN      (1ULL << 0)
-#define BTRFS_HEADER_FLAG_RELOC                (1ULL << 1)
-
-/* Super block flags */
-/* Errors detected */
-#define BTRFS_SUPER_FLAG_ERROR         (1ULL << 2)
-
-#define BTRFS_SUPER_FLAG_SEEDING       (1ULL << 32)
-#define BTRFS_SUPER_FLAG_METADUMP      (1ULL << 33)
-
-
-/*
- * items in the extent btree are used to record the objectid of the
- * owner of the block and the number of references
- */
-
-struct btrfs_extent_item {
-       __u64 refs;
-       __u64 generation;
-       __u64 flags;
-} __attribute__ ((__packed__));
-
-
-#define BTRFS_EXTENT_FLAG_DATA         (1ULL << 0)
-#define BTRFS_EXTENT_FLAG_TREE_BLOCK   (1ULL << 1)
-
-/* following flags only apply to tree blocks */
-
-/* use full backrefs for extent pointers in the block */
-#define BTRFS_BLOCK_FLAG_FULL_BACKREF  (1ULL << 8)
-
-/*
- * this flag is only used internally by scrub and may be changed at any time
- * it is only declared here to avoid collisions
- */
-#define BTRFS_EXTENT_FLAG_SUPER                (1ULL << 48)
-
-struct btrfs_tree_block_info {
-       struct btrfs_key key;
-       __u8 level;
-} __attribute__ ((__packed__));
-
-struct btrfs_extent_data_ref {
-       __u64 root;
-       __u64 objectid;
-       __u64 offset;
-       __u32 count;
-} __attribute__ ((__packed__));
-
-struct btrfs_shared_data_ref {
-       __u32 count;
-} __attribute__ ((__packed__));
-
-struct btrfs_extent_inline_ref {
-       __u8 type;
-       __u64 offset;
-} __attribute__ ((__packed__));
-
-/* dev extents record free space on individual devices.  The owner
- * field points back to the chunk allocation mapping tree that allocated
- * the extent.  The chunk tree uuid field is a way to double check the owner
- */
-struct btrfs_dev_extent {
-       __u64 chunk_tree;
-       __u64 chunk_objectid;
-       __u64 chunk_offset;
-       __u64 length;
-       __u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_inode_ref {
-       __u64 index;
-       __u16 name_len;
-       /* name goes here */
-} __attribute__ ((__packed__));
-
-struct btrfs_inode_extref {
-       __u64 parent_objectid;
-       __u64 index;
-       __u16 name_len;
-       __u8   name[0];
-       /* name goes here */
-} __attribute__ ((__packed__));
-
-struct btrfs_timespec {
-       __u64 sec;
-       __u32 nsec;
-} __attribute__ ((__packed__));
-
-struct btrfs_inode_item {
-       /* nfs style generation number */
-       __u64 generation;
-       /* transid that last touched this inode */
-       __u64 transid;
-       __u64 size;
-       __u64 nbytes;
-       __u64 block_group;
-       __u32 nlink;
-       __u32 uid;
-       __u32 gid;
-       __u32 mode;
-       __u64 rdev;
-       __u64 flags;
-
-       /* modification sequence number for NFS */
-       __u64 sequence;
-
-       /*
-        * a little future expansion, for more than this we can
-        * just grow the inode item and version it
-        */
-       __u64 reserved[4];
-       struct btrfs_timespec atime;
-       struct btrfs_timespec ctime;
-       struct btrfs_timespec mtime;
-       struct btrfs_timespec otime;
-} __attribute__ ((__packed__));
-
-struct btrfs_dir_log_item {
-       __u64 end;
-} __attribute__ ((__packed__));
-
-struct btrfs_dir_item {
-       struct btrfs_key location;
-       __u64 transid;
-       __u16 data_len;
-       __u16 name_len;
-       __u8 type;
-} __attribute__ ((__packed__));
-
-#define BTRFS_ROOT_SUBVOL_RDONLY       (1ULL << 0)
-
-/*
- * Internal in-memory flag that a subvolume has been marked for deletion but
- * still visible as a directory
- */
-#define BTRFS_ROOT_SUBVOL_DEAD         (1ULL << 48)
-
-struct btrfs_root_item {
-       struct btrfs_inode_item inode;
-       __u64 generation;
-       __u64 root_dirid;
-       __u64 bytenr;
-       __u64 byte_limit;
-       __u64 bytes_used;
-       __u64 last_snapshot;
-       __u64 flags;
-       __u32 refs;
-       struct btrfs_key drop_progress;
-       __u8 drop_level;
-       __u8 level;
-
-       /*
-        * The following fields appear after subvol_uuids+subvol_times
-        * were introduced.
-        */
-
-       /*
-        * This generation number is used to test if the new fields are valid
-        * and up to date while reading the root item. Every time the root item
-        * is written out, the "generation" field is copied into this field. If
-        * anyone ever mounted the fs with an older kernel, we will have
-        * mismatching generation values here and thus must invalidate the
-        * new fields. See btrfs_update_root and btrfs_find_last_root for
-        * details.
-        * the offset of generation_v2 is also used as the start for the memset
-        * when invalidating the fields.
-        */
-       __u64 generation_v2;
-       __u8 uuid[BTRFS_UUID_SIZE];
-       __u8 parent_uuid[BTRFS_UUID_SIZE];
-       __u8 received_uuid[BTRFS_UUID_SIZE];
-       __u64 ctransid; /* updated when an inode changes */
-       __u64 otransid; /* trans when created */
-       __u64 stransid; /* trans when sent. non-zero for received subvol */
-       __u64 rtransid; /* trans when received. non-zero for received subvol */
-       struct btrfs_timespec ctime;
-       struct btrfs_timespec otime;
-       struct btrfs_timespec stime;
-       struct btrfs_timespec rtime;
-       __u64 reserved[8]; /* for future */
-} __attribute__ ((__packed__));
-
-/*
- * this is used for both forward and backward root refs
- */
-struct btrfs_root_ref {
-       __u64 dirid;
-       __u64 sequence;
-       __u16 name_len;
-} __attribute__ ((__packed__));
-
-#define BTRFS_FILE_EXTENT_INLINE 0
-#define BTRFS_FILE_EXTENT_REG 1
-#define BTRFS_FILE_EXTENT_PREALLOC 2
-
-enum btrfs_compression_type {
-       BTRFS_COMPRESS_NONE  = 0,
-       BTRFS_COMPRESS_ZLIB  = 1,
-       BTRFS_COMPRESS_LZO   = 2,
-       BTRFS_COMPRESS_ZSTD  = 3,
-       BTRFS_COMPRESS_TYPES = 3,
-       BTRFS_COMPRESS_LAST  = 4,
-};
-
-struct btrfs_file_extent_item {
-       /*
-        * transaction id that created this extent
-        */
-       __u64 generation;
-       /*
-        * max number of bytes to hold this extent in ram
-        * when we split a compressed extent we can't know how big
-        * each of the resulting pieces will be.  So, this is
-        * an upper limit on the size of the extent in ram instead of
-        * an exact limit.
-        */
-       __u64 ram_bytes;
-
-       /*
-        * 32 bits for the various ways we might encode the data,
-        * including compression and encryption.  If any of these
-        * are set to something a given disk format doesn't understand
-        * it is treated like an incompat flag for reading and writing,
-        * but not for stat.
-        */
-       __u8 compression;
-       __u8 encryption;
-       __u16 other_encoding; /* spare for later use */
-
-       /* are we inline data or a real extent? */
-       __u8 type;
-
-       /*
-        * disk space consumed by the extent, checksum blocks are included
-        * in these numbers
-        *
-        * At this offset in the structure, the inline extent data start.
-        */
-       __u64 disk_bytenr;
-       __u64 disk_num_bytes;
-       /*
-        * the logical offset in file blocks (no csums)
-        * this extent record is for.  This allows a file extent to point
-        * into the middle of an existing extent on disk, sharing it
-        * between two snapshots (useful if some bytes in the middle of the
-        * extent have changed
-        */
-       __u64 offset;
-       /*
-        * the logical number of file blocks (no csums included).  This
-        * always reflects the size uncompressed and without encoding.
-        */
-       __u64 num_bytes;
-
-} __attribute__ ((__packed__));
-
-struct btrfs_csum_item {
-       __u8 csum;
-} __attribute__ ((__packed__));
-
-/* different types of block groups (and chunks) */
-#define BTRFS_BLOCK_GROUP_DATA         (1ULL << 0)
-#define BTRFS_BLOCK_GROUP_SYSTEM       (1ULL << 1)
-#define BTRFS_BLOCK_GROUP_METADATA     (1ULL << 2)
-#define BTRFS_BLOCK_GROUP_RAID0                (1ULL << 3)
-#define BTRFS_BLOCK_GROUP_RAID1                (1ULL << 4)
-#define BTRFS_BLOCK_GROUP_DUP          (1ULL << 5)
-#define BTRFS_BLOCK_GROUP_RAID10       (1ULL << 6)
-#define BTRFS_BLOCK_GROUP_RAID5         (1ULL << 7)
-#define BTRFS_BLOCK_GROUP_RAID6         (1ULL << 8)
-#define BTRFS_BLOCK_GROUP_RESERVED     (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
-                                        BTRFS_SPACE_INFO_GLOBAL_RSV)
-
-enum btrfs_raid_types {
-       BTRFS_RAID_RAID10,
-       BTRFS_RAID_RAID1,
-       BTRFS_RAID_DUP,
-       BTRFS_RAID_RAID0,
-       BTRFS_RAID_SINGLE,
-       BTRFS_RAID_RAID5,
-       BTRFS_RAID_RAID6,
-       BTRFS_NR_RAID_TYPES
-};
-
-#define BTRFS_BLOCK_GROUP_TYPE_MASK    (BTRFS_BLOCK_GROUP_DATA |    \
-                                        BTRFS_BLOCK_GROUP_SYSTEM |  \
-                                        BTRFS_BLOCK_GROUP_METADATA)
-
-#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 |   \
-                                        BTRFS_BLOCK_GROUP_RAID1 |   \
-                                        BTRFS_BLOCK_GROUP_RAID5 |   \
-                                        BTRFS_BLOCK_GROUP_RAID6 |   \
-                                        BTRFS_BLOCK_GROUP_DUP |     \
-                                        BTRFS_BLOCK_GROUP_RAID10)
-#define BTRFS_BLOCK_GROUP_RAID56_MASK  (BTRFS_BLOCK_GROUP_RAID5 |   \
-                                        BTRFS_BLOCK_GROUP_RAID6)
-
-/*
- * We need a bit for restriper to be able to tell when chunks of type
- * SINGLE are available.  This "extended" profile format is used in
- * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
- * (on-disk).  The corresponding on-disk bit in chunk.type is reserved
- * to avoid remappings between two formats in future.
- */
-#define BTRFS_AVAIL_ALLOC_BIT_SINGLE   (1ULL << 48)
-
-/*
- * A fake block group type that is used to communicate global block reserve
- * size to userspace via the SPACE_INFO ioctl.
- */
-#define BTRFS_SPACE_INFO_GLOBAL_RSV    (1ULL << 49)
-
-#define BTRFS_EXTENDED_PROFILE_MASK    (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
-                                        BTRFS_AVAIL_ALLOC_BIT_SINGLE)
-
-#endif /* __BTRFS_BTRFS_TREE_H__ */
diff --git a/fs/btrfs/chunk-map.c b/fs/btrfs/chunk-map.c
deleted file mode 100644 (file)
index 2e5be65..0000000
+++ /dev/null
@@ -1,178 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include "btrfs.h"
-#include <log.h>
-#include <malloc.h>
-
-struct chunk_map_item {
-       struct rb_node node;
-       u64 logical;
-       u64 length;
-       u64 physical;
-};
-
-static int add_chunk_mapping(struct btrfs_key *key, struct btrfs_chunk *chunk)
-{
-       struct btrfs_stripe *stripe;
-       u64 block_profile = chunk->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
-       struct rb_node **new = &(btrfs_info.chunks_root.rb_node), *prnt = NULL;
-       struct chunk_map_item *map_item;
-
-       if (block_profile && block_profile != BTRFS_BLOCK_GROUP_DUP) {
-               printf("%s: unsupported chunk profile %llu\n", __func__,
-                      block_profile);
-               return -1;
-       } else if (!chunk->length) {
-               printf("%s: zero length chunk\n", __func__);
-               return -1;
-       }
-
-       stripe = &chunk->stripe;
-       btrfs_stripe_to_cpu(stripe);
-
-       while (*new) {
-               struct chunk_map_item *this;
-
-               this = rb_entry(*new, struct chunk_map_item, node);
-
-               prnt = *new;
-               if (key->offset < this->logical) {
-                       new = &((*new)->rb_left);
-               } else if (key->offset > this->logical) {
-                       new = &((*new)->rb_right);
-               } else {
-                       debug("%s: Logical address %llu already in map!\n",
-                             __func__, key->offset);
-                       return 0;
-               }
-       }
-
-       map_item = malloc(sizeof(struct chunk_map_item));
-       if (!map_item)
-               return -1;
-
-       map_item->logical = key->offset;
-       map_item->length = chunk->length;
-       map_item->physical = le64_to_cpu(chunk->stripe.offset);
-       rb_link_node(&map_item->node, prnt, new);
-       rb_insert_color(&map_item->node, &btrfs_info.chunks_root);
-
-       debug("%s: Mapping %llu to %llu\n", __func__, map_item->logical,
-             map_item->physical);
-
-       return 0;
-}
-
-u64 btrfs_map_logical_to_physical(u64 logical)
-{
-       struct rb_node *node = btrfs_info.chunks_root.rb_node;
-
-       while (node) {
-               struct chunk_map_item *item;
-
-               item = rb_entry(node, struct chunk_map_item, node);
-
-               if (item->logical > logical)
-                       node = node->rb_left;
-               else if (logical >= item->logical + item->length)
-                       node = node->rb_right;
-               else
-                       return item->physical + logical - item->logical;
-       }
-
-       printf("%s: Cannot map logical address %llu to physical\n", __func__,
-              logical);
-
-       return -1ULL;
-}
-
-void btrfs_chunk_map_exit(void)
-{
-       struct rb_node *now, *next;
-       struct chunk_map_item *item;
-
-       for (now = rb_first_postorder(&btrfs_info.chunks_root); now; now = next)
-       {
-               item = rb_entry(now, struct chunk_map_item, node);
-               next = rb_next_postorder(now);
-               free(item);
-       }
-}
-
-int btrfs_chunk_map_init(void)
-{
-       u8 sys_chunk_array_copy[sizeof(btrfs_info.sb.sys_chunk_array)];
-       u8 * const start = sys_chunk_array_copy;
-       u8 * const end = start + btrfs_info.sb.sys_chunk_array_size;
-       u8 *cur;
-       struct btrfs_key *key;
-       struct btrfs_chunk *chunk;
-
-       btrfs_info.chunks_root = RB_ROOT;
-
-       memcpy(sys_chunk_array_copy, btrfs_info.sb.sys_chunk_array,
-              sizeof(sys_chunk_array_copy));
-
-       for (cur = start; cur < end;) {
-               key = (struct btrfs_key *) cur;
-               cur += sizeof(struct btrfs_key);
-               chunk = (struct btrfs_chunk *) cur;
-
-               btrfs_key_to_cpu(key);
-               btrfs_chunk_to_cpu(chunk);
-
-               if (key->type != BTRFS_CHUNK_ITEM_KEY) {
-                       printf("%s: invalid key type %u\n", __func__,
-                              key->type);
-                       return -1;
-               }
-
-               if (add_chunk_mapping(key, chunk))
-                       return -1;
-
-               cur += sizeof(struct btrfs_chunk);
-               cur += sizeof(struct btrfs_stripe) * (chunk->num_stripes - 1);
-       }
-
-       return 0;
-}
-
-int btrfs_read_chunk_tree(void)
-{
-       struct btrfs_path path;
-       struct btrfs_key key, *found_key;
-       struct btrfs_chunk *chunk;
-       int res = 0;
-
-       key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
-       key.type = BTRFS_CHUNK_ITEM_KEY;
-       key.offset = 0;
-
-       if (btrfs_search_tree(&btrfs_info.chunk_root, &key, &path))
-               return -1;
-
-       do {
-               found_key = btrfs_path_leaf_key(&path);
-               if (btrfs_comp_keys_type(&key, found_key))
-                       continue;
-
-               chunk = btrfs_path_item_ptr(&path, struct btrfs_chunk);
-               btrfs_chunk_to_cpu(chunk);
-               if (add_chunk_mapping(found_key, chunk)) {
-                       res = -1;
-                       break;
-               }
-       } while (!(res = btrfs_next_slot(&path)));
-
-       btrfs_free_path(&path);
-
-       if (res < 0)
-               return -1;
-
-       return 0;
-}
diff --git a/fs/btrfs/common/rbtree-utils.c b/fs/btrfs/common/rbtree-utils.c
new file mode 100644 (file)
index 0000000..7a7d7e8
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 Facebook.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/errno.h>
+#include "rbtree-utils.h"
+
+int rb_insert(struct rb_root *root, struct rb_node *node,
+             rb_compare_nodes comp)
+{
+       struct rb_node **p = &root->rb_node;   /* link slot to fill */
+       struct rb_node *parent = NULL;
+       int ret;
+
+       while(*p) {
+               parent = *p;
+
+               ret = comp(parent, node);       /* tree node vs. new node */
+               if (ret < 0)
+                       p = &(*p)->rb_left;
+               else if (ret > 0)
+                       p = &(*p)->rb_right;
+               else
+                       return -EEXIST;         /* comp() returned 0 */
+       }
+
+       rb_link_node(node, parent, p);
+       rb_insert_color(node, root);
+       return 0;
+}
+
+struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp,
+                         struct rb_node **next_ret)
+{
+       struct rb_node *n = root->rb_node;
+       struct rb_node *parent = NULL;  /* last node visited, if any */
+       int ret = 0;
+
+       while(n) {
+               parent = n;
+
+               ret = comp(n, key);
+               if (ret < 0)
+                       n = n->rb_left;
+               else if (ret > 0)
+                       n = n->rb_right;
+               else
+                       return n;       /* comp() returned 0: match */
+       }
+
+       if (!next_ret)                  /* caller wants no fallback node */
+               return NULL;
+
+       if (parent && ret > 0)          /* search last stepped right */
+               parent = rb_next(parent);
+
+       *next_ret = parent;             /* may be NULL */
+       return NULL;
+}
+
+void rb_free_nodes(struct rb_root *root, rb_free_node free_node)
+{
+       struct rb_node *node;
+
+       while ((node = rb_first(root))) {
+               rb_erase(node, root);   /* unlink before free_node() runs */
+               free_node(node);
+       }
+}
diff --git a/fs/btrfs/common/rbtree-utils.h b/fs/btrfs/common/rbtree-utils.h
new file mode 100644 (file)
index 0000000..d977cfd
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014 Facebook.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __RBTREE_UTILS__
+#define __RBTREE_UTILS__
+
+#include <linux/rbtree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The common insert/search/free functions */
+typedef int (*rb_compare_nodes)(struct rb_node *node1, struct rb_node *node2);
+typedef int (*rb_compare_keys)(struct rb_node *node, void *key);
+typedef void (*rb_free_node)(struct rb_node *node);
+
+int rb_insert(struct rb_root *root, struct rb_node *node,
+             rb_compare_nodes comp);
+/*
+ * rb_search() returns NULL when no node matches @key.  If @next_ret is then
+ * non-NULL, it receives the last node visited or that node's rb_next().
+ */
+struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp,
+                         struct rb_node **next_ret);
+void rb_free_nodes(struct rb_root *root, rb_free_node free_node);
+
+#define FREE_RB_BASED_TREE(name, free_func)            \
+static void free_##name##_tree(struct rb_root *root)   \
+{                                                      \
+       rb_free_nodes(root, free_func);                 \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h
new file mode 100644 (file)
index 0000000..9cf8a10
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#ifndef __BTRFS_COMPAT_H__
+#define __BTRFS_COMPAT_H__
+
+#include <linux/errno.h>
+#include <fs_internal.h>
+#include <uuid.h>
+
+/* Provide a compatibility layer to make code syncing easier */
+
+/* A simple wrapper for error() used in btrfs-progs */
+#define error(fmt, ...)                pr_err("BTRFS: " fmt "\n", ##__VA_ARGS__)
+
+#define ASSERT(c) assert(c)
+
+#define BTRFS_UUID_UNPARSED_SIZE       37
+
+/* No <linux/limits.h> so have to define it here */
+#define XATTR_NAME_MAX         255
+#define PATH_MAX               4096
+
+/*
+ * Macros to generate set/get funcs for the struct fields
+ * assume there is a lefoo_to_cpu for every type, so lets make a simple
+ * one for u8:
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+/*
+ * NOTE: the three definitions below repeat, token for token, the le8
+ * helpers already defined a few lines up.  An identical redefinition is
+ * legal C, so this second copy is redundant but harmless.
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+#define get_unaligned_le8(p) (*((u8 *)(p)))
+#define get_unaligned_8(p) (*((u8 *)(p)))
+#define put_unaligned_le8(val,p) ((*((u8 *)(p))) = (val))
+#define put_unaligned_8(val,p) ((*((u8 *)(p))) = (val))
+
+/*
+ * Read @size bytes at byte @offset from the device given by @desc and @part
+ * into @buf.  U-Boot equivalent of pread(), except that short reads are not
+ * reported: the call either returns the full @size or fails.
+ *
+ * Return @size (converted to int) when fs_devread() reports success.
+ * Return -EIO when fs_devread() returns 0.
+ */
+static inline int __btrfs_devread(struct blk_desc *desc,
+                                 struct disk_partition *part,
+                                 void *buf, size_t size, u64 offset)
+{
+       lbaint_t sector;
+       int byte_offset;
+       int ret;
+
+       sector = offset >> desc->log2blksz;     /* whole blocks in @offset */
+       byte_offset = offset % desc->blksz;     /* remainder within block */
+
+       /* fs_devread() returns 0 on error, >0 on success */
+       ret = fs_devread(desc, part, sector, byte_offset, size, buf);
+       if (!ret)
+               return -EIO;
+       return size;
+}
+
+static inline void uuid_unparse(const u8 *uuid, char *out)
+{
+       uuid_bin_to_str((unsigned char *)uuid, out, 0); /* void: no value */
+}
+
+static inline int is_power_of_2(unsigned long n)
+{
+       return (n != 0 && ((n & (n - 1)) == 0)); /* exactly one bit set */
+}
+
+#endif
index 59e4a94..23efefa 100644 (file)
@@ -115,7 +115,7 @@ static u32 decompress_zlib(const u8 *_cbuf, u32 clen, u8 *dbuf, u32 dlen)
        while (stream.total_in < clen) {
                stream.next_in = cbuf + stream.total_in;
                stream.avail_in = min((u32) (clen - stream.total_in),
-                                     (u32) btrfs_info.sb.sectorsize);
+                                       current_fs_info->sectorsize);
 
                ret = inflate(&stream, Z_NO_FLUSH);
                if (ret != Z_OK)
diff --git a/fs/btrfs/crypto/hash.c b/fs/btrfs/crypto/hash.c
new file mode 100644 (file)
index 0000000..fb51f63
--- /dev/null
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/xxhash.h>
+#include <linux/unaligned/access_ok.h>
+#include <linux/types.h>
+#include <u-boot/sha256.h>
+#include <u-boot/crc.h>
+
+static u32 btrfs_crc32c_table[256];
+
+void btrfs_hash_init(void)
+{
+       static int inited = 0;  /* table is built on the first call only */
+
+       if (!inited) {
+               crc32c_init(btrfs_crc32c_table, 0x82F63B78);
+               inited = 1;
+       }
+}
+
+int hash_sha256(const u8 *buf, size_t length, u8 *out)
+{
+       sha256_context ctx;
+
+       sha256_starts(&ctx);
+       sha256_update(&ctx, buf, length);
+       sha256_finish(&ctx, out);       /* digest is written to @out */
+
+       return 0;       /* unconditional: no failure is reported */
+}
+
+int hash_xxhash(const u8 *buf, size_t length, u8 *out)
+{
+       u64 hash;
+
+       hash = xxh64(buf, length, 0);
+       put_unaligned_le64(hash, out);  /* 64-bit value, little-endian */
+
+       return 0;       /* unconditional: no failure is reported */
+}
+
+int hash_crc32c(const u8 *buf, size_t length, u8 *out)
+{
+       u32 crc;        /* table must come from btrfs_hash_init() */
+
+       crc = crc32c_cal((u32)~0, (char *)buf, length, btrfs_crc32c_table);
+       put_unaligned_le32(~crc, out);  /* inverted CRC, stored LE */
+
+       return 0;       /* unconditional: no failure is reported */
+}
+
+u32 crc32c(u32 seed, const void * data, size_t len)
+{
+       return crc32c_cal(seed, data, len, btrfs_crc32c_table); /* raw CRC */
+}
diff --git a/fs/btrfs/crypto/hash.h b/fs/btrfs/crypto/hash.h
new file mode 100644 (file)
index 0000000..d1ba1fa
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef CRYPTO_HASH_H
+#define CRYPTO_HASH_H
+
+#include <linux/types.h>
+
+#define CRYPTO_HASH_SIZE_MAX   32
+
+void btrfs_hash_init(void);
+int hash_crc32c(const u8 *buf, size_t length, u8 *out);
+int hash_xxhash(const u8 *buf, size_t length, u8 *out);
+int hash_sha256(const u8 *buf, size_t length, u8 *out);
+
+u32 crc32c(u32 seed, const void * data, size_t len);
+
+/* Blake2B is not yet supported due to lack of library */
+
+#endif
index 28f98d4..5ffced9 100644 (file)
  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
  */
 
-#include "btrfs.h"
+#include <linux/kernel.h>
 #include <log.h>
 #include <malloc.h>
 #include <memalign.h>
+#include "btrfs.h"
+#include "disk-io.h"
+
+/*
+ * Per-algorithm checksum properties, indexed by the BTRFS_CSUM_TYPE_* value:
+ * the checksum size in bytes and a printable name.
+ */
+static const struct btrfs_csum {
+       u16 size;
+       const char name[14];
+} btrfs_csums[] = {
+       [BTRFS_CSUM_TYPE_CRC32]         = {  4, "crc32c" },
+       [BTRFS_CSUM_TYPE_XXHASH]        = {  8, "xxhash64" },
+       [BTRFS_CSUM_TYPE_SHA256]        = { 32, "sha256" },
+       [BTRFS_CSUM_TYPE_BLAKE2]        = { 32, "blake2" },
+};
+
+/*
+ * Return the size recorded in btrfs_csums[] for the checksum type stored in
+ * @sb.  The type is used as an array index without a range check here.
+ * NOTE(review): presumably callers validate it against
+ * btrfs_super_num_csums() first -- confirm.
+ */
+u16 btrfs_super_csum_size(const struct btrfs_super_block *sb)
+{
+       const u16 csum_type = btrfs_super_csum_type(sb);
+
+       return btrfs_csums[csum_type].size;
+}
+
+/*
+ * Return the name recorded in btrfs_csums[] for @csum_type.  @csum_type is
+ * used as an array index without a range check here.
+ */
+const char *btrfs_super_csum_name(u16 csum_type)
+{
+       return btrfs_csums[csum_type].name;
+}
+
+/* Return the number of entries in btrfs_csums[]. */
+size_t btrfs_super_num_csums(void)
+{
+       return ARRAY_SIZE(btrfs_csums);
+}
+
+/*
+ * Return the size recorded in btrfs_csums[] for @csum_type.  @csum_type is
+ * used as an array index without a range check here.
+ */
+u16 btrfs_csum_type_size(u16 csum_type)
+{
+       return btrfs_csums[csum_type].size;
+}
+
+/*
+ * Allocate a zero-filled struct btrfs_path with kzalloc().  The kzalloc()
+ * result is returned unchanged, so the caller must check it.
+ */
+struct btrfs_path *btrfs_alloc_path(void)
+{
+       struct btrfs_path *path;
+       path = kzalloc(sizeof(struct btrfs_path), GFP_NOFS);
+       return path;
+}
+
+/*
+ * Release everything @p holds via btrfs_release_path(), then free @p itself.
+ * A NULL @p is ignored.
+ */
+void btrfs_free_path(struct btrfs_path *p)
+{
+       if (!p)
+               return;
+       btrfs_release_path(p);
+       kfree(p);
+}
+
+/*
+ * Pass every non-NULL p->nodes[] entry to free_extent_buffer(), then zero
+ * the whole structure.  @p itself is not freed.
+ */
+void btrfs_release_path(struct btrfs_path *p)
+{
+       int i;
+       for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+               if (!p->nodes[i])
+                       continue;
+               free_extent_buffer(p->nodes[i]);
+       }
+       memset(p, 0, sizeof(*p));
+}
 
-int btrfs_comp_keys(struct btrfs_key *a, struct btrfs_key *b)
+/*
+ * Compare two struct btrfs_key values field by field: objectid first, then
+ * type, then offset.
+ *
+ * Return 1 if @k1 sorts after @k2, -1 if it sorts before, 0 if all three
+ * fields are equal.
+ */
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
 {
-       if (a->objectid > b->objectid)
+       if (k1->objectid > k2->objectid)
                return 1;
-       if (a->objectid < b->objectid)
+       if (k1->objectid < k2->objectid)
                return -1;
-       if (a->type > b->type)
+       if (k1->type > k2->type)
                return 1;
-       if (a->type < b->type)
+       if (k1->type < k2->type)
                return -1;
-       if (a->offset > b->offset)
+       if (k1->offset > k2->offset)
                return 1;
-       if (a->offset < b->offset)
+       if (k1->offset < k2->offset)
                return -1;
        return 0;
 }
 
-int btrfs_comp_keys_type(struct btrfs_key *a, struct btrfs_key *b)
+/*
+ * Convert @disk with btrfs_disk_key_to_cpu() and compare the result against
+ * @k2 using btrfs_comp_cpu_keys(); the return value is passed through.
+ */
+static int btrfs_comp_keys(struct btrfs_disk_key *disk,
+                            const struct btrfs_key *k2)
 {
-       if (a->objectid > b->objectid)
-               return 1;
-       if (a->objectid < b->objectid)
-               return -1;
-       if (a->type > b->type)
-               return 1;
-       if (a->type < b->type)
-               return -1;
-       return 0;
+       struct btrfs_key k1;
+
+       btrfs_disk_key_to_cpu(&k1, disk);
+       return btrfs_comp_cpu_keys(&k1, k2);
 }
 
-static int generic_bin_search(void *addr, int item_size, struct btrfs_key *key,
-                             int max, int *slot)
+/*
+ * Sanity-check an internal node.
+ *
+ * @fs_info:    used for the BTRFS_NODEPTRS_PER_BLOCK() limit
+ * @parent_key: key the parent stores for this node; the comparison is skipped
+ *              when it is NULL or its type is 0
+ * @buf:        the node to check
+ *
+ * Checks, in order: the item count is non-zero and within the per-block limit,
+ * slot 0 carries @parent_key, and every key is strictly smaller than the one
+ * after it (the last pair included).
+ *
+ * Return BTRFS_TREE_BLOCK_CLEAN, or the status code of the first failed check.
+ */
+enum btrfs_tree_block_status
+btrfs_check_node(struct btrfs_fs_info *fs_info,
+                struct btrfs_disk_key *parent_key, struct extent_buffer *buf)
 {
-       int low = 0, high = max, mid, ret;
-       struct btrfs_key *tmp;
+       int i;
+       struct btrfs_key cpukey;
+       struct btrfs_disk_key key;
+       u32 nritems = btrfs_header_nritems(buf);
+       enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
+
+       if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(fs_info))
+               goto fail;
+
+       ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
+       if (parent_key && parent_key->type) {
+               btrfs_node_key(buf, &key, 0);
+               if (memcmp(parent_key, &key, sizeof(key)))
+                       goto fail;
+       }
+       ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
+       /* nritems - 1 adjacent pairs: (0,1) ... (nritems - 2, nritems - 1) */
+       for (i = 0; nritems > 1 && i < nritems - 1; i++) {
+               btrfs_node_key(buf, &key, i);
+               btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
+               if (btrfs_comp_keys(&key, &cpukey) >= 0)
+                       goto fail;
+       }
+       return BTRFS_TREE_BLOCK_CLEAN;
+fail:
+       return ret;
+}
+
+/*
+ * Sanity-check a leaf.
+ *
+ * @fs_info:    used for the BTRFS_LEAF_DATA_SIZE() limit
+ * @parent_key: key the parent stores for this leaf; the comparison is skipped
+ *              when it is NULL or its type is 0
+ * @buf:        the leaf to check
+ *
+ * Each failed check prints a message and returns its own status code; the
+ * checks stop at the first failure.
+ *
+ * Return BTRFS_TREE_BLOCK_CLEAN when every check passes (an empty leaf that
+ * passes the header checks is reported clean).
+ */
+enum btrfs_tree_block_status
+btrfs_check_leaf(struct btrfs_fs_info *fs_info,
+                struct btrfs_disk_key *parent_key, struct extent_buffer *buf)
+{
+       int i;
+       struct btrfs_key cpukey;
+       struct btrfs_disk_key key;
+       u32 nritems = btrfs_header_nritems(buf);
+       enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
+
+       /* the item headers alone must fit inside the buffer */
+       if (nritems * sizeof(struct btrfs_item) > buf->len)  {
+               fprintf(stderr, "invalid number of items %llu\n",
+                       (unsigned long long)buf->start);
+               goto fail;
+       }
+
+       if (btrfs_header_level(buf) != 0) {
+               ret = BTRFS_TREE_BLOCK_INVALID_LEVEL;
+               fprintf(stderr, "leaf is not a leaf %llu\n",
+                      (unsigned long long)btrfs_header_bytenr(buf));
+               goto fail;
+       }
+       if (btrfs_leaf_free_space(buf) < 0) {
+               ret = BTRFS_TREE_BLOCK_INVALID_FREE_SPACE;
+               fprintf(stderr, "leaf free space incorrect %llu %d\n",
+                       (unsigned long long)btrfs_header_bytenr(buf),
+                       btrfs_leaf_free_space(buf));
+               goto fail;
+       }
 
-       while (low < high) {
+       /* no keys or item offsets to look at in an empty leaf */
+       if (nritems == 0)
+               return BTRFS_TREE_BLOCK_CLEAN;
+
+       btrfs_item_key(buf, &key, 0);
+       if (parent_key && parent_key->type &&
+           memcmp(parent_key, &key, sizeof(key))) {
+               ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
+               fprintf(stderr, "leaf parent key incorrect %llu\n",
+                      (unsigned long long)btrfs_header_bytenr(buf));
+               goto fail;
+       }
+       /*
+        * For each adjacent pair: the keys must be strictly ascending and
+        * item i must start exactly where item i + 1 ends.
+        */
+       for (i = 0; nritems > 1 && i < nritems - 1; i++) {
+               btrfs_item_key(buf, &key, i);
+               btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
+               if (btrfs_comp_keys(&key, &cpukey) >= 0) {
+                       ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
+                       fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
+                       goto fail;
+               }
+               if (btrfs_item_offset_nr(buf, i) !=
+                       btrfs_item_end_nr(buf, i + 1)) {
+                       ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+                       fprintf(stderr, "incorrect offsets %u %u\n",
+                               btrfs_item_offset_nr(buf, i),
+                               btrfs_item_end_nr(buf, i + 1));
+                       goto fail;
+               }
+               /* the first item must end exactly at BTRFS_LEAF_DATA_SIZE() */
+               if (i == 0 && btrfs_item_end_nr(buf, i) !=
+                   BTRFS_LEAF_DATA_SIZE(fs_info)) {
+                       ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+                       fprintf(stderr, "bad item end %u wanted %u\n",
+                               btrfs_item_end_nr(buf, i),
+                               (unsigned)BTRFS_LEAF_DATA_SIZE(fs_info));
+                       goto fail;
+               }
+       }
+
+       /* no item may end beyond BTRFS_LEAF_DATA_SIZE() */
+       for (i = 0; i < nritems; i++) {
+               if (btrfs_item_end_nr(buf, i) >
+                               BTRFS_LEAF_DATA_SIZE(fs_info)) {
+                       /* NOTE(review): key is not read again after this call */
+                       btrfs_item_key(buf, &key, 0);
+                       ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+                       fprintf(stderr, "slot end outside of leaf %llu > %llu\n",
+                               (unsigned long long)btrfs_item_end_nr(buf, i),
+                               (unsigned long long)BTRFS_LEAF_DATA_SIZE(
+                                       fs_info));
+                       goto fail;
+               }
+       }
+
+       return BTRFS_TREE_BLOCK_CLEAN;
+fail:
+       return ret;
+}
+
+/*
+ * Sanity-check the tree block stored at path->nodes[level].
+ *
+ * If the path holds a parent for this level, the key found in the parent's
+ * slot is handed to btrfs_check_leaf()/btrfs_check_node() as the expected
+ * parent key; otherwise NULL is passed.
+ *
+ * Returns 0 if the checker reports BTRFS_TREE_BLOCK_CLEAN, -EIO otherwise.
+ */
+static int noinline check_block(struct btrfs_fs_info *fs_info,
+                               struct btrfs_path *path, int level)
+{
+       struct btrfs_disk_key key;
+       struct btrfs_disk_key *key_ptr = NULL;
+       struct extent_buffer *parent;
+       enum btrfs_tree_block_status ret;
+
+       /*
+        * nodes[] and slots[] only have BTRFS_MAX_LEVEL entries, so a block
+        * at the topmost level has no parent entry; do not index past the
+        * end of the arrays.
+        */
+       if (level + 1 < BTRFS_MAX_LEVEL && path->nodes[level + 1]) {
+               parent = path->nodes[level + 1];
+               btrfs_node_key(parent, &key, path->slots[level + 1]);
+               key_ptr = &key;
+       }
+       if (level == 0)
+               ret = btrfs_check_leaf(fs_info, key_ptr, path->nodes[0]);
+       else
+               ret = btrfs_check_node(fs_info, key_ptr, path->nodes[level]);
+       if (ret == BTRFS_TREE_BLOCK_CLEAN)
+               return 0;
+       return -EIO;
+}
+
+/*
+ * Binary search for key in the extent_buffer.  The items start at byte
+ * offset p within the buffer's data and are item_size bytes apart.  There
+ * are 'max' items starting at p.
+ *
+ * The slot in the array is returned via slot.  If the key is found, 0 is
+ * returned and slot is its index; otherwise 1 is returned and slot points
+ * to the place where you would insert the key.
+ *
+ * slot may be equal to max if the key is bigger than all of the keys.
+ */
+static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
+                             int item_size, const struct btrfs_key *key,
+                             int max, int *slot)
+{
+       int low = 0;
+       int high = max;
+       int mid;
+       int ret;
+       unsigned long offset;
+       struct btrfs_disk_key *tmp;
+
+       while(low < high) {
                mid = (low + high) / 2;
+               offset = p + mid * item_size;
 
-               tmp = (struct btrfs_key *) ((u8 *) addr + mid*item_size);
+               tmp = (struct btrfs_disk_key *)(eb->data + offset);
                ret = btrfs_comp_keys(tmp, key);
 
-               if (ret < 0) {
+               if (ret < 0)
                        low = mid + 1;
-               } else if (ret > 0) {
+               else if (ret > 0)
                        high = mid;
-               else {
+               else {
                        *slot = mid;
                        return 0;
                }
        }
-
        *slot = low;
        return 1;
 }
 
-int btrfs_bin_search(union btrfs_tree_node *p, struct btrfs_key *key,
+/*
+ * simple bin_search frontend that does the right thing for
+ * leaves vs nodes
+ */
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
                     int *slot)
 {
-       void *addr;
-       unsigned long size;
-
-       if (p->header.level) {
-               addr = p->node.ptrs;
-               size = sizeof(struct btrfs_key_ptr);
-       } else {
-               addr = p->leaf.items;
-               size = sizeof(struct btrfs_item);
-       }
-
-       return generic_bin_search(addr, size, key, p->header.nritems, slot);
+       if (btrfs_header_level(eb) == 0)
+               return generic_bin_search(eb,
+                                         offsetof(struct btrfs_leaf, items),
+                                         sizeof(struct btrfs_item),
+                                         key, btrfs_header_nritems(eb),
+                                         slot);
+       else
+               return generic_bin_search(eb,
+                                         offsetof(struct btrfs_node, ptrs),
+                                         sizeof(struct btrfs_key_ptr),
+                                         key, btrfs_header_nritems(eb),
+                                         slot);
 }
 
-static void clear_path(struct btrfs_path *p)
+struct extent_buffer *read_node_slot(struct btrfs_fs_info *fs_info,
+                                  struct extent_buffer *parent, int slot)
 {
-       int i;
-
-       for (i = 0; i < BTRFS_MAX_LEVEL; ++i) {
-               p->nodes[i] = NULL;
-               p->slots[i] = 0;
+       struct extent_buffer *ret;
+       int level = btrfs_header_level(parent);
+
+       if (slot < 0)
+               return NULL;
+       if (slot >= btrfs_header_nritems(parent))
+               return NULL;
+
+       if (level == 0)
+               return NULL;
+
+       ret = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
+                      btrfs_node_ptr_generation(parent, slot));
+       if (!extent_buffer_uptodate(ret))
+               return ERR_PTR(-EIO);
+
+       if (btrfs_header_level(ret) != level - 1) {
+               error("child eb corrupted: parent bytenr=%llu item=%d parent level=%d child level=%d",
+                     btrfs_header_bytenr(parent), slot,
+                     btrfs_header_level(parent), btrfs_header_level(ret));
+               free_extent_buffer(ret);
+               return ERR_PTR(-EIO);
        }
+       return ret;
 }
 
-void btrfs_free_path(struct btrfs_path *p)
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+               u64 iobjectid, u64 ioff, u8 key_type,
+               struct btrfs_key *found_key)
 {
-       int i;
+       int ret;
+       struct btrfs_key key;
+       struct extent_buffer *eb;
+       struct btrfs_path *path;
+
+       key.type = key_type;
+       key.objectid = iobjectid;
+       key.offset = ioff;
+
+       if (found_path == NULL) {
+               path = btrfs_alloc_path();
+               if (!path)
+                       return -ENOMEM;
+       } else
+               path = found_path;
+
+       ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+       if ((ret < 0) || (found_key == NULL))
+               goto out;
+
+       eb = path->nodes[0];
+       if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
+               ret = btrfs_next_leaf(fs_root, path);
+               if (ret)
+                       goto out;
+               eb = path->nodes[0];
+       }
 
-       for (i = 0; i < BTRFS_MAX_LEVEL; ++i) {
-               if (p->nodes[i])
-                       free(p->nodes[i]);
+       btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
+       if (found_key->type != key.type ||
+                       found_key->objectid != key.objectid) {
+               ret = 1;
+               goto out;
        }
 
-       clear_path(p);
+out:
+       if (path != found_path)
+               btrfs_free_path(path);
+       return ret;
 }
 
-static int read_tree_node(u64 physical, union btrfs_tree_node **buf)
+/*
+ * look for key in the tree.  path is filled in with nodes along the way
+ * if key is found, we return zero and you can find the item in the leaf
+ * level of the path (level 0)
+ *
+ * If the key isn't found, the path points to the slot where it should
+ * be inserted, and 1 is returned.  If there are other errors during the
+ * search a negative error number is returned.
+ *
+ * if ins_len > 0, nodes and leaves will be split as we walk down the
+ * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
+ * possible)
+ *
+ * NOTE: This version has no COW ability, thus we expect trans == NULL,
+ * ins_len == 0 and cow == 0.
+ */
+int btrfs_search_slot(struct btrfs_trans_handle *trans,
+               struct btrfs_root *root, const struct btrfs_key *key,
+               struct btrfs_path *p, int ins_len, int cow)
 {
-       ALLOC_CACHE_ALIGN_BUFFER(struct btrfs_header, hdr,
-                                sizeof(struct btrfs_header));
-       unsigned long size, offset = sizeof(*hdr);
-       union btrfs_tree_node *res;
-       u32 i;
-
-       if (!btrfs_devread(physical, sizeof(*hdr), hdr))
-               return -1;
-
-       btrfs_header_to_cpu(hdr);
-
-       if (hdr->level)
-               size = sizeof(struct btrfs_node)
-                      + hdr->nritems * sizeof(struct btrfs_key_ptr);
-       else
-               size = btrfs_info.sb.nodesize;
-
-       res = malloc_cache_aligned(size);
-       if (!res) {
-               debug("%s: malloc failed\n", __func__);
-               return -1;
-       }
-
-       if (!btrfs_devread(physical + offset, size - offset,
-                          ((u8 *) res) + offset)) {
-               free(res);
-               return -1;
+       struct extent_buffer *b;
+       int slot;
+       int ret;
+       int level;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       u8 lowest_level = 0;
+
+       /* Read-only implementation: no transaction, no insertion, no COW. */
+       assert(trans == NULL && ins_len == 0 && cow == 0);
+       lowest_level = p->lowest_level;
+       WARN_ON(lowest_level && ins_len > 0);
+       WARN_ON(p->nodes[0] != NULL);
+
+       /* Start at the root node; the path keeps its own reference to it. */
+       b = root->node;
+       extent_buffer_get(b);
+       while (b) {
+               level = btrfs_header_level(b);
+               /*
+               if (cow) {
+                       int wret;
+                       wret = btrfs_cow_block(trans, root, b,
+                                              p->nodes[level + 1],
+                                              p->slots[level + 1],
+                                              &b);
+                       if (wret) {
+                               free_extent_buffer(b);
+                               return wret;
+                       }
+               }
+               */
+               BUG_ON(!cow && ins_len);
+               if (level != btrfs_header_level(b))
+                       WARN_ON(1);
+               level = btrfs_header_level(b);
+               p->nodes[level] = b;
+               /*
+                * check_block() already returns a proper negative errno for
+                * a block that fails validation; pass it on instead of
+                * collapsing it into a bare -1.
+                */
+               ret = check_block(fs_info, p, level);
+               if (ret)
+                       return ret;
+               ret = btrfs_bin_search(b, key, &slot);
+               if (level != 0) {
+                       /*
+                        * No exact match in a node: the child that may hold
+                        * the key is the one before the insertion position.
+                        */
+                       if (ret && slot > 0)
+                               slot -= 1;
+                       p->slots[level] = slot;
+                       /*
+                       if ((p->search_for_split || ins_len > 0) &&
+                           btrfs_header_nritems(b) >=
+                           BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
+                               int sret = split_node(trans, root, p, level);
+                               BUG_ON(sret > 0);
+                               if (sret)
+                                       return sret;
+                               b = p->nodes[level];
+                               slot = p->slots[level];
+                       } else if (ins_len < 0) {
+                               int sret = balance_level(trans, root, p,
+                                                        level);
+                               if (sret)
+                                       return sret;
+                               b = p->nodes[level];
+                               if (!b) {
+                                       btrfs_release_path(p);
+                                       goto again;
+                               }
+                               slot = p->slots[level];
+                               BUG_ON(btrfs_header_nritems(b) == 1);
+                       }
+                       */
+                       /* this is only true while dropping a snapshot */
+                       if (level == lowest_level)
+                               break;
+
+                       /*
+                        * Descend one level.  Report the error carried by an
+                        * ERR_PTR result, and -EIO for any other unusable
+                        * buffer, the same way btrfs_prev_leaf() does.
+                        */
+                       b = read_node_slot(fs_info, b, slot);
+                       if (!extent_buffer_uptodate(b)) {
+                               if (IS_ERR(b))
+                                       return PTR_ERR(b);
+                               return -EIO;
+                       }
+               } else {
+                       p->slots[level] = slot;
+                       /*
+                       if (ins_len > 0 &&
+                           ins_len > btrfs_leaf_free_space(b)) {
+                               int sret = split_leaf(trans, root, key,
+                                                     p, ins_len, ret == 0);
+                               BUG_ON(sret > 0);
+                               if (sret)
+                                       return sret;
+                       }
+                       */
+                       /* Leaf reached: 0 for an exact match, 1 otherwise. */
+                       return ret;
+               }
         }
-
-       memcpy(&res->header, hdr, sizeof(*hdr));
-       if (hdr->level)
-               for (i = 0; i < hdr->nritems; ++i)
-                       btrfs_key_ptr_to_cpu(&res->node.ptrs[i]);
-       else
-               for (i = 0; i < hdr->nritems; ++i)
-                       btrfs_item_to_cpu(&res->leaf.items[i]);
-
-       *buf = res;
-
-       return 0;
+       return 1;
 }
 
-int btrfs_search_tree(const struct btrfs_root *root, struct btrfs_key *key,
-                     struct btrfs_path *p)
+/*
+ * Helper to use instead of search slot if no exact match is needed but
+ * instead the next or previous item should be returned.
+ * When find_higher is true, the next higher item is returned, the next lower
+ * otherwise.
+ * When return_any and find_higher are both true, and no higher item is found,
+ * return the next lower instead.
+ * When return_any is true and find_higher is false, and no lower item is found,
+ * return the next higher instead.
+ * It returns 0 if any item is found, 1 if none is found (tree empty), and
+ * < 0 on error
+ */
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+                              const struct btrfs_key *key,
+                              struct btrfs_path *p, int find_higher,
+                              int return_any)
 {
-       u8 lvl, prev_lvl;
-       int i, slot, ret;
-       u64 logical, physical;
-       union btrfs_tree_node *buf;
-
-       clear_path(p);
-
-       logical = root->bytenr;
-
-       for (i = 0; i < BTRFS_MAX_LEVEL; ++i) {
-               physical = btrfs_map_logical_to_physical(logical);
-               if (physical == -1ULL)
-                       goto err;
-
-               if (read_tree_node(physical, &buf))
-                       goto err;
-
-               lvl = buf->header.level;
-               if (i && prev_lvl != lvl + 1) {
-                       printf("%s: invalid level in header at %llu\n",
-                              __func__, logical);
-                       goto err;
+       int ret;
+       struct extent_buffer *leaf;
+
+again:
+       ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
+       if (ret <= 0)
+                return ret;
+       /*
+        * A return value of 1 means the path is at the position where the item
+        * should be inserted. Normally this is the next bigger item, but in
+        * case the previous item is the last in a leaf, path points to the
+        * first free slot in the previous leaf, i.e. at an invalid item.
+        */
+       leaf = p->nodes[0];
+
+       if (find_higher) {
+               if (p->slots[0] >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(root, p);
+                       if (ret <= 0)
+                               return ret;
+                       if (!return_any)
+                               return 1;
+                       /*
+                        * No higher item found, return the next lower instead
+                        */
+                       return_any = 0;
+                       find_higher = 0;
+                       btrfs_release_path(p);
+                       goto again;
                }
-               prev_lvl = lvl;
-
-               ret = btrfs_bin_search(buf, key, &slot);
-               if (ret < 0)
-                       goto err;
-               if (ret && slot > 0 && lvl)
-                       slot -= 1;
-
-               p->slots[lvl] = slot;
-               p->nodes[lvl] = buf;
-
-               if (lvl) {
-                       logical = buf->node.ptrs[slot].blockptr;
-               } else {
+       } else {
+               if (p->slots[0] == 0) {
+                       ret = btrfs_prev_leaf(root, p);
+                       if (ret < 0)
+                               return ret;
+                       if (!ret) {
+                               leaf = p->nodes[0];
+                               if (p->slots[0] == btrfs_header_nritems(leaf))
+                                       p->slots[0]--;
+                               return 0;
+                       }
+                       if (!return_any)
+                               return 1;
                        /*
-                        * The path might be invalid if:
-                        *   cur leaf max < searched value < next leaf min
-                        *
-                        * Jump to the next valid element if it exists.
+                        * No lower item found, return the next higher instead
                         */
-                       if (slot >= buf->header.nritems)
-                               if (btrfs_next_slot(p) < 0)
-                                       goto err;
-                       break;
+                       return_any = 0;
+                       find_higher = 1;
+                       btrfs_release_path(p);
+                       goto again;
+               } else {
+                       --p->slots[0];
                }
        }
-
        return 0;
-err:
-       btrfs_free_path(p);
-       return -1;
 }
 
-static int jump_leaf(struct btrfs_path *path, int dir)
+/*
+ * how many bytes are required to store the items in a leaf.  start
+ * and nr indicate which items in the leaf to check.  This totals up the
+ * space used both by the item structs and the item data
+ *
+ * Returns 0 when nr is 0.  Otherwise the data portion is measured from the
+ * end of item 'start' to the offset of the last item considered, where the
+ * index of that last item is clamped to the leaf's nritems; the item struct
+ * portion is nr * sizeof(struct btrfs_item), using nr as passed in.
+ */
+static int leaf_space_used(struct extent_buffer *l, int start, int nr)
 {
-       struct btrfs_path p;
-       u32 slot;
-       int level = 1, from_level, i;
-
-       dir = dir >= 0 ? 1 : -1;
+       int data_len;
+       int nritems = btrfs_header_nritems(l);
+       int end = min(nritems, start + nr) - 1; /* index of the last item counted */
+
+       if (!nr)
+               return 0;
+       data_len = btrfs_item_end_nr(l, start);
+       data_len = data_len - btrfs_item_offset_nr(l, end);
+       data_len += sizeof(struct btrfs_item) * nr; /* not clamped, unlike 'end' */
+       WARN_ON(data_len < 0);
+       return data_len;
+}
 
-       p = *path;
+/*
+ * Free space left in a leaf: the size of the leaf data area minus the bytes
+ * taken up by all item structs and their data.  A negative result is
+ * reported with printk and still returned to the caller.
+ */
+int btrfs_leaf_free_space(struct extent_buffer *leaf)
+{
+       int item_count = btrfs_header_nritems(leaf);
+       u32 data_area;
+       int free_bytes;
+
+       BUG_ON(leaf->fs_info && leaf->fs_info->nodesize != leaf->len);
+       data_area = __BTRFS_LEAF_DATA_SIZE(leaf->len);
+       free_bytes = data_area - leaf_space_used(leaf, 0, item_count);
+       if (free_bytes >= 0)
+               return free_bytes;
+
+       printk("leaf free space ret %d, leaf data size %u, used %d nritems %d\n",
+              free_bytes, data_area, leaf_space_used(leaf, 0, item_count),
+              item_count);
+       return free_bytes;
+}
 
-       while (level < BTRFS_MAX_LEVEL) {
-               if (!p.nodes[level])
+/*
+ * walk up the tree as far as required to find the previous leaf.
+ * returns 0 if it found something or 1 if there are no lesser leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+       int slot;
+       int level = 1;
+       struct extent_buffer *c;
+       struct extent_buffer *next = NULL;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+
+       while(level < BTRFS_MAX_LEVEL) {
+               if (!path->nodes[level])
                        return 1;
 
-               slot = p.slots[level];
-               if ((dir > 0 && slot + dir >= p.nodes[level]->header.nritems)
-                   || (dir < 0 && !slot))
+               slot = path->slots[level];
+               c = path->nodes[level];
+               if (slot == 0) {
                        level++;
-               else
-                       break;
-       }
-
-       if (level == BTRFS_MAX_LEVEL)
-               return 1;
-
-       p.slots[level] = slot + dir;
-       level--;
-       from_level = level;
-
-       while (level >= 0) {
-               u64 logical, physical;
-
-               slot = p.slots[level + 1];
-               logical = p.nodes[level + 1]->node.ptrs[slot].blockptr;
-               physical = btrfs_map_logical_to_physical(logical);
-               if (physical == -1ULL)
-                       goto err;
-
-               if (read_tree_node(physical, &p.nodes[level]))
-                       goto err;
+                       if (level == BTRFS_MAX_LEVEL)
+                               return 1;
+                       continue;
+               }
+               slot--;
 
-               if (dir > 0)
-                       p.slots[level] = 0;
-               else
-                       p.slots[level] = p.nodes[level]->header.nritems - 1;
+               next = read_node_slot(fs_info, c, slot);
+               if (!extent_buffer_uptodate(next)) {
+                       if (IS_ERR(next))
+                               return PTR_ERR(next);
+                       return -EIO;
+               }
+               break;
+       }
+       path->slots[level] = slot;
+       while(1) {
                level--;
+               c = path->nodes[level];
+               free_extent_buffer(c);
+               slot = btrfs_header_nritems(next);
+               if (slot != 0)
+                       slot--;
+               path->nodes[level] = next;
+               path->slots[level] = slot;
+               if (!level)
+                       break;
+               next = read_node_slot(fs_info, next, slot);
+               if (!extent_buffer_uptodate(next)) {
+                       if (IS_ERR(next))
+                               return PTR_ERR(next);
+                       return -EIO;
+               }
        }
-
-       /* Free rewritten nodes in path */
-       for (i = 0; i <= from_level; ++i)
-               free(path->nodes[i]);
-
-       *path = p;
        return 0;
-
-err:
-       /* Free rewritten nodes in p */
-       for (i = level + 1; i <= from_level; ++i)
-               free(p.nodes[i]);
-       return -1;
 }
 
-int btrfs_prev_slot(struct btrfs_path *p)
+/*
+ * Walk up the tree as far as necessary to find the next sibling tree block.
+ * More generic version of btrfs_next_leaf(), as it could find sibling nodes
+ * if @path->lowest_level is not 0.
+ *
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info,
+                                 struct btrfs_path *path)
 {
-       if (!p->slots[0])
-               return jump_leaf(p, -1);
+       int slot;
+       int level = path->lowest_level + 1;
+       struct extent_buffer *c;
+       struct extent_buffer *next = NULL;
+
+       BUG_ON(path->lowest_level + 1 >= BTRFS_MAX_LEVEL);
+       do {
+               if (!path->nodes[level])
+                       return 1;
 
-       p->slots[0]--;
+               slot = path->slots[level] + 1;
+               c = path->nodes[level];
+               if (slot >= btrfs_header_nritems(c)) {
+                       level++;
+                       if (level == BTRFS_MAX_LEVEL)
+                               return 1;
+                       continue;
+               }
+
+               next = read_node_slot(fs_info, c, slot);
+               if (!extent_buffer_uptodate(next))
+                       return -EIO;
+               break;
+       } while (level < BTRFS_MAX_LEVEL);
+       path->slots[level] = slot;
+       while(1) {
+               level--;
+               c = path->nodes[level];
+               free_extent_buffer(c);
+               path->nodes[level] = next;
+               path->slots[level] = 0;
+               if (level == path->lowest_level)
+                       break;
+               next = read_node_slot(fs_info, next, 0);
+               if (!extent_buffer_uptodate(next))
+                       return -EIO;
+       }
        return 0;
 }
 
-int btrfs_next_slot(struct btrfs_path *p)
+int btrfs_previous_item(struct btrfs_root *root,
+                       struct btrfs_path *path, u64 min_objectid,
+                       int type)
 {
-       struct btrfs_leaf *leaf = &p->nodes[0]->leaf;
-
-       if (p->slots[0] + 1 >= leaf->header.nritems)
-               return jump_leaf(p, 1);
+       struct btrfs_key found_key;
+       struct extent_buffer *leaf;
+       u32 nritems;
+       int ret;
+
+       while(1) {
+               if (path->slots[0] == 0) {
+                       ret = btrfs_prev_leaf(root, path);
+                       if (ret != 0)
+                               return ret;
+               } else {
+                       path->slots[0]--;
+               }
+               leaf = path->nodes[0];
+               nritems = btrfs_header_nritems(leaf);
+               if (nritems == 0)
+                       return 1;
+               if (path->slots[0] == nritems)
+                       path->slots[0]--;
 
-       p->slots[0]++;
-       return 0;
+               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+               if (found_key.objectid < min_objectid)
+                       break;
+               if (found_key.type == type)
+                       return 0;
+               if (found_key.objectid == min_objectid &&
+                   found_key.type < type)
+                       break;
+       }
+       return 1;
 }
index 65c152a..219c410 100644 (file)
 
 #include <common.h>
 #include <compiler.h>
-#include "btrfs_tree.h"
-
-#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
+#include <linux/rbtree.h>
+#include <linux/bug.h>
+#include <linux/unaligned/le_byteshift.h>
+#include <u-boot/crc.h>
+#include "kernel-shared/btrfs_tree.h"
+#include "crypto/hash.h"
+#include "compat.h"
+#include "extent-io.h"
 
 #define BTRFS_MAX_MIRRORS 3
 
-#define BTRFS_MAX_LEVEL 8
-
-#define BTRFS_COMPAT_EXTENT_TREE_V0
-
 /*
  * the max metadata block size.  This limit is somewhat artificial,
  * but the memmove costs go through the roof for larger blocks.
 #define BTRFS_MAX_METADATA_BLOCKSIZE 65536
 
 /*
- * we can actually store much bigger names, but lets not confuse the rest
- * of linux
- */
-#define BTRFS_NAME_LEN 255
-
-/*
  * Theoretical limit is larger, but we keep this down to a sane
  * value. That should limit greatly the possibility of collisions on
  * inode ref items.
  */
 #define BTRFS_LINK_MAX 65535U
 
-static const int btrfs_csum_sizes[] = { 4 };
-
 /* four bytes for CRC32 */
 #define BTRFS_EMPTY_DIR_SIZE 0
 
+struct btrfs_mapping_tree {
+       struct cache_tree cache_tree;
+};
+
+static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+{
+       BUG_ON(num_stripes == 0);
+       return sizeof(struct btrfs_chunk) +
+               sizeof(struct btrfs_stripe) * (num_stripes - 1);
+}
+
+/*
+ * Bytes of a tree block of size 'bs' that remain after the block header.
+ * BTRFS_LEAF_DATA_SIZE() applies this to the nodesize cached in fs_info; the
+ * argument is parenthesized so any pointer expression can be passed.
+ */
+#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
+#define BTRFS_LEAF_DATA_SIZE(fs_info) \
+                               (__BTRFS_LEAF_DATA_SIZE((fs_info)->nodesize))
+
+struct btrfs_path {
+       struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
+       int slots[BTRFS_MAX_LEVEL];
+
+       /* keep some upper locks as we walk down */
+       u8 lowest_level;
+};
+
 /* ioprio of readahead is set to idle */
 #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
 
@@ -52,6 +68,92 @@ static const int btrfs_csum_sizes[] = { 4 };
 
 #define BTRFS_MAX_EXTENT_SIZE SZ_128M
 
+enum btrfs_tree_block_status { /* result codes of the tree block checkers */
+       BTRFS_TREE_BLOCK_CLEAN, /* block passed the checks */
+       BTRFS_TREE_BLOCK_INVALID_NRITEMS,
+       BTRFS_TREE_BLOCK_INVALID_PARENT_KEY, /* first key differs from the key in the parent slot */
+       BTRFS_TREE_BLOCK_BAD_KEY_ORDER, /* adjacent keys are not strictly ascending */
+       BTRFS_TREE_BLOCK_INVALID_LEVEL,
+       BTRFS_TREE_BLOCK_INVALID_FREE_SPACE, /* computed leaf free space is negative */
+       BTRFS_TREE_BLOCK_INVALID_OFFSETS, /* item offsets/ends are inconsistent or outside the leaf */
+};
+
+struct btrfs_root {
+       struct extent_buffer *node;
+       struct btrfs_root_item root_item;
+       struct btrfs_key root_key;
+       struct btrfs_fs_info *fs_info;
+       u64 objectid;
+       u64 last_trans;
+
+       int ref_cows;
+       int track_dirty;
+
+       u32 type;
+       u64 last_inode_alloc;
+
+       struct rb_node rb_node;
+};
+
+struct btrfs_trans_handle;
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_fs_info {
+       u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+       u8 *new_chunk_tree_uuid;
+       struct btrfs_root *fs_root;
+       struct btrfs_root *tree_root;
+       struct btrfs_root *chunk_root;
+       struct btrfs_root *csum_root;
+
+       struct rb_root fs_root_tree;
+
+       struct extent_io_tree extent_cache;
+
+       /* logical->physical extent mapping */
+       struct btrfs_mapping_tree mapping_tree;
+
+       u64 last_trans_committed;
+
+       struct btrfs_super_block *super_copy;
+
+       struct btrfs_fs_devices *fs_devices;
+
+       /* Cached block sizes */
+       u32 nodesize;
+       u32 sectorsize;
+       u32 stripesize;
+};
+
+static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info)
+{
+       return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item);
+}
+
+static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_fs_info *info)
+{
+       return BTRFS_LEAF_DATA_SIZE(info) / sizeof(struct btrfs_key_ptr);
+}
+
+static inline u32 BTRFS_NODEPTRS_PER_EXTENT_BUFFER(const struct extent_buffer *eb)
+{
+       BUG_ON(eb->fs_info && eb->fs_info->nodesize != eb->len);
+       return __BTRFS_LEAF_DATA_SIZE(eb->len) / sizeof(struct btrfs_key_ptr);
+}
+
+#define BTRFS_FILE_EXTENT_INLINE_DATA_START            \
+       (offsetof(struct btrfs_file_extent_item, disk_bytenr))
+static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_fs_info *info)
+{
+       return BTRFS_MAX_ITEM_SIZE(info) -
+               BTRFS_FILE_EXTENT_INLINE_DATA_START;
+}
+
+static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
+{
+       return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item);
+}
+
 /*
  * File system states
  */
@@ -61,273 +163,1136 @@ static const int btrfs_csum_sizes[] = { 4 };
 #define BTRFS_FS_STATE_DEV_REPLACING   3
 #define BTRFS_FS_STATE_DUMMY_FS_INFO   4
 
-#define BTRFS_BACKREF_REV_MAX          256
-#define BTRFS_BACKREF_REV_SHIFT                56
-#define BTRFS_BACKREF_REV_MASK         (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
-                                        BTRFS_BACKREF_REV_SHIFT)
+/*
+ * Call read_extent_buffer() on @eb for exactly one @member of @type:
+ * @result is passed as the buffer, the position is @ptr plus the member's
+ * offset inside @type, and the length is the member's size.
+ */
+#define read_eb_member(eb, ptr, type, member, result) (                        \
+       read_extent_buffer(eb, (char *)(result),                        \
+                          ((unsigned long)(ptr)) +                     \
+                           offsetof(type, member),                     \
+                          sizeof(((type *)0)->member)))
 
-#define BTRFS_OLD_BACKREF_REV          0
-#define BTRFS_MIXED_BACKREF_REV                1
+/*
+ * Call write_extent_buffer() on @eb for exactly one @member of @type:
+ * @result is passed as the buffer, the position is @ptr plus the member's
+ * offset inside @type, and the length is the member's size.
+ */
+#define write_eb_member(eb, ptr, type, member, result) (               \
+       write_extent_buffer(eb, (char *)(result),                       \
+                          ((unsigned long)(ptr)) +                     \
+                           offsetof(type, member),                     \
+                          sizeof(((type *)0)->member)))
 
-/*
- * every tree block (leaf or node) starts with this header.
- */
-struct btrfs_header {
-       /* these first four must match the super block */
-       __u8 csum[BTRFS_CSUM_SIZE];
-       __u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
-       __u64 bytenr; /* which block this node is supposed to live in */
-       __u64 flags;
-
-       /* allowed to be different from the super from here on down */
-       __u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
-       __u64 generation;
-       __u64 owner;
-       __u32 nritems;
-       __u8 level;
-} __attribute__ ((__packed__));
+/*
+ * Generate btrfs_<name>() and btrfs_set_<name>() for a @bits wide @member
+ * of the struct btrfs_header found at the start of eb->data.  Values are
+ * converted with le<bits>_to_cpu()/cpu_to_le<bits>().  The @type argument
+ * is not referenced by the expansion.
+ */
+#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)            \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb)     \
+{                                                                      \
+       const struct btrfs_header *h = (struct btrfs_header *)eb->data; \
+       return le##bits##_to_cpu(h->member);                            \
+}                                                                      \
+static inline void btrfs_set_##name(struct extent_buffer *eb,          \
+                                   u##bits val)                        \
+{                                                                      \
+       struct btrfs_header *h = (struct btrfs_header *)eb->data;       \
+       h->member = cpu_to_le##bits(val);                               \
+}
 
-/*
- * this is a very generous portion of the super block, giving us
- * room to translate 14 chunks with 3 stripes each.
- */
-#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+/*
+ * Generate btrfs_<name>() and btrfs_set_<name>() for a @bits wide @member
+ * of @type located inside an extent buffer.  The @s argument is not
+ * dereferenced directly: its value is used as a byte offset from eb->data,
+ * and the field is accessed with get/put_unaligned_le<bits>().
+ */
+#define BTRFS_SETGET_FUNCS(name, type, member, bits)                   \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb,     \
+                                  const type *s)                       \
+{                                                                      \
+       unsigned long offset = (unsigned long)s;                        \
+       const type *p = (type *) (eb->data + offset);                   \
+       return get_unaligned_le##bits(&p->member);                      \
+}                                                                      \
+static inline void btrfs_set_##name(struct extent_buffer *eb,          \
+                                   type *s, u##bits val)               \
+{                                                                      \
+       unsigned long offset = (unsigned long)s;                        \
+       type *p = (type *) (eb->data + offset);                         \
+       put_unaligned_le##bits(val, &p->member);                        \
+}
 
-/*
- * just in case we somehow lose the roots and are not able to mount,
- * we store an array of the roots from previous transactions
- * in the super.
- */
-#define BTRFS_NUM_BACKUP_ROOTS 4
-struct btrfs_root_backup {
-       __u64 tree_root;
-       __u64 tree_root_gen;
-
-       __u64 chunk_root;
-       __u64 chunk_root_gen;
-
-       __u64 extent_root;
-       __u64 extent_root_gen;
-
-       __u64 fs_root;
-       __u64 fs_root_gen;
-
-       __u64 dev_root;
-       __u64 dev_root_gen;
-
-       __u64 csum_root;
-       __u64 csum_root_gen;
-
-       __u64 total_bytes;
-       __u64 bytes_used;
-       __u64 num_devices;
-       /* future */
-       __u64 unused_64[4];
-
-       __u8 tree_root_level;
-       __u8 chunk_root_level;
-       __u8 extent_root_level;
-       __u8 fs_root_level;
-       __u8 dev_root_level;
-       __u8 csum_root_level;
-       /* future and to align */
-       __u8 unused_8[10];
-} __attribute__ ((__packed__));
+/*
+ * Generate btrfs_<name>() and btrfs_set_<name>() operating directly on the
+ * structure @s points to, converting the @bits wide @member with
+ * le<bits>_to_cpu() and cpu_to_le<bits>().
+ */
+#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits)             \
+static inline u##bits btrfs_##name(const type *s)                      \
+{                                                                      \
+       return le##bits##_to_cpu(s->member);                            \
+}                                                                      \
+static inline void btrfs_set_##name(type *s, u##bits val)              \
+{                                                                      \
+       s->member = cpu_to_le##bits(val);                               \
+}
 
-/*
- * the super block basically lists the main trees of the FS
- * it currently lacks any block count etc etc
- */
-struct btrfs_super_block {
-       __u8 csum[BTRFS_CSUM_SIZE];
-       /* the first 4 fields must match struct btrfs_header */
-       __u8 fsid[BTRFS_FSID_SIZE];    /* FS specific uuid */
-       __u64 bytenr; /* this block number */
-       __u64 flags;
-
-       /* allowed to be different from the btrfs_header from here own down */
-       __u64 magic;
-       __u64 generation;
-       __u64 root;
-       __u64 chunk_root;
-       __u64 log_root;
-
-       /* this will help find the new super based on the log root */
-       __u64 log_root_transid;
-       __u64 total_bytes;
-       __u64 bytes_used;
-       __u64 root_dir_objectid;
-       __u64 num_devices;
-       __u32 sectorsize;
-       __u32 nodesize;
-       __u32 __unused_leafsize;
-       __u32 stripesize;
-       __u32 sys_chunk_array_size;
-       __u64 chunk_root_generation;
-       __u64 compat_flags;
-       __u64 compat_ro_flags;
-       __u64 incompat_flags;
-       __u16 csum_type;
-       __u8 root_level;
-       __u8 chunk_root_level;
-       __u8 log_root_level;
-       struct btrfs_dev_item dev_item;
-
-       char label[BTRFS_LABEL_SIZE];
-
-       __u64 cache_generation;
-       __u64 uuid_tree_generation;
-
-       /* future expansion */
-       __u64 reserved[30];
-       __u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
-       struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
-} __attribute__ ((__packed__));
+/*
+ * struct btrfs_dev_item: getters/setters for an item located in an
+ * extent buffer (generated by BTRFS_SETGET_FUNCS).
+ */
+BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
+BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
+BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
+BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
+BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item,
+                  start_offset, 64);
+BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
+BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32);
+BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8);
+BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8);
+BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64);
 
-/*
- * Compat flags that we support.  If any incompat flags are set other than the
- * ones specified below then we will fail to mount
- */
-#define BTRFS_FEATURE_COMPAT_SUPP              0ULL
-#define BTRFS_FEATURE_COMPAT_SAFE_SET          0ULL
-#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR                0ULL
-
-#define BTRFS_FEATURE_COMPAT_RO_SUPP                   \
-       (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |      \
-        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
-
-#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET       0ULL
-#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR     0ULL
-
-#define BTRFS_FEATURE_INCOMPAT_SUPP                    \
-       (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |         \
-        BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |        \
-        BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |          \
-        BTRFS_FEATURE_INCOMPAT_BIG_METADATA |          \
-        BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |          \
-        BTRFS_FEATURE_INCOMPAT_RAID56 |                \
-        BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |         \
-        BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |       \
-        BTRFS_FEATURE_INCOMPAT_NO_HOLES)
-
-#define BTRFS_FEATURE_INCOMPAT_SAFE_SET                        \
-       (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
-#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR              0ULL
+/*
+ * struct btrfs_dev_item: getters/setters working directly on a
+ * struct btrfs_dev_item pointer (generated by BTRFS_SETGET_STACK_FUNCS).
+ */
+BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
+                        total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item,
+                        bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item,
+                        io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item,
+                        io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item,
+                        sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item,
+                        dev_group, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item,
+                        seek_speed, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
+                        bandwidth, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
+                        generation, 64);
 
-/*
- * A leaf is full of items. offset and size tell us where to find
- * the item in the leaf (relative to the start of the data area)
- */
-struct btrfs_item {
-       struct btrfs_key key;
-       __u32 offset;
-       __u32 size;
-} __attribute__ ((__packed__));
+/* Return @d advanced by the byte offset of the uuid member, as char *. */
+static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+{
+       char *base = (char *)d;
+
+       return base + offsetof(struct btrfs_dev_item, uuid);
+}
 
-/*
- * leaves have an item area and a data area:
- * [item0, item1....itemN] [free space] [dataN...data1, data0]
- *
- * The data is separate from the items to get the keys closer together
- * during searches.
- */
-struct btrfs_leaf {
-       struct btrfs_header header;
-       struct btrfs_item items[];
-} __attribute__ ((__packed__));
+/* Return @d advanced by the byte offset of the fsid member, as char *. */
+static inline char *btrfs_device_fsid(struct btrfs_dev_item *d)
+{
+       char *base = (char *)d;
+
+       return base + offsetof(struct btrfs_dev_item, fsid);
+}
+
+/*
+ * struct btrfs_chunk and struct btrfs_stripe: getters/setters for items
+ * located in an extent buffer (generated by BTRFS_SETGET_FUNCS).
+ */
+BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
+BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
+BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
+BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
+BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
+BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16);
+BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
+
+/* Return @s advanced by the byte offset of the dev_uuid member, as char *. */
+static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s)
+{
+       char *base = (char *)s;
+
+       return base + offsetof(struct btrfs_stripe, dev_uuid);
+}
+
+/*
+ * struct btrfs_chunk and struct btrfs_stripe: getters/setters working
+ * directly on a structure pointer (generated by BTRFS_SETGET_STACK_FUNCS).
+ */
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
+                        stripe_len, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
+                        io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
+                        io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
+                        sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
+                        num_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk,
+                        sub_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
+
+/*
+ * Return @c advanced to entry @nr of its stripe array: the offset of the
+ * stripe member plus @nr times the size of struct btrfs_stripe.
+ */
+static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
+                                                  int nr)
+{
+       const unsigned long first = offsetof(struct btrfs_chunk, stripe);
+       const unsigned long skip = nr * sizeof(struct btrfs_stripe);
+
+       return (struct btrfs_stripe *)((unsigned long)c + first + skip);
+}
+
+/* btrfs_stripe_dev_uuid() applied to stripe @nr of chunk @c. */
+static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
+{
+       struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+       return btrfs_stripe_dev_uuid(stripe);
+}
+
+/* Read the offset field of stripe @nr of chunk @c from @eb. */
+static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
+                                        struct btrfs_chunk *c, int nr)
+{
+       struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+       return btrfs_stripe_offset(eb, stripe);
+}
+
+/* Store @val into the offset field of stripe @nr of chunk @c in @eb. */
+static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
+                                            struct btrfs_chunk *c, int nr,
+                                            u64 val)
+{
+       struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+       btrfs_set_stripe_offset(eb, stripe, val);
+}
+
+/* Read the devid field of stripe @nr of chunk @c from @eb. */
+static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
+                                        struct btrfs_chunk *c, int nr)
+{
+       struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+       return btrfs_stripe_devid(eb, stripe);
+}
+
+/* Store @val into the devid field of stripe @nr of chunk @c in @eb. */
+static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
+                                            struct btrfs_chunk *c, int nr,
+                                            u64 val)
+{
+       struct btrfs_stripe *stripe = btrfs_stripe_nr(c, nr);
+
+       btrfs_set_stripe_devid(eb, stripe, val);
+}
+
+/*
+ * struct btrfs_block_group_item
+ *
+ * The disk_block_group_* variants access an item inside an extent buffer;
+ * the block_group_* variants work directly on a structure pointer.
+ */
+BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
+                        used, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
+                        used, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
+                       struct btrfs_block_group_item, chunk_objectid, 64);
+
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
+                  struct btrfs_block_group_item, chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_flags,
+                  struct btrfs_block_group_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_flags,
+                       struct btrfs_block_group_item, flags, 64);
+
+/* struct btrfs_free_space_info: accessors for items in an extent buffer */
+BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
+                  extent_count, 32);
+BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32);
+
+/*
+ * struct btrfs_inode_ref: extent buffer accessors, plus a direct-pointer
+ * (stack) accessor for name_len.
+ */
+BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
+
+/* struct btrfs_inode_extref: accessors for items in an extent buffer */
+BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref,
+                  parent_objectid, 64);
+BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref,
+                  name_len, 16);
+BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64);
+
+/* struct btrfs_inode_item: accessors for items in an extent buffer */
+BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64);
+BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64);
+BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
+
+/*
+ * struct btrfs_inode_item: accessors working directly on a structure
+ * pointer (generated by BTRFS_SETGET_STACK_FUNCS).
+ */
+BTRFS_SETGET_STACK_FUNCS(stack_inode_generation,
+                        struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence,
+                        struct btrfs_inode_item, sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_transid,
+                        struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_size,
+                        struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes,
+                        struct btrfs_inode_item, nbytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group,
+                        struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink,
+                        struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_uid,
+                        struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_gid,
+                        struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_mode,
+                        struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev,
+                        struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_flags,
+                        struct btrfs_inode_item, flags, 64);
+
+/* Return @inode_item advanced by the byte offset of its atime member. */
+static inline struct btrfs_timespec *
+btrfs_inode_atime(struct btrfs_inode_item *inode_item)
+{
+       const unsigned long base = (unsigned long)inode_item;
+       const unsigned long field = offsetof(struct btrfs_inode_item, atime);
+
+       return (struct btrfs_timespec *)(base + field);
+}
+
+/* Return @inode_item advanced by the byte offset of its mtime member. */
+static inline struct btrfs_timespec *
+btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
+{
+       const unsigned long base = (unsigned long)inode_item;
+       const unsigned long field = offsetof(struct btrfs_inode_item, mtime);
+
+       return (struct btrfs_timespec *)(base + field);
+}
+
+/* Return @inode_item advanced by the byte offset of its ctime member. */
+static inline struct btrfs_timespec *
+btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
+{
+       const unsigned long base = (unsigned long)inode_item;
+       const unsigned long field = offsetof(struct btrfs_inode_item, ctime);
+
+       return (struct btrfs_timespec *)(base + field);
+}
+
+/* Return @inode_item advanced by the byte offset of its otime member. */
+static inline struct btrfs_timespec *
+btrfs_inode_otime(struct btrfs_inode_item *inode_item)
+{
+       const unsigned long base = (unsigned long)inode_item;
+       const unsigned long field = offsetof(struct btrfs_inode_item, otime);
+
+       return (struct btrfs_timespec *)(base + field);
+}
+
+/*
+ * struct btrfs_timespec: extent buffer accessors followed by the
+ * direct-pointer (stack) variants.
+ */
+BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec,
+                        sec, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec,
+                        nsec, 32);
+
+/*
+ * struct btrfs_dev_extent: extent buffer accessors, plus a direct-pointer
+ * (stack) accessor for length.
+ */
+BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
+                  chunk_tree, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
+                  chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
+                  chunk_offset, 64);
+BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_dev_extent_length, struct btrfs_dev_extent,
+                        length, 64);
+
+/* Return @dev advanced by the byte offset of chunk_tree_uuid, as u8 *. */
+static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
+{
+       const unsigned long base = (unsigned long)dev;
+       const unsigned long field =
+               offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
+
+       return (u8 *)(base + field);
+}
+
+
+/*
+ * struct btrfs_extent_item: extent buffer accessors plus direct-pointer
+ * (stack) accessors for refs and flags.
+ */
+BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 64);
+BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
+                  generation, 64);
+BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_flags, struct btrfs_extent_item, flags, 64);
+
+/* struct btrfs_extent_item_v0 */
+BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
+
+/* struct btrfs_tree_block_info */
+BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
+
+/*
+ * Read the key member of the struct btrfs_tree_block_info at @item in
+ * @eb into @key, via read_eb_member().
+ */
+static inline void btrfs_tree_block_key(struct extent_buffer *eb,
+                                       struct btrfs_tree_block_info *item,
+                                       struct btrfs_disk_key *key)
+{
+       read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
+}
+
+/*
+ * Write @key into the key member of the struct btrfs_tree_block_info at
+ * @item in @eb, via write_eb_member().
+ */
+static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
+                                           struct btrfs_tree_block_info *item,
+                                           struct btrfs_disk_key *key)
+{
+       write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
+}
+
+/* struct btrfs_extent_data_ref */
+BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref,
+                  root, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref,
+                  objectid, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref,
+                  offset, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref,
+                  count, 32);
+
+/* struct btrfs_shared_data_ref */
+BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref,
+                  count, 32);
+
+/*
+ * struct btrfs_extent_inline_ref: extent buffer accessors followed by the
+ * direct-pointer (stack) variants.
+ */
+BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
+                  type, 8);
+BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
+                  offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_type,
+                        struct btrfs_extent_inline_ref, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_offset,
+                        struct btrfs_extent_inline_ref, offset, 64);
+
+/*
+ * Size in bytes of an inline reference of the given key @type.
+ *
+ * Tree block and shared block refs are a bare struct
+ * btrfs_extent_inline_ref; a shared data ref adds a struct
+ * btrfs_shared_data_ref; an extent data ref is a struct
+ * btrfs_extent_data_ref placed at the offset member of the inline ref.
+ * Any other @type hits BUG().
+ */
+static inline u32 btrfs_extent_inline_ref_size(int type)
+{
+       switch (type) {
+       case BTRFS_TREE_BLOCK_REF_KEY:
+       case BTRFS_SHARED_BLOCK_REF_KEY:
+               return sizeof(struct btrfs_extent_inline_ref);
+       case BTRFS_SHARED_DATA_REF_KEY:
+               return sizeof(struct btrfs_shared_data_ref) +
+                      sizeof(struct btrfs_extent_inline_ref);
+       case BTRFS_EXTENT_DATA_REF_KEY:
+               return sizeof(struct btrfs_extent_data_ref) +
+                      offsetof(struct btrfs_extent_inline_ref, offset);
+       default:
+               break;
+       }
+
+       BUG();
+       return 0;
+}
+
+/* struct btrfs_extent_ref_v0: accessors for items in an extent buffer */
+BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
+BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
+                  generation, 64);
+BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
+BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
+
+/*
+ * struct btrfs_node: accessors for the struct btrfs_key_ptr entries of
+ * its ptrs array, addressed inside an extent buffer.
+ */
+BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
+BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
+
+/* Read the blockptr of key pointer slot @nr of the node held in @eb. */
+static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
+{
+       const unsigned long slot = offsetof(struct btrfs_node, ptrs) +
+                                  sizeof(struct btrfs_key_ptr) * nr;
+
+       return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)slot);
+}
+
+/* Store @val as the blockptr of key pointer slot @nr of the node in @eb. */
+static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
+                                          int nr, u64 val)
+{
+       const unsigned long slot = offsetof(struct btrfs_node, ptrs) +
+                                  sizeof(struct btrfs_key_ptr) * nr;
+
+       btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)slot, val);
+}
+
+/* Read the generation of key pointer slot @nr of the node held in @eb. */
+static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
+{
+       const unsigned long slot = offsetof(struct btrfs_node, ptrs) +
+                                  sizeof(struct btrfs_key_ptr) * nr;
+
+       return btrfs_key_generation(eb, (struct btrfs_key_ptr *)slot);
+}
+
+/* Store @val as the generation of key pointer slot @nr of the node in @eb. */
+static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
+                                                int nr, u64 val)
+{
+       const unsigned long slot = offsetof(struct btrfs_node, ptrs) +
+                                  sizeof(struct btrfs_key_ptr) * nr;
+
+       btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)slot, val);
+}
+
+/*
+ * Byte offset of key pointer slot @nr within a node: the offset of the
+ * ptrs array plus @nr times the size of struct btrfs_key_ptr.
+ */
+static inline unsigned long btrfs_node_key_ptr_offset(int nr)
+{
+       const unsigned long first = offsetof(struct btrfs_node, ptrs);
+       const unsigned long skip = sizeof(struct btrfs_key_ptr) * nr;
+
+       return first + skip;
+}
+
+/* Read the key of key pointer slot @nr of the node in @eb into @disk_key. */
+static inline void btrfs_node_key(struct extent_buffer *eb,
+                                 struct btrfs_disk_key *disk_key, int nr)
+{
+       struct btrfs_key_ptr *slot =
+               (struct btrfs_key_ptr *)btrfs_node_key_ptr_offset(nr);
+
+       read_eb_member(eb, slot, struct btrfs_key_ptr, key, disk_key);
+}
+
+/* Write @disk_key as the key of key pointer slot @nr of the node in @eb. */
+static inline void btrfs_set_node_key(struct extent_buffer *eb,
+                                     struct btrfs_disk_key *disk_key, int nr)
+{
+       struct btrfs_key_ptr *slot =
+               (struct btrfs_key_ptr *)btrfs_node_key_ptr_offset(nr);
+
+       write_eb_member(eb, slot, struct btrfs_key_ptr, key, disk_key);
+}
+
+/* struct btrfs_item: accessors for items located in an extent buffer */
+BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
+BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
+
+/*
+ * Byte offset of item slot @nr within a leaf: the offset of the items
+ * array plus @nr times the size of struct btrfs_item.
+ */
+static inline unsigned long btrfs_item_nr_offset(int nr)
+{
+       const unsigned long first = offsetof(struct btrfs_leaf, items);
+       const unsigned long skip = sizeof(struct btrfs_item) * nr;
+
+       return first + skip;
+}
+
+/*
+ * btrfs_item_nr_offset(@nr) cast to a struct btrfs_item pointer.  The
+ * value is a leaf-relative offset intended for the extent buffer
+ * accessors, which add it to eb->data.
+ */
+static inline struct btrfs_item *btrfs_item_nr(int nr)
+{
+       const unsigned long slot_offset = btrfs_item_nr_offset(nr);
+
+       return (struct btrfs_item *)slot_offset;
+}
+
+/*
+ * Return the sum of the offset and size fields of @item as stored in @eb.
+ *
+ * Both arguments are only read, so they are const-qualified to match the
+ * generated btrfs_item_offset()/btrfs_item_size() getters and the
+ * btrfs_item_offset_nr() helper; existing callers are unaffected.
+ */
+static inline u32 btrfs_item_end(const struct extent_buffer *eb,
+                                const struct btrfs_item *item)
+{
+       return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
+}
+
+/* btrfs_item_end() for item slot @nr of the leaf held in @eb. */
+static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
+{
+       struct btrfs_item *slot = btrfs_item_nr(nr);
+
+       return btrfs_item_end(eb, slot);
+}
+
+/* Read the offset field of item slot @nr of the leaf held in @eb. */
+static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr)
+{
+       const struct btrfs_item *slot = btrfs_item_nr(nr);
+
+       return btrfs_item_offset(eb, slot);
+}
+
+/*
+ * Read the size field of item slot @nr of the leaf held in @eb.
+ *
+ * @eb is only read, so it is const-qualified like the sibling
+ * btrfs_item_offset_nr(); existing callers are unaffected.
+ */
+static inline u32 btrfs_item_size_nr(const struct extent_buffer *eb, int nr)
+{
+       return btrfs_item_size(eb, btrfs_item_nr(nr));
+}
+
+/* Read the key of item slot @nr of the leaf in @eb into @disk_key. */
+static inline void btrfs_item_key(struct extent_buffer *eb,
+                          struct btrfs_disk_key *disk_key, int nr)
+{
+       read_eb_member(eb, btrfs_item_nr(nr), struct btrfs_item, key,
+                      disk_key);
+}
+
+/* Write @disk_key as the key of item slot @nr of the leaf in @eb. */
+static inline void btrfs_set_item_key(struct extent_buffer *eb,
+                              struct btrfs_disk_key *disk_key, int nr)
+{
+       write_eb_member(eb, btrfs_item_nr(nr), struct btrfs_item, key,
+                       disk_key);
+}
+
+/* struct btrfs_dir_log_item: accessor for an item in an extent buffer */
+BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64);
 
 /*
- * all non-leaf blocks are nodes, they hold only keys and pointers to
- * other blocks
+ * struct btrfs_root_ref
  */
-struct btrfs_key_ptr {
-       struct btrfs_key key;
-       __u64 blockptr;
-       __u64 generation;
-} __attribute__ ((__packed__));
+/* Accessors for a struct btrfs_root_ref located in an extent buffer */
+BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64);
+BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64);
+BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16);
 
-struct btrfs_node {
-       struct btrfs_header header;
-       struct btrfs_key_ptr ptrs[];
-} __attribute__ ((__packed__));
+/* Accessors working directly on a struct btrfs_root_ref pointer */
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_dirid, struct btrfs_root_ref, dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_sequence, struct btrfs_root_ref, sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_name_len, struct btrfs_root_ref, name_len, 16);
 
-union btrfs_tree_node {
-       struct btrfs_header header;
-       struct btrfs_leaf leaf;
-       struct btrfs_node node;
-};
+/* struct btrfs_dir_item: accessors for items located in an extent buffer */
+BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
+BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
+BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
 
-typedef __u8 u8;
-typedef __u16 u16;
-typedef __u32 u32;
-typedef __u64 u64;
+/* Accessors working directly on a struct btrfs_dir_item pointer */
+BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, data_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, transid, 64);
 
-struct btrfs_path {
-       union btrfs_tree_node *nodes[BTRFS_MAX_LEVEL];
-       u32 slots[BTRFS_MAX_LEVEL];
-};
+/*
+ * Read the location member of the struct btrfs_dir_item at @item in @eb
+ * into @key, via read_eb_member().
+ */
+static inline void btrfs_dir_item_key(struct extent_buffer *eb,
+                                     struct btrfs_dir_item *item,
+                                     struct btrfs_disk_key *key)
+{
+       read_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
 
-struct btrfs_root {
-       u64 objectid;
-       u64 bytenr;
-       u64 root_dirid;
-};
+/*
+ * Write @key into the location member of the struct btrfs_dir_item at
+ * @item in @eb, via write_eb_member().
+ */
+static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
+                                         struct btrfs_dir_item *item,
+                                         struct btrfs_disk_key *key)
+{
+       write_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
+
+/*
+ * struct btrfs_free_space_header: accessors for items located in an
+ * extent buffer.
+ */
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+                  num_entries, 64);
+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+                  num_bitmaps, 64);
+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+                  generation, 64);
+
+/*
+ * Read the location member of the struct btrfs_free_space_header at @h
+ * in @eb into @key, via read_eb_member().
+ */
+static inline void btrfs_free_space_key(struct extent_buffer *eb,
+                                       struct btrfs_free_space_header *h,
+                                       struct btrfs_disk_key *key)
+{
+       read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+/*
+ * Write @key into the location member of the struct
+ * btrfs_free_space_header at @h in @eb, via write_eb_member().
+ */
+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+                                           struct btrfs_free_space_header *h,
+                                           struct btrfs_disk_key *key)
+{
+       write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+/*
+ * struct btrfs_disk_key: accessors working directly on a structure
+ * pointer (generated by BTRFS_SETGET_STACK_FUNCS).
+ */
+BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
+                        objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
+
+/*
+ * Fill @cpu from @disk: objectid and offset are converted with
+ * le64_to_cpu(), type is copied unchanged.
+ *
+ * @disk is only read, so it is const-qualified, mirroring the const
+ * source argument of btrfs_cpu_key_to_disk(); existing callers are
+ * unaffected.
+ */
+static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
+                                        const struct btrfs_disk_key *disk)
+{
+       cpu->objectid = le64_to_cpu(disk->objectid);
+       cpu->type = disk->type;
+       cpu->offset = le64_to_cpu(disk->offset);
+}
+
+/*
+ * Fill @disk from @cpu: objectid and offset are converted with
+ * cpu_to_le64(), type is copied unchanged.
+ */
+static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
+                                        const struct btrfs_key *cpu)
+{
+       disk->objectid = cpu_to_le64(cpu->objectid);
+       disk->type = cpu->type;
+       disk->offset = cpu_to_le64(cpu->offset);
+}
+
+/*
+ * Read the key at slot @nr of @eb with btrfs_node_key() and store its
+ * CPU-format conversion in @key.
+ */
+static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
+                                 struct btrfs_key *key, int nr)
+{
+       struct btrfs_disk_key raw;
+
+       btrfs_node_key(eb, &raw, nr);
+       btrfs_disk_key_to_cpu(key, &raw);
+}
+
+/*
+ * Read the key at slot @nr of @eb with btrfs_item_key() and store its
+ * CPU-format conversion in @key.
+ */
+static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
+                                 struct btrfs_key *key, int nr)
+{
+       struct btrfs_disk_key raw;
+
+       btrfs_item_key(eb, &raw, nr);
+       btrfs_disk_key_to_cpu(key, &raw);
+}
+
+/*
+ * Read the key embedded in dir item @item of @eb with btrfs_dir_item_key()
+ * and store its CPU-format conversion in @key.
+ */
+static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
+                                     struct btrfs_dir_item *item,
+                                     struct btrfs_key *key)
+{
+       struct btrfs_disk_key raw;
+
+       btrfs_dir_item_key(eb, item, &raw);
+       btrfs_disk_key_to_cpu(key, &raw);
+}
+
+/*
+ * struct btrfs_header
+ *
+ * Two accessor families for the same members: the BTRFS_SETGET_HEADER_FUNCS
+ * ones (used below with an extent_buffer argument) and "stack_" variants
+ * generated by BTRFS_SETGET_STACK_FUNCS. Both macros are defined outside this
+ * chunk.
+ */
+BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
+                         generation, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
+BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, nritems,
+                        32);
+BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header,
+                        generation, 64);
+
+/* Return 1 when every bit of @flag is set in the header flags of @eb, else 0. */
+static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
+{
+       u64 current_flags = btrfs_header_flags(eb);
+
+       return (current_flags & flag) == flag;
+}
+
+/*
+ * OR @flag into the header flags of @eb.
+ *
+ * Returns 1 if all bits of @flag were already set before the update, else 0.
+ */
+static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
+{
+       u64 before = btrfs_header_flags(eb);
+       int was_set = (before & flag) == flag;
+
+       btrfs_set_header_flags(eb, before | flag);
+       return was_set;
+}
+
+/*
+ * Clear the bits of @flag in the header flags of @eb.
+ *
+ * Returns 1 if all bits of @flag were set before the update, else 0.
+ */
+static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
+{
+       u64 before = btrfs_header_flags(eb);
+       int was_set = (before & flag) == flag;
+
+       btrfs_set_header_flags(eb, before & ~flag);
+       return was_set;
+}
+
+/*
+ * Return the header flags of @eb shifted right by BTRFS_BACKREF_REV_SHIFT
+ * (the u64 result is converted to int on return).
+ */
+static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
+{
+       return btrfs_header_flags(eb) >> BTRFS_BACKREF_REV_SHIFT;
+}
+
+/*
+ * Replace the backref revision bits in the header flags of @eb: the bits
+ * covered by BTRFS_BACKREF_REV_MASK are cleared, then @rev shifted left by
+ * BTRFS_BACKREF_REV_SHIFT is OR-ed in.
+ */
+static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
+                                               int rev)
+{
+       u64 kept = btrfs_header_flags(eb) & ~BTRFS_BACKREF_REV_MASK;
+       u64 rev_bits = (u64)rev << BTRFS_BACKREF_REV_SHIFT;
+
+       btrfs_set_header_flags(eb, kept | rev_bits);
+}
+
+/* Byte offset of the fsid member inside struct btrfs_header. */
+static inline unsigned long btrfs_header_fsid(void)
+{
+       unsigned long fsid_off = offsetof(struct btrfs_header, fsid);
+
+       return fsid_off;
+}
+
+/*
+ * Byte offset of the chunk_tree_uuid member inside struct btrfs_header.
+ * @eb is not used.
+ */
+static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+{
+       unsigned long uuid_off = offsetof(struct btrfs_header, chunk_tree_uuid);
+
+       return uuid_off;
+}
 
-int btrfs_comp_keys(struct btrfs_key *, struct btrfs_key *);
-int btrfs_comp_keys_type(struct btrfs_key *, struct btrfs_key *);
-int btrfs_bin_search(union btrfs_tree_node *, struct btrfs_key *, int *);
-void btrfs_free_path(struct btrfs_path *);
-int btrfs_search_tree(const struct btrfs_root *, struct btrfs_key *,
-                     struct btrfs_path *);
-int btrfs_prev_slot(struct btrfs_path *);
-int btrfs_next_slot(struct btrfs_path *);
+/*
+ * Return the byte offset of the csum member of struct btrfs_header, cast to
+ * a u8 pointer. The value is an offset, not an address inside @eb (which is
+ * not used here), so it must not be dereferenced directly.
+ */
+static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
+{
+       return (u8 *)(unsigned long)offsetof(struct btrfs_header, csum);
+}
+
+/* Return 1 when the header level of @eb is 0, otherwise 0. */
+static inline int btrfs_is_leaf(struct extent_buffer *eb)
+{
+       return !btrfs_header_level(eb);
+}
+
+/*
+ * struct btrfs_root_item
+ *
+ * "disk_root_*" accessors come from BTRFS_SETGET_FUNCS, plain "root_*" ones
+ * from BTRFS_SETGET_STACK_FUNCS (both macros are defined outside this chunk).
+ */
+BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item,
+                  generation, 64);
+BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item,
+                        generation, 64);
+BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
+BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
+BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
+                        last_snapshot, 64);
+BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item,
+                        generation_v2, 64);
+BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item,
+                        ctransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item,
+                        otransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item,
+                        stransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
+                        rtransid, 64);
+
+/* Return the address of the ctime member of @root_item. */
+static inline struct btrfs_timespec* btrfs_root_ctime(
+               struct btrfs_root_item *root_item)
+{
+       unsigned long base = (unsigned long)root_item;
+
+       return (struct btrfs_timespec *)
+               (base + offsetof(struct btrfs_root_item, ctime));
+}
+
+/* Return the address of the otime member of @root_item. */
+static inline struct btrfs_timespec* btrfs_root_otime(
+               struct btrfs_root_item *root_item)
+{
+       unsigned long base = (unsigned long)root_item;
+
+       return (struct btrfs_timespec *)
+               (base + offsetof(struct btrfs_root_item, otime));
+}
+
+/* Return the address of the stime member of @root_item. */
+static inline struct btrfs_timespec* btrfs_root_stime(
+               struct btrfs_root_item *root_item)
+{
+       unsigned long base = (unsigned long)root_item;
+
+       return (struct btrfs_timespec *)
+               (base + offsetof(struct btrfs_root_item, stime));
+}
+
+/* Return the address of the rtime member of @root_item. */
+static inline struct btrfs_timespec* btrfs_root_rtime(
+               struct btrfs_root_item *root_item)
+{
+       unsigned long base = (unsigned long)root_item;
+
+       return (struct btrfs_timespec *)
+               (base + offsetof(struct btrfs_root_item, rtime));
+}
+
+/*
+ * struct btrfs_root_backup
+ *
+ * For each of the tree, chunk, extent, fs, dev and csum roots there are three
+ * accessors (root, generation, level), followed by the total_bytes,
+ * bytes_used and num_devices members. All are generated by
+ * BTRFS_SETGET_STACK_FUNCS (macro defined outside this chunk).
+ */
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
+                  tree_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup,
+                  tree_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup,
+                  tree_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup,
+                  chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup,
+                  chunk_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup,
+                  chunk_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup,
+                  extent_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup,
+                  extent_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup,
+                  extent_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup,
+                  fs_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup,
+                  fs_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup,
+                  fs_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup,
+                  dev_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup,
+                  dev_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup,
+                  dev_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup,
+                  csum_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup,
+                  csum_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup,
+                  csum_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup,
+                  total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
+                  bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
+                  num_devices, 64);
+
+/*
+ * struct btrfs_super_block
+ *
+ * Accessors generated by BTRFS_SETGET_STACK_FUNCS (macro defined outside
+ * this chunk). Note that a few accessor names differ from the member they
+ * wrap: super_sys_array_size -> sys_chunk_array_size and
+ * super_root_dir -> root_dir_objectid.
+ */
+
+BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
+                        generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
+                        struct btrfs_super_block, sys_chunk_array_size, 32);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation,
+                        struct btrfs_super_block, chunk_root_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
+                        root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
+                        chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
+                        chunk_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
+                        log_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block,
+                        log_root_transid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
+                        log_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
+                        total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
+                        bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block,
+                        sectorsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block,
+                        nodesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block,
+                        stripesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block,
+                        root_dir_objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
+                        num_devices, 64);
+BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block,
+                        compat_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block,
+                        compat_ro_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
+                        incompat_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
+                        csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+                        cache_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
+                        uuid_tree_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
+
+/*
+ * Byte offset of the items member inside struct btrfs_leaf. The argument @l
+ * is not used.
+ */
+static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
+{
+       unsigned long items_off = offsetof(struct btrfs_leaf, items);
+
+       return items_off;
+}
+
+/*
+ * struct btrfs_file_extent_item
+ *
+ * Accessors for the type member, in both the BTRFS_SETGET_FUNCS and the
+ * BTRFS_SETGET_STACK_FUNCS flavour (macros defined outside this chunk).
+ */
+BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, type, 8);
+
+/*
+ * Return @e (taken as an integer) advanced by the offset of the disk_bytenr
+ * member of struct btrfs_file_extent_item.
+ */
+static inline unsigned long btrfs_file_extent_inline_start(struct
+                                                  btrfs_file_extent_item *e)
+{
+       return (unsigned long)e +
+              offsetof(struct btrfs_file_extent_item, disk_bytenr);
+}
+
+/*
+ * Return @datasize plus the offset of the disk_bytenr member of
+ * struct btrfs_file_extent_item.
+ */
+static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
+{
+       unsigned long total = offsetof(struct btrfs_file_extent_item,
+                                      disk_bytenr);
+
+       total += datasize;
+       return total;
+}
+
+/*
+ * Remaining struct btrfs_file_extent_item accessors. disk_num_bytes,
+ * encryption and other_encoding only get the BTRFS_SETGET_FUNCS flavour
+ * here; the other members also get a "stack_" variant.
+ */
+BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
+                  disk_bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, struct btrfs_file_extent_item,
+                  disk_bytenr, 64);
+BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
+                  generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, struct btrfs_file_extent_item,
+                  generation, 64);
+BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item,
+                  disk_num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
+                 offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, struct btrfs_file_extent_item,
+                 offset, 64);
+BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
+                  num_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, struct btrfs_file_extent_item,
+                  num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item,
+                  ram_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, struct btrfs_file_extent_item,
+                  ram_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item,
+                  compression, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, struct btrfs_file_extent_item,
+                  compression, 8);
+BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
+                  encryption, 8);
+BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
+                  other_encoding, 16);
+
+/*
+ * struct btrfs_qgroup_status_item: version, generation, flags and rescan
+ * accessors, first via BTRFS_SETGET_FUNCS and then as "stack_" variants via
+ * BTRFS_SETGET_STACK_FUNCS.
+ */
+BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
+                  version, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item,
+                  generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
+                  flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
+                  rescan, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_version,
+                        struct btrfs_qgroup_status_item, version, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_generation,
+                        struct btrfs_qgroup_status_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_flags,
+                        struct btrfs_qgroup_status_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_rescan,
+                        struct btrfs_qgroup_status_item, rescan, 64);
+
+/*
+ * struct btrfs_qgroup_info_item: accessors whose names spell out the short
+ * member names (rfer -> referenced, excl -> exclusive, *_cmpr -> compressed).
+ */
+BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
+                  generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_referenced, struct btrfs_qgroup_info_item,
+                  rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_referenced_compressed,
+                  struct btrfs_qgroup_info_item, rfer_cmpr, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_exclusive, struct btrfs_qgroup_info_item,
+                  excl, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_exclusive_compressed,
+                  struct btrfs_qgroup_info_item, excl_cmpr, 64);
 
-static inline struct btrfs_key *btrfs_path_leaf_key(struct btrfs_path *p) {
-       return &p->nodes[0]->leaf.items[p->slots[0]].key;
+/* "stack_" variants of the struct btrfs_qgroup_info_item accessors. */
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation,
+                        struct btrfs_qgroup_info_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced,
+                        struct btrfs_qgroup_info_item, rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced_compressed,
+                  struct btrfs_qgroup_info_item, rfer_cmpr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive,
+                        struct btrfs_qgroup_info_item, excl, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive_compressed,
+                  struct btrfs_qgroup_info_item, excl_cmpr, 64);
+
+/*
+ * struct btrfs_qgroup_limit_item: flags, max_rfer, max_excl, rsv_rfer and
+ * rsv_excl accessors, first via BTRFS_SETGET_FUNCS and then as "stack_"
+ * variants via BTRFS_SETGET_STACK_FUNCS.
+ */
+BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item,
+                  flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_referenced, struct btrfs_qgroup_limit_item,
+                  max_rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_exclusive, struct btrfs_qgroup_limit_item,
+                  max_excl, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_referenced, struct btrfs_qgroup_limit_item,
+                  rsv_rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_exclusive, struct btrfs_qgroup_limit_item,
+                  rsv_excl, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_flags,
+                        struct btrfs_qgroup_limit_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_referenced,
+                        struct btrfs_qgroup_limit_item, max_rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_exclusive,
+                        struct btrfs_qgroup_limit_item, max_excl, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_referenced,
+                        struct btrfs_qgroup_limit_item, rsv_rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_exclusive,
+                        struct btrfs_qgroup_limit_item, rsv_excl, 64);
+
+/* struct btrfs_balance_item: accessor for the flags member. */
+BTRFS_SETGET_FUNCS(balance_item_flags, struct btrfs_balance_item, flags, 64);
+
+/*
+ * Treat @bi as a byte offset into eb->data and return the address of the
+ * data member of the balance item located there.
+ */
+static inline struct btrfs_disk_balance_args* btrfs_balance_item_data(
+               struct extent_buffer *eb, struct btrfs_balance_item *bi)
+{
+       struct btrfs_balance_item *item;
+
+       item = (struct btrfs_balance_item *)(eb->data + (unsigned long)bi);
+       return &item->data;
 }
 
-static inline struct btrfs_key *
-btrfs_search_tree_key_type(const struct btrfs_root *root, u64 objectid,
-                          u8 type, struct btrfs_path *path)
+/*
+ * Treat @bi as a byte offset into eb->data and return the address of the
+ * meta member of the balance item located there.
+ */
+static inline struct btrfs_disk_balance_args* btrfs_balance_item_meta(
+               struct extent_buffer *eb, struct btrfs_balance_item *bi)
 {
-       struct btrfs_key key, *res;
+       struct btrfs_balance_item *item;
+
+       item = (struct btrfs_balance_item *)(eb->data + (unsigned long)bi);
+       return &item->meta;
+}
 
-       key.objectid = objectid;
-       key.type = type;
-       key.offset = 0;
+/*
+ * Treat @bi as a byte offset into eb->data and return the address of the
+ * sys member of the balance item located there.
+ */
+static inline struct btrfs_disk_balance_args* btrfs_balance_item_sys(
+               struct extent_buffer *eb, struct btrfs_balance_item *bi)
+{
+       struct btrfs_balance_item *item;
+
+       item = (struct btrfs_balance_item *)(eb->data + (unsigned long)bi);
+       return &item->sys;
+}
 
-       if (btrfs_search_tree(root, &key, path))
-               return NULL;
+/*
+ * Copy the @index-th u64 of the values array of the dev stats item out of
+ * @eb and return it. @ptr is used as a byte offset into the buffer. The
+ * bytes are returned exactly as read_extent_buffer() delivers them; no
+ * byte-order conversion is done here.
+ */
+static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
+                                       const struct btrfs_dev_stats_item *ptr,
+                                       int index)
+{
+       u64 val;
 
-       res = btrfs_path_leaf_key(path);
-       if (btrfs_comp_keys_type(&key, res)) {
-               btrfs_free_path(path);
-               return NULL;
-       }
+       read_extent_buffer(eb, &val,
+                          offsetof(struct btrfs_dev_stats_item, values) +
+                           ((unsigned long)ptr) + (index * sizeof(u64)),
+                          sizeof(val));
+       return val;
+}
 
-       return res;
+/*
+ * Size of the inline payload of item @e: the item size reported by
+ * btrfs_item_size() minus the part of struct btrfs_file_extent_item that
+ * precedes disk_bytenr. For a file compressed on disk this is the
+ * compressed size.
+ */
+static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
+                                                   struct btrfs_item *e)
+{
+       unsigned long header_len = offsetof(struct btrfs_file_extent_item,
+                                           disk_bytenr);
+
+       return btrfs_item_size(eb, e) - header_len;
 }
 
-static inline u32 btrfs_path_item_size(struct btrfs_path *p)
+/* Test an incompat feature by its short name, e.g. the FOO of BTRFS_FEATURE_INCOMPAT_FOO. */
+#define btrfs_fs_incompat(fs_info, opt) \
+       __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+
+/* Return true when any bit of @flag is set in the super block incompat flags. */
+static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
 {
-       return p->nodes[0]->leaf.items[p->slots[0]].size;
+       struct btrfs_super_block *sb = fs_info->super_copy;
+
+       return (btrfs_super_incompat_flags(sb) & flag) != 0;
 }
 
-static inline void *btrfs_leaf_data(struct btrfs_leaf *leaf, u32 slot)
+/* Test a compat_ro feature by its short name, e.g. the FOO of BTRFS_FEATURE_COMPAT_RO_FOO. */
+#define btrfs_fs_compat_ro(fs_info, opt) \
+       __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+/* Return 1 when any bit of @flag is set in the super block compat_ro flags, else 0. */
+static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
 {
-       return ((u8 *) leaf) + sizeof(struct btrfs_header)
-              + leaf->items[slot].offset;
+       struct btrfs_super_block *sb = fs_info->super_copy;
+
+       return (btrfs_super_compat_ro_flags(sb) & flag) != 0;
 }
 
-static inline void *btrfs_path_leaf_data(struct btrfs_path *p)
+/*
+ * Helper macro to cast into the data area of the leaf.
+ *
+ * The result is btrfs_leaf_data(leaf) + btrfs_item_offset_nr(leaf, slot)
+ * cast to (type *). btrfs_leaf_data() returns an offset, so this is an
+ * offset within the leaf rather than a directly dereferenceable address.
+ */
+#define btrfs_item_ptr(leaf, slot, type) \
+       ((type *)(btrfs_leaf_data(leaf) + \
+       btrfs_item_offset_nr(leaf, slot)))
+
+/* Same value as btrfs_item_ptr(), but as an unsigned long. */
+#define btrfs_item_ptr_offset(leaf, slot) \
+       ((unsigned long)(btrfs_leaf_data(leaf) + \
+       btrfs_item_offset_nr(leaf, slot)))
+
+/*
+ * Hash the @len bytes at @name with crc32c(), seeded with (u32)~1, and
+ * return the result widened to u64.
+ */
+static inline u64 btrfs_name_hash(const char *name, int len)
 {
-       return btrfs_leaf_data(&p->nodes[0]->leaf, p->slots[0]);
+       return (u64)crc32c((u32)~1, (u8 *)name, len);
 }
 
-#define btrfs_item_ptr(l,s,t)                  \
-       ((t *) btrfs_leaf_data((l),(s)))
+/*
+ * Figure the key offset of an extended inode ref
+ *
+ * The offset is the crc32c of the @len bytes at @name, seeded with
+ * @parent_objectid (truncated to the 32-bit seed crc32c() takes). This must
+ * be crc32c, the same hash btrfs_name_hash() uses; plain crc32 would give
+ * offsets that never match the keys stored on disk.
+ */
+static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
+                                   int len)
+{
+       return (u64)crc32c(parent_objectid, (u8 *)name, len);
+}
+
+/*
+ * Overlay of the three views of a tree block: the bare header, a leaf, or
+ * an internal node.
+ */
+union btrfs_tree_node {
+       struct btrfs_header header;
+       struct btrfs_leaf leaf;
+       struct btrfs_node node;
+};
 
 #define btrfs_path_item_ptr(p,t)               \
        ((t *) btrfs_path_leaf_data((p)))
 
+/* Checksum type helpers; their definitions are not part of this header. */
+u16 btrfs_super_csum_size(const struct btrfs_super_block *s);
+const char *btrfs_super_csum_name(u16 csum_type);
+u16 btrfs_csum_type_size(u16 csum_type);
+size_t btrfs_super_num_csums(void);
+
+/* root-tree.c */
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
+                       struct btrfs_root_item *item, struct btrfs_key *key);
+
+/* dir-item.c */
+/*
+ * Returns the matching dir item, NULL when nothing matches, or an ERR_PTR()
+ * when the tree search itself fails.
+ */
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root,
+                                            struct btrfs_path *path, u64 dir,
+                                            const char *name, int name_len,
+                                            int mod);
+/*
+ * Callback type for btrfs_iter_dir(); a negative return value stops the
+ * iteration and is passed back to the caller of btrfs_iter_dir().
+ */
+typedef int (*btrfs_iter_dir_callback_t)(struct btrfs_root *root,
+                                        struct extent_buffer *eb,
+                                        struct btrfs_dir_item *di);
+/* Invoke @callback on each BTRFS_DIR_INDEX_KEY item of inode @ino. */
+int btrfs_iter_dir(struct btrfs_root *root, u64 ino,
+                  btrfs_iter_dir_callback_t callback);
+/* inode.c */
+int btrfs_lookup_path(struct btrfs_root *root, u64 ino, const char *filename,
+                       struct btrfs_root **root_ret, u64 *ino_ret,
+                       u8 *type_ret, int symlink_limit);
+int btrfs_read_extent_inline(struct btrfs_path *path,
+                            struct btrfs_file_extent_item *fi, char *dest);
+int btrfs_read_extent_reg(struct btrfs_path *path,
+                         struct btrfs_file_extent_item *fi, u64 offset,
+                         int len, char *dest);
+
+/* ctree.c: key comparison, tree block checks and tree walking helpers */
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
+enum btrfs_tree_block_status
+btrfs_check_node(struct btrfs_fs_info *fs_info,
+                struct btrfs_disk_key *parent_key, struct extent_buffer *buf);
+enum btrfs_tree_block_status
+btrfs_check_leaf(struct btrfs_fs_info *fs_info,
+                struct btrfs_disk_key *parent_key, struct extent_buffer *buf);
+struct extent_buffer *read_node_slot(struct btrfs_fs_info *fs_info,
+                                  struct extent_buffer *parent, int slot);
+int btrfs_previous_item(struct btrfs_root *root,
+                       struct btrfs_path *path, u64 min_objectid,
+                       int type);
+int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info,
+                                 struct btrfs_path *path);
+/*
+ * Walk up the tree as far as necessary to find the next leaf.
+ *
+ * Resets path->lowest_level to 0 and delegates to
+ * btrfs_next_sibling_tree_block().
+ *
+ * Returns 0 if it found something, 1 if there are no greater leaves and
+ * < 0 on io errors.
+ */
+static inline int btrfs_next_leaf(struct btrfs_root *root,
+                                 struct btrfs_path *path)
+{
+       int ret;
+
+       path->lowest_level = 0;
+       ret = btrfs_next_sibling_tree_block(root->fs_info, path);
+
+       return ret;
+}
+
+/*
+ * Advance @p to the next item: bump the leaf slot and, once it runs past
+ * the last item of the current leaf, move on with btrfs_next_leaf().
+ *
+ * Returns 0 while the slot stays within the current leaf, otherwise the
+ * result of btrfs_next_leaf().
+ */
+static inline int btrfs_next_item(struct btrfs_root *root,
+                                 struct btrfs_path *p)
+{
+       p->slots[0]++;
+       if (p->slots[0] < btrfs_header_nritems(p->nodes[0]))
+               return 0;
+
+       return btrfs_next_leaf(root, p);
+}
+
+/* Tree search and path management helpers (defined outside this header). */
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_leaf_free_space(struct extent_buffer *leaf);
+int btrfs_search_slot(struct btrfs_trans_handle *trans,
+               struct btrfs_root *root, const struct btrfs_key *key,
+               struct btrfs_path *p, int ins_len, int cow);
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+                              const struct btrfs_key *key,
+                              struct btrfs_path *p, int find_higher,
+                              int return_any);
+void btrfs_release_path(struct btrfs_path *p);
+struct btrfs_path *btrfs_alloc_path(void);
+void btrfs_free_path(struct btrfs_path *p);
+/* Zero every byte of the path structure @p. */
+static inline void btrfs_init_path(struct btrfs_path *p)
+{
+       memset(p, 0, sizeof(struct btrfs_path));
+}
+/* Key lookup helpers (defined outside this header). */
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
+                    int *slot);
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+               u64 iobjectid, u64 ioff, u8 key_type,
+               struct btrfs_key *found_key);
 #endif /* __BTRFS_CTREE_H__ */
index 63b5bf0..aab197a 100644 (file)
  */
 
 #include "btrfs.h"
+#include "disk-io.h"
 
-static int verify_dir_item(struct btrfs_dir_item *item, u32 start, u32 total)
+/*
+ * Sanity check the lengths recorded in @dir_item of @leaf.
+ *
+ * The name may be at most BTRFS_NAME_LEN bytes (XATTR_NAME_MAX for
+ * BTRFS_FT_XATTR entries), and name plus data together may not exceed
+ * BTRFS_MAX_XATTR_SIZE().
+ *
+ * Returns 0 when the item passes, 1 (after printing a diagnostic) when not.
+ */
+static int verify_dir_item(struct btrfs_root *root,
+                   struct extent_buffer *leaf,
+                   struct btrfs_dir_item *dir_item)
 {
-       u16 max_len = BTRFS_NAME_LEN;
-       u32 end;
+       u16 namelen = BTRFS_NAME_LEN;
+       u8 type = btrfs_dir_type(leaf, dir_item);
 
-       if (item->type >= BTRFS_FT_MAX) {
-               printf("%s: invalid dir item type: %i\n", __func__, item->type);
+       if (type == BTRFS_FT_XATTR)
+               namelen = XATTR_NAME_MAX;
+
+       if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
+               /* Report the value that failed the check: the name length. */
+               fprintf(stderr, "invalid dir item name len: %u\n",
+                       (unsigned)btrfs_dir_name_len(leaf, dir_item));
                return 1;
        }
 
-       if (item->type == BTRFS_FT_XATTR)
-               max_len = 255; /* XATTR_NAME_MAX */
-
-       end = start + sizeof(*item) + item->name_len;
-       if (item->name_len > max_len || end > total) {
-               printf("%s: invalid dir item name len: %u\n", __func__,
-                      item->name_len);
+       /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
+       if ((btrfs_dir_data_len(leaf, dir_item) +
+            btrfs_dir_name_len(leaf, dir_item)) >
+                       BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
+               fprintf(stderr, "invalid dir item name + data len: %u + %u\n",
+                       (unsigned)btrfs_dir_name_len(leaf, dir_item),
+                       (unsigned)btrfs_dir_data_len(leaf, dir_item));
                return 1;
        }
 
        return 0;
 }
 
-static struct btrfs_dir_item *
-btrfs_match_dir_item_name(struct btrfs_path *path, const char *name,
-                         int name_len)
+/*
+ * Scan the dir item entries packed into the leaf item that @path points at
+ * and return the one whose name equals @name (@name_len bytes).
+ *
+ * Returns NULL when no entry matches, when verify_dir_item() rejects the
+ * first entry, or when an entry claims more bytes than remain in the item.
+ */
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+                             struct btrfs_path *path,
+                             const char *name, int name_len)
 {
-       struct btrfs_dir_item *item;
-       u32 total_len, cur = 0, this_len;
-       const char *name_ptr;
-
-       item = btrfs_path_item_ptr(path, struct btrfs_dir_item);
-
-       total_len = btrfs_path_item_size(path);
+       struct btrfs_dir_item *dir_item;
+       unsigned long name_ptr;
+       u32 total_len;
+       u32 cur = 0;
+       u32 this_len;
+       struct extent_buffer *leaf;
+
+       leaf = path->nodes[0];
+       dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+       total_len = btrfs_item_size_nr(leaf, path->slots[0]);
+       /* Only the first entry is verified; later ones are size-checked below. */
+       if (verify_dir_item(root, leaf, dir_item))
+               return NULL;
+
+       while(cur < total_len) {
+               this_len = sizeof(*dir_item) +
+                       btrfs_dir_name_len(leaf, dir_item) +
+                       btrfs_dir_data_len(leaf, dir_item);
+               if (this_len > (total_len - cur)) {
+                       fprintf(stderr, "invalid dir item size\n");
+                       return NULL;
+               }
 
-       while (cur < total_len) {
-               btrfs_dir_item_to_cpu(item);
-               this_len = sizeof(*item) + item->name_len + item->data_len;
-               name_ptr = (const char *) (item + 1);
+               /* The name bytes start right after the fixed-size header. */
+               name_ptr = (unsigned long)(dir_item + 1);
 
-               if (verify_dir_item(item, cur, total_len))
-                       return NULL;
-               if (item->name_len == name_len && !memcmp(name_ptr, name,
-                                                         name_len))
-                       return item;
+               if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
+                   memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
+                       return dir_item;
 
                cur += this_len;
-               item = (struct btrfs_dir_item *) ((u8 *) item + this_len);
+               dir_item = (struct btrfs_dir_item *)((char *)dir_item +
+                                                    this_len);
        }
-
        return NULL;
 }
 
-int btrfs_lookup_dir_item(const struct btrfs_root *root, u64 dir,
-                         const char *name, int name_len,
-                         struct btrfs_dir_item *item)
+/*
+ * Look up the entry @name (@name_len bytes) in directory @dir.
+ *
+ * The search key is (dir, BTRFS_DIR_ITEM_KEY, btrfs_name_hash(name)). @mod
+ * selects the btrfs_search_slot() arguments: ins_len is -1 for mod < 0 and
+ * 0 otherwise, and cow is set for any non-zero mod.
+ *
+ * Returns the matching dir item, NULL when there is none, or an ERR_PTR()
+ * when the search fails. @path is left for the caller to release.
+ */
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root,
+                                            struct btrfs_path *path, u64 dir,
+                                            const char *name, int name_len,
+                                            int mod)
 {
-       struct btrfs_path path;
+       int ret;
        struct btrfs_key key;
-       struct btrfs_dir_item *res = NULL;
+       int ins_len = mod < 0 ? -1 : 0;
+       int cow = mod != 0;
+       struct btrfs_key found_key;
+       struct extent_buffer *leaf;
 
        key.objectid = dir;
        key.type = BTRFS_DIR_ITEM_KEY;
+
        key.offset = btrfs_name_hash(name, name_len);
 
-       if (btrfs_search_tree(root, &key, &path))
-               return -1;
+       ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+       if (ret < 0)
+               return ERR_PTR(ret);
+       /* No exact hit: step back one slot and re-check the key below. */
+       if (ret > 0) {
+               if (path->slots[0] == 0)
+                       return NULL;
+               path->slots[0]--;
+       }
 
-       if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path)))
-               goto out;
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
-       res = btrfs_match_dir_item_name(&path, name, name_len);
-       if (res)
-               *item = *res;
-out:
-       btrfs_free_path(&path);
-       return res ? 0 : -1;
+       if (found_key.objectid != dir ||
+           found_key.type != BTRFS_DIR_ITEM_KEY ||
+           found_key.offset != key.offset)
+               return NULL;
+
+       return btrfs_match_dir_item_name(root, path, name, name_len);
 }
 
-int btrfs_readdir(const struct btrfs_root *root, u64 dir,
-                 btrfs_readdir_callback_t callback)
+/*
+ * Call @callback for every BTRFS_DIR_INDEX_KEY item of inode @ino in @root.
+ *
+ * Iteration starts at key (ino, BTRFS_DIR_INDEX_KEY, 0) and stops at the
+ * first item with a different objectid or type, or at the end of the tree.
+ *
+ * Returns 0 on success, -EUCLEAN when the start key unexpectedly exists or
+ * an item fails verify_dir_item(), or the negative value returned by the
+ * search, the tree walk or @callback. The local path is released on every
+ * exit, including a failed initial search.
+ */
+int btrfs_iter_dir(struct btrfs_root *root, u64 ino,
+                  btrfs_iter_dir_callback_t callback)
 {
        struct btrfs_path path;
-       struct btrfs_key key, *found_key;
-       struct btrfs_dir_item *item;
-       int res = 0;
+       struct btrfs_key key;
+       int ret;
 
-       key.objectid = dir;
+       btrfs_init_path(&path);
+       key.objectid = ino;
        key.type = BTRFS_DIR_INDEX_KEY;
        key.offset = 0;
 
-       if (btrfs_search_tree(root, &key, &path))
-               return -1;
-
+       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+       /* A failed search may have attached buffers to the path already. */
+       if (ret < 0)
+               goto out;
+       /* Should not happen */
+       if (ret == 0) {
+               ret = -EUCLEAN;
+               goto out;
+       }
+       if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
+               ret = btrfs_next_leaf(root, &path);
+               if (ret < 0)
+                       goto out;
+               if (ret > 0) {
+                       ret = 0;
+                       goto out;
+               }
+       }
        do {
-               found_key = btrfs_path_leaf_key(&path);
-               if (btrfs_comp_keys_type(&key, found_key))
-                       break;
-
-               item = btrfs_path_item_ptr(&path, struct btrfs_dir_item);
-               btrfs_dir_item_to_cpu(item);
-
-               if (verify_dir_item(item, 0, sizeof(*item) + item->name_len))
-                       continue;
-               if (item->type == BTRFS_FT_XATTR)
-                       continue;
+               struct btrfs_dir_item *di;
 
-               if (callback(root, item))
+               btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+               if (key.objectid != ino || key.type != BTRFS_DIR_INDEX_KEY)
                        break;
-       } while (!(res = btrfs_next_slot(&path)));
-
-       btrfs_free_path(&path);
-
-       return res < 0 ? -1 : 0;
+               di = btrfs_item_ptr(path.nodes[0], path.slots[0],
+                                   struct btrfs_dir_item);
+               if (verify_dir_item(root, path.nodes[0], di)) {
+                       ret = -EUCLEAN;
+                       goto out;
+               }
+               ret = callback(root, path.nodes[0], di);
+               if (ret < 0)
+                       goto out;
+       } while (!(ret = btrfs_next_item(root, &path)));
+
+       /* Running off the end of the tree is a normal end of iteration. */
+       if (ret > 0)
+               ret = 0;
+out:
+       btrfs_release_path(&path);
+       return ret;
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
new file mode 100644 (file)
index 0000000..01e7cee
--- /dev/null
@@ -0,0 +1,1062 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <common.h>
+#include <fs_internal.h>
+#include <uuid.h>
+#include <memalign.h>
+#include "kernel-shared/btrfs_tree.h"
+#include "common/rbtree-utils.h"
+#include "disk-io.h"
+#include "ctree.h"
+#include "btrfs.h"
+#include "volumes.h"
+#include "extent-io.h"
+#include "crypto/hash.h"
+
+/*
+ * Private failure codes returned by check_tree_block() and decoded by
+ * print_tree_block_error(). They only identify which header check failed.
+ */
+#define BTRFS_BAD_BYTENR               (-1)
+#define BTRFS_BAD_FSID                 (-2)
+#define BTRFS_BAD_LEVEL                        (-3)
+#define BTRFS_BAD_NRITEMS              (-4)
+
+/*
+ * Upper bound on the item count of a tree block of @nodesize bytes.
+ *
+ * The space left after struct btrfs_header is divided by the size of
+ * struct btrfs_item for a leaf (@level == 0) or struct btrfs_key_ptr for
+ * any higher level.
+ */
+static u32 max_nritems(u8 level, u32 nodesize)
+{
+       size_t payload = nodesize - sizeof(struct btrfs_header);
+       size_t entry_size = level ? sizeof(struct btrfs_key_ptr) :
+                                   sizeof(struct btrfs_item);
+
+       return payload / entry_size;
+}
+
+/*
+ * Sanity-check the header of tree block @buf.
+ *
+ * Checks, in order: the recorded bytenr matches buf->start, the level is
+ * below BTRFS_MAX_LEVEL, the item count fits the node size (and only a leaf
+ * is empty), and the header fsid matches one of the fs_devices in the
+ * seed chain.
+ *
+ * Returns 0 if every check passes, otherwise the BTRFS_BAD_* code of the
+ * first check that failed.
+ */
+static int check_tree_block(struct btrfs_fs_info *fs_info,
+                           struct extent_buffer *buf)
+{
+       struct btrfs_fs_devices *cur;
+       u32 nritems;
+       int level;
+
+       if (buf->start != btrfs_header_bytenr(buf))
+               return BTRFS_BAD_BYTENR;
+
+       level = btrfs_header_level(buf);
+       if (level >= BTRFS_MAX_LEVEL)
+               return BTRFS_BAD_LEVEL;
+
+       nritems = btrfs_header_nritems(buf);
+       if (nritems > max_nritems(level, fs_info->nodesize))
+               return BTRFS_BAD_NRITEMS;
+
+       /* Only leaf can be empty */
+       if (nritems == 0 && level != 0)
+               return BTRFS_BAD_NRITEMS;
+
+       for (cur = fs_info->fs_devices; cur; cur = cur->seed) {
+               const u8 *expected = cur->fsid;
+
+               /*
+                * Checking the incompat flag is only valid for the current
+                * fs. For seed devices it's forbidden to have their uuid
+                * changed so reading ->fsid in this case is fine
+                */
+               if (cur == fs_info->fs_devices &&
+                   btrfs_fs_incompat(fs_info, METADATA_UUID))
+                       expected = cur->metadata_uuid;
+
+               if (!memcmp_extent_buffer(buf, expected, btrfs_header_fsid(),
+                                         BTRFS_FSID_SIZE))
+                       return 0;
+       }
+
+       return BTRFS_BAD_FSID;
+}
+
+/*
+ * Print a description of a check_tree_block() failure on @eb to stderr.
+ *
+ * @err is one of the BTRFS_BAD_* codes. Nothing is printed for 0, and an
+ * unrecognised code only prints the "bad tree block" prefix.
+ */
+static void print_tree_block_error(struct btrfs_fs_info *fs_info,
+                               struct extent_buffer *eb,
+                               int err)
+{
+       if (err == 0)
+               return;
+
+       fprintf(stderr, "bad tree block %llu, ", eb->start);
+
+       if (err == BTRFS_BAD_FSID) {
+               char want[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
+               char have[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
+               u8 raw[BTRFS_UUID_SIZE];
+
+               /* Pull the fsid straight out of the block header */
+               read_extent_buffer(eb, raw, btrfs_header_fsid(),
+                                  BTRFS_UUID_SIZE);
+               uuid_unparse(raw, have);
+               uuid_unparse(fs_info->fs_devices->metadata_uuid, want);
+               fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
+                       want, have);
+       } else if (err == BTRFS_BAD_BYTENR) {
+               fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
+                       eb->start, btrfs_header_bytenr(eb));
+       } else if (err == BTRFS_BAD_LEVEL) {
+               fprintf(stderr, "bad level, %u > %d\n",
+                       btrfs_header_level(eb), BTRFS_MAX_LEVEL);
+       } else if (err == BTRFS_BAD_NRITEMS) {
+               fprintf(stderr, "invalid nr_items: %u\n",
+                       btrfs_header_nritems(eb));
+       }
+}
+
+/*
+ * Checksum @len bytes at @data with the algorithm selected by @csum_type.
+ *
+ * @out must have room for BTRFS_CSUM_SIZE bytes; it is zeroed first so any
+ * bytes the chosen hash does not write stay zero.
+ *
+ * Returns whatever the selected hash helper returns, or -EINVAL for a
+ * checksum type this function does not handle.
+ */
+int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len)
+{
+       memset(out, 0, BTRFS_CSUM_SIZE);
+
+       if (csum_type == BTRFS_CSUM_TYPE_CRC32)
+               return hash_crc32c(data, len, out);
+       if (csum_type == BTRFS_CSUM_TYPE_XXHASH)
+               return hash_xxhash(data, len, out);
+       if (csum_type == BTRFS_CSUM_TYPE_SHA256)
+               return hash_sha256(data, len, out);
+
+       printf("Unknown csum type %d\n", csum_type);
+       return -EINVAL;
+}
+
+/*
+ * Check if the super is valid:
+ * - magic and checksum
+ * - nodesize/sectorsize - minimum, maximum, alignment
+ * - tree block starts   - alignment
+ * - number of devices   - something sane
+ * - sys array size      - maximum
+ *
+ * @sb: superblock as read from disk (BTRFS_SUPER_INFO_SIZE bytes)
+ *
+ * Returns 0 if the superblock passes every check, -EIO otherwise.
+ */
+static int btrfs_check_super(struct btrfs_super_block *sb)
+{
+       u8 result[BTRFS_CSUM_SIZE];
+       u16 csum_type;
+       int csum_size;
+       u8 *metadata_uuid;
+       int ret;
+
+       if (btrfs_super_magic(sb) != BTRFS_MAGIC)
+               return -EIO;
+
+       csum_type = btrfs_super_csum_type(sb);
+       if (csum_type >= btrfs_super_num_csums()) {
+               error("unsupported checksum algorithm %u", csum_type);
+               return -EIO;
+       }
+       csum_size = btrfs_super_csum_size(sb);
+
+       /* The checksum covers everything after the csum field itself */
+       ret = btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
+                             result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+       if (ret < 0) {
+               /*
+                * Do not compare against the zero-filled result of a hash
+                * that was never computed.
+                */
+               error("cannot calculate superblock checksum");
+               return -EIO;
+       }
+
+       if (memcmp(result, sb->csum, csum_size)) {
+               error("superblock checksum mismatch");
+               return -EIO;
+       }
+       if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
+               error("tree_root level too big: %d >= %d",
+                       btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
+               goto error_out;
+       }
+       if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
+               error("chunk_root level too big: %d >= %d",
+                       btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
+               goto error_out;
+       }
+       if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
+               error("log_root level too big: %d >= %d",
+                       btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
+               goto error_out;
+       }
+
+       if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
+               error("tree_root block unaligned: %llu", btrfs_super_root(sb));
+               goto error_out;
+       }
+       if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
+               error("chunk_root block unaligned: %llu",
+                       btrfs_super_chunk_root(sb));
+               goto error_out;
+       }
+       if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
+               error("log_root block unaligned: %llu",
+                       btrfs_super_log_root(sb));
+               goto error_out;
+       }
+       if (btrfs_super_nodesize(sb) < 4096) {
+               error("nodesize too small: %u < 4096",
+                       btrfs_super_nodesize(sb));
+               goto error_out;
+       }
+       if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
+               error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
+               goto error_out;
+       }
+       if (btrfs_super_sectorsize(sb) < 4096) {
+               error("sectorsize too small: %u < 4096",
+                       btrfs_super_sectorsize(sb));
+               goto error_out;
+       }
+       if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
+               error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
+               goto error_out;
+       }
+       if (btrfs_super_total_bytes(sb) == 0) {
+               error("invalid total_bytes 0");
+               goto error_out;
+       }
+       if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+               error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
+               goto error_out;
+       }
+       if ((btrfs_super_stripesize(sb) != 4096)
+               && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
+               error("invalid stripesize %u", btrfs_super_stripesize(sb));
+               goto error_out;
+       }
+
+       /* With METADATA_UUID set, dev items carry metadata_uuid, not fsid */
+       if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
+               metadata_uuid = sb->metadata_uuid;
+       else
+               metadata_uuid = sb->fsid;
+
+       if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
+               char fsid[BTRFS_UUID_UNPARSED_SIZE];
+               char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
+
+               /* Report the UUID that was actually compared above */
+               uuid_unparse(metadata_uuid, fsid);
+               uuid_unparse(sb->dev_item.fsid, dev_fsid);
+               error("dev_item UUID does not match fsid: %s != %s",
+                       dev_fsid, fsid);
+               goto error_out;
+       }
+
+       /*
+        * Hint to catch really bogus numbers, bitflips or so.
+        * This only reports the value; it does not reject the superblock.
+        */
+       if (btrfs_super_num_devices(sb) > (1UL << 31)) {
+               error("suspicious number of devices: %llu",
+                       btrfs_super_num_devices(sb));
+       }
+
+       if (btrfs_super_num_devices(sb) == 0) {
+               error("number of devices is 0");
+               goto error_out;
+       }
+
+       /*
+        * Obvious sys_chunk_array corruptions, it must hold at least one key
+        * and one chunk
+        */
+       if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+               error("system chunk array too big %u > %u",
+                     btrfs_super_sys_array_size(sb),
+                     BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+               goto error_out;
+       }
+       if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+                       + sizeof(struct btrfs_chunk)) {
+               error("system chunk array too small %u < %zu",
+                     btrfs_super_sys_array_size(sb),
+                     sizeof(struct btrfs_disk_key) +
+                     sizeof(struct btrfs_chunk));
+               goto error_out;
+       }
+
+       return 0;
+
+error_out:
+       error("superblock checksum matches but it has invalid members");
+       return -EIO;
+}
+
+/*
+ * btrfs_read_dev_super - read a valid primary superblock from a block device
+ * @desc,@part:        block device and partition to read from
+ * @sb:                buffer where the superblock is going to be read in
+ *
+ * Unlike the btrfs-progs/kernel version, here we only care about the first
+ * super block, thus it's much simpler.
+ *
+ * Returns 0 and fills @sb on success, -EIO if the read is short, the
+ * recorded bytenr is not BTRFS_SUPER_INFO_OFFSET, or the superblock fails
+ * btrfs_check_super(). @sb is left untouched on failure.
+ */
+int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
+                        struct btrfs_super_block *sb)
+{
+       /*
+        * The buffer is handed to the block layer and then accessed as a
+        * struct btrfs_super_block, so use a cache-aligned bounce buffer
+        * rather than a plain char array with no alignment guarantee.
+        */
+       ALLOC_CACHE_ALIGN_BUFFER(char, tmp, BTRFS_SUPER_INFO_SIZE);
+       struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
+       int ret;
+
+       ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE,
+                             BTRFS_SUPER_INFO_OFFSET);
+       if (ret < BTRFS_SUPER_INFO_SIZE)
+               return -EIO;
+
+       if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET)
+               return -EIO;
+
+       if (btrfs_check_super(buf))
+               return -EIO;
+
+       memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
+       return 0;
+}
+
+/*
+ * Checksum everything in @buf after the first BTRFS_CSUM_SIZE bytes.
+ *
+ * With @verify set, the first @csum_size bytes of the result are compared
+ * with the start of the buffer: returns 0 on match, 1 on mismatch (with a
+ * message unless @silent). Without @verify, the checksum is written into
+ * the buffer and 0 is returned.
+ */
+static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
+                                 int verify, int silent, u16 csum_type)
+{
+       u8 computed[BTRFS_CSUM_SIZE];
+       u32 payload_len = buf->len - BTRFS_CSUM_SIZE;
+
+       btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE,
+                       computed, payload_len);
+
+       if (!verify) {
+               write_extent_buffer(buf, computed, 0, csum_size);
+               return 0;
+       }
+
+       if (!memcmp_extent_buffer(buf, computed, 0, csum_size))
+               return 0;
+
+       /* FIXME: format */
+       if (!silent)
+               printk("checksum verify failed on %llu found %08X wanted %08X\n",
+                      (unsigned long long)buf->start,
+                      computed[0],
+                      buf->data[0]);
+       return 1;
+}
+
+/*
+ * Non-silent front end to __csum_tree_block_size(): verify or write the
+ * checksum of @buf and report mismatches.
+ */
+int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
+                        u16 csum_type)
+{
+       const int silent = 0;
+
+       return __csum_tree_block_size(buf, csum_size, verify, silent,
+                                     csum_type);
+}
+
+/*
+ * Verify or write the checksum of @buf using the checksum size and type
+ * recorded in the filesystem's superblock copy.
+ */
+static int csum_tree_block(struct btrfs_fs_info *fs_info,
+                          struct extent_buffer *buf, int verify)
+{
+       struct btrfs_super_block *sb = fs_info->super_copy;
+
+       return csum_tree_block_size(buf, btrfs_super_csum_size(sb), verify,
+                                   btrfs_super_csum_type(sb));
+}
+
+/*
+ * Look up the extent buffer for [@bytenr, @bytenr + @blocksize) in the
+ * filesystem's extent cache; returns whatever find_extent_buffer() returns.
+ */
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
+                                           u64 bytenr, u32 blocksize)
+{
+       struct extent_io_tree *cache = &fs_info->extent_cache;
+
+       return find_extent_buffer(cache, bytenr, blocksize);
+}
+
+/*
+ * Get a nodesize-long extent buffer for the tree block at @bytenr via
+ * alloc_extent_buffer().
+ */
+struct extent_buffer* btrfs_find_create_tree_block(
+               struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+       u32 blocksize = fs_info->nodesize;
+
+       return alloc_extent_buffer(fs_info, bytenr, blocksize);
+}
+
+/*
+ * Check that the generation in @eb's header matches @parent_transid.
+ *
+ * @io_tree:        unused here
+ * @eb:             tree block to check
+ * @parent_transid: expected generation; 0 disables the check
+ * @ignore:         when set, a mismatch is reported and flagged with
+ *                  EXTENT_BAD_TRANSID on @eb but still accepted
+ *
+ * Returns 0 if the block is acceptable, 1 on a mismatch that is not
+ * ignored (the buffer's uptodate state is cleared in that case).
+ */
+static int verify_parent_transid(struct extent_io_tree *io_tree,
+                                struct extent_buffer *eb, u64 parent_transid,
+                                int ignore)
+{
+       if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
+               return 0;
+
+       /* From here on the generation is known to differ */
+       printk("parent transid verify failed on %llu wanted %llu found %llu\n",
+              (unsigned long long)eb->start,
+              (unsigned long long)parent_transid,
+              (unsigned long long)btrfs_header_generation(eb));
+       if (ignore) {
+               eb->flags |= EXTENT_BAD_TRANSID;
+               printk("Ignoring transid failure\n");
+               return 0;
+       }
+
+       clear_extent_buffer_uptodate(eb);
+       return 1;
+}
+
+/*
+ * Fill @eb from disk using copy number @mirror.
+ *
+ * The buffer is read in as many pieces as btrfs_map_block() requires; each
+ * piece is read from the first stripe of the mapping.
+ *
+ * Returns 0 on success, -EIO if mapping fails, the device has no
+ * descriptor/partition, or a read fails.
+ */
+int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
+{
+       unsigned long done = 0;
+       unsigned long remaining = eb->len;
+
+       while (remaining) {
+               struct btrfs_multi_bio *multi = NULL;
+               struct btrfs_device *device;
+               u64 chunk_len = remaining;
+               int ret;
+
+               ret = btrfs_map_block(info, READ, eb->start + done,
+                                     &chunk_len, &multi, mirror, NULL);
+               if (ret) {
+                       printk("Couldn't map the block %Lu\n", eb->start + done);
+                       kfree(multi);
+                       return -EIO;
+               }
+
+               device = multi->stripes[0].dev;
+               if (!device->desc || !device->part) {
+                       kfree(multi);
+                       return -EIO;
+               }
+
+               /* The mapping may cover more than what is left to read */
+               if (chunk_len > remaining)
+                       chunk_len = remaining;
+
+               ret = read_extent_from_disk(device->desc, device->part,
+                                           multi->stripes[0].physical, eb,
+                                           done, chunk_len);
+               kfree(multi);
+               if (ret)
+                       return -EIO;
+
+               done += chunk_len;
+               remaining -= chunk_len;
+       }
+
+       return 0;
+}
+
+/*
+ * Read and validate the tree block at logical address @bytenr.
+ *
+ * @parent_transid is the generation the block is expected to carry; it is
+ * passed to btrfs_buffer_uptodate() and verify_parent_transid().
+ *
+ * Every available copy (mirror) is tried in turn. A copy is returned at once
+ * if it passes the checksum, header, transid and node/leaf checks. A copy
+ * that passes everything but the node/leaf check is remembered as
+ * candidate_mirror and is returned only after the other copies were tried.
+ * Once all copies have been tried, a final pass runs with ignore set, which
+ * makes verify_parent_transid() tolerate a generation mismatch.
+ *
+ * Returns the extent buffer on success, ERR_PTR(-EIO) for an unaligned
+ * @bytenr or when no usable copy is found, and ERR_PTR(-ENOMEM) when no
+ * extent buffer could be obtained.
+ */
+struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
+               u64 parent_transid)
+{
+       int ret;
+       struct extent_buffer *eb;
+       u64 best_transid = 0;
+       u32 sectorsize = fs_info->sectorsize;
+       int mirror_num = 1;
+       int good_mirror = 0;
+       int candidate_mirror = 0;
+       int num_copies;
+       int ignore = 0;
+
+       /*
+        * Don't even try to create tree block for unaligned tree block
+        * bytenr.
+        * Such unaligned tree block will free overlapping extent buffer,
+        * causing use-after-free bugs for fuzzed images.
+        */
+       if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
+               error("tree block bytenr %llu is not aligned to sectorsize %u",
+                     bytenr, sectorsize);
+               return ERR_PTR(-EIO);
+       }
+
+       eb = btrfs_find_create_tree_block(fs_info, bytenr);
+       if (!eb)
+               return ERR_PTR(-ENOMEM);
+
+       /* Already read and validated earlier: nothing to do */
+       if (btrfs_buffer_uptodate(eb, parent_transid))
+               return eb;
+
+       num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
+       while (1) {
+               ret = read_whole_eb(fs_info, eb, mirror_num);
+               if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
+                   check_tree_block(fs_info, eb) == 0 &&
+                   verify_parent_transid(&fs_info->extent_cache, eb,
+                                         parent_transid, ignore) == 0) {
+                       /*
+                        * check_tree_block() is less strict to allow btrfs
+                        * check to get raw eb with bad key order and fix it.
+                        * But we still need to try to get a good copy if
+                        * possible, or bad key order can go into tools like
+                        * btrfs ins dump-tree.
+                        */
+                       if (btrfs_header_level(eb))
+                               ret = btrfs_check_node(fs_info, NULL, eb);
+                       else
+                               ret = btrfs_check_leaf(fs_info, NULL, eb);
+                       /*
+                        * Accept a fully clean copy, or the remembered
+                        * candidate once we have come back to it.
+                        */
+                       if (!ret || candidate_mirror == mirror_num) {
+                               btrfs_set_buffer_uptodate(eb);
+                               return eb;
+                       }
+                       /* Remember the first copy that only failed the node/leaf check */
+                       if (candidate_mirror <= 0)
+                               candidate_mirror = mirror_num;
+               }
+               if (ignore) {
+                       /*
+                        * NOTE(review): if re-reading the candidate keeps
+                        * failing the checks above, this path appears to
+                        * retry it without bound - confirm.
+                        */
+                       if (candidate_mirror > 0) {
+                               mirror_num = candidate_mirror;
+                               continue;
+                       }
+                       if (check_tree_block(fs_info, eb))
+                               print_tree_block_error(fs_info, eb,
+                                               check_tree_block(fs_info, eb));
+                       else
+                               fprintf(stderr, "Csum didn't match\n");
+                       ret = -EIO;
+                       break;
+               }
+               /* Single copy: retry it once in ignore mode */
+               if (num_copies == 1) {
+                       ignore = 1;
+                       continue;
+               }
+               /* Track the copy with the highest header generation */
+               if (btrfs_header_generation(eb) > best_transid) {
+                       best_transid = btrfs_header_generation(eb);
+                       good_mirror = mirror_num;
+               }
+               mirror_num++;
+               /* All copies tried: make one last pass in ignore mode */
+               if (mirror_num > num_copies) {
+                       if (candidate_mirror > 0)
+                               mirror_num = candidate_mirror;
+                       else
+                               mirror_num = good_mirror;
+                       ignore = 1;
+                       continue;
+               }
+       }
+       /*
+        * We failed to read this tree block, so it should be released right
+        * now to avoid leaving a stale entry in the cache.
+        */
+       free_extent_buffer(eb);
+       return ERR_PTR(ret);
+}
+
+/*
+ * Read file data at logical address @logical into @data.
+ *
+ * On entry *@len is the number of bytes wanted. btrfs_map_block() may
+ * change it; it is then clamped so it never exceeds the requested amount.
+ * Only the first stripe of the mapping is read.
+ *
+ * Returns 0 on success, the btrfs_map_block() error if mapping fails, or
+ * -EIO if the device is unusable or the read is short.
+ */
+int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
+                    u64 *len, int mirror)
+{
+       struct btrfs_multi_bio *multi = NULL;
+       struct btrfs_device *device;
+       const u64 requested = *len;
+       int ret;
+
+       ret = btrfs_map_block(fs_info, READ, logical, len, &multi, mirror,
+                             NULL);
+       if (ret) {
+               fprintf(stderr, "Couldn't map the block %llu\n",
+                               logical);
+               goto err;
+       }
+
+       device = multi->stripes[0].dev;
+
+       /* Never hand back more than the caller asked for */
+       if (*len > requested)
+               *len = requested;
+
+       if (!device->desc || !device->part) {
+               ret = -EIO;
+               goto err;
+       }
+
+       ret = __btrfs_devread(device->desc, device->part, data, *len,
+                             multi->stripes[0].physical);
+       ret = (ret == *len) ? 0 : -EIO;
+err:
+       kfree(multi);
+       return ret;
+}
+
+/*
+ * Reset @root to a blank state bound to @fs_info and @objectid.
+ *
+ * The node pointer is cleared, the root key and root item are zeroed, and
+ * @objectid is stored in both root->objectid and root->root_key.objectid.
+ */
+void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
+                     u64 objectid)
+{
+       /* Identity */
+       root->fs_info = fs_info;
+       root->objectid = objectid;
+
+       /* Runtime state */
+       root->node = NULL;
+       root->track_dirty = 0;
+       root->last_trans = 0;
+       root->last_inode_alloc = 0;
+
+       /* On-disk key and item, blank except for the objectid */
+       memset(&root->root_item, 0, sizeof(root->root_item));
+       memset(&root->root_key, 0, sizeof(root->root_key));
+       root->root_key.objectid = objectid;
+}
+
+/*
+ * Initialise @root for tree @objectid and load its root node.
+ *
+ * The root item and key are looked up in @tree_root with
+ * btrfs_find_last_root(); the node is then read from the bytenr and
+ * generation recorded in that item.
+ *
+ * Returns 0 on success, the lookup error if the root item is not found,
+ * or -EIO if the root node cannot be read.
+ */
+static int find_and_setup_root(struct btrfs_root *tree_root,
+                              struct btrfs_fs_info *fs_info,
+                              u64 objectid, struct btrfs_root *root)
+{
+       struct btrfs_root_item *item = &root->root_item;
+       int ret;
+
+       btrfs_setup_root(root, fs_info, objectid);
+
+       ret = btrfs_find_last_root(tree_root, objectid, item, &root->root_key);
+       if (ret)
+               return ret;
+
+       root->node = read_tree_block(fs_info, btrfs_root_bytenr(item),
+                                    btrfs_root_generation(item));
+
+       return extent_buffer_uptodate(root->node) ? 0 : -EIO;
+}
+
+/*
+ * Release @root: drop its root node, if any, then free the structure.
+ * Always returns 0.
+ */
+int btrfs_free_fs_root(struct btrfs_root *root)
+{
+       struct extent_buffer *node = root->node;
+
+       if (node != NULL)
+               free_extent_buffer(node);
+
+       kfree(root);
+       return 0;
+}
+
+/* rb-tree destructor callback: free the btrfs_root embedding @node. */
+static void __free_fs_root(struct rb_node *node)
+{
+       btrfs_free_fs_root(container_of(node, struct btrfs_root, rb_node));
+}
+
+FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
+
+/*
+ * Allocate and load the root described by @location without touching the
+ * fs_root_tree cache.
+ *
+ * When location->offset is (u64)-1 the root item is found with
+ * find_and_setup_root(); otherwise @location is searched for exactly in the
+ * tree root and the root item is copied from the matching slot.
+ *
+ * Returns the new root (the caller owns it), or an ERR_PTR: -ENOMEM on
+ * allocation failure, -ENOENT if the item is missing or has zero refs,
+ * -EIO if the root node cannot be read, or the error from the lookup.
+ */
+struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
+                                              struct btrfs_key *location)
+{
+       struct btrfs_root *root;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_path *path;
+       struct extent_buffer *l;
+       u64 generation;
+       int ret = 0;
+
+       root = calloc(1, sizeof(*root));
+       if (!root)
+               return ERR_PTR(-ENOMEM);
+       /* offset == -1: look the root item up via find_and_setup_root() */
+       if (location->offset == (u64)-1) {
+               ret = find_and_setup_root(tree_root, fs_info,
+                                         location->objectid, root);
+               if (ret) {
+                       free(root);
+                       return ERR_PTR(ret);
+               }
+               goto insert;
+       }
+
+       btrfs_setup_root(root, fs_info,
+                        location->objectid);
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               free(root);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       /* Exact-key lookup: a positive return means "not found" */
+       ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
+       if (ret != 0) {
+               if (ret > 0)
+                       ret = -ENOENT;
+               goto out;
+       }
+       l = path->nodes[0];
+       read_extent_buffer(l, &root->root_item,
+              btrfs_item_ptr_offset(l, path->slots[0]),
+              sizeof(root->root_item));
+       memcpy(&root->root_key, location, sizeof(*location));
+
+       /* If this root is already an orphan, no need to read */
+       if (btrfs_root_refs(&root->root_item) == 0) {
+               ret = -ENOENT;
+               goto out;
+       }
+       ret = 0;
+out:
+       /* Reached on both success and failure of the lookup above */
+       btrfs_free_path(path);
+       if (ret) {
+               free(root);
+               return ERR_PTR(ret);
+       }
+       generation = btrfs_root_generation(&root->root_item);
+       root->node = read_tree_block(fs_info,
+                       btrfs_root_bytenr(&root->root_item), generation);
+       if (!extent_buffer_uptodate(root->node)) {
+               free(root);
+               return ERR_PTR(-EIO);
+       }
+insert:
+       root->ref_cows = 1;
+       return root;
+}
+
+/*
+ * rb-tree search comparator: compare the u64 objectid pointed to by @data
+ * with the objectid of the btrfs_root embedding @node.
+ *
+ * Returns 1 if the searched objectid is larger, -1 if smaller, 0 if equal.
+ */
+static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
+                                           void *data)
+{
+       struct btrfs_root *root = rb_entry(node, struct btrfs_root, rb_node);
+       u64 wanted = *((u64 *)data);
+
+       if (wanted == root->objectid)
+               return 0;
+
+       return wanted > root->objectid ? 1 : -1;
+}
+
+/*
+ * rb-tree insert comparator: order two roots by objectid, using @node2's
+ * objectid as the search value against @node1.
+ */
+int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2)
+{
+       struct btrfs_root *other = rb_entry(node2, struct btrfs_root, rb_node);
+       void *key = (void *)&other->objectid;
+
+       return btrfs_fs_roots_compare_objectids(node1, key);
+}
+
+/*
+ * Return the root described by @location, reading and caching it if needed.
+ *
+ * The tree, chunk and csum roots are served straight from @fs_info. Any
+ * other root is looked up in fs_info->fs_root_tree by objectid and, if
+ * absent, read with btrfs_read_fs_root_no_cache() and inserted.
+ *
+ * @location must not name the tree-reloc root and must have offset
+ * (u64)-1; both are enforced with BUG_ON().
+ *
+ * Returns the root, or the ERR_PTR from btrfs_read_fs_root_no_cache().
+ */
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
+                                     struct btrfs_key *location)
+{
+       u64 objectid = location->objectid;
+       struct btrfs_root *root;
+       struct rb_node *cached;
+       int ret;
+
+       if (objectid == BTRFS_ROOT_TREE_OBJECTID)
+               return fs_info->tree_root;
+       if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
+               return fs_info->chunk_root;
+       if (objectid == BTRFS_CSUM_TREE_OBJECTID)
+               return fs_info->csum_root;
+
+       BUG_ON(objectid == BTRFS_TREE_RELOC_OBJECTID ||
+              location->offset != (u64)-1);
+
+       cached = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
+                          btrfs_fs_roots_compare_objectids, NULL);
+       if (cached)
+               return container_of(cached, struct btrfs_root, rb_node);
+
+       root = btrfs_read_fs_root_no_cache(fs_info, location);
+       if (IS_ERR(root))
+               return root;
+
+       /* The search above missed, so the insert is not expected to collide */
+       ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
+                       btrfs_fs_roots_compare_roots);
+       BUG_ON(ret);
+
+       return root;
+}
+
+/*
+ * Free @fs_info together with the superblock copy and the three root
+ * structures allocated by btrfs_new_fs_info(). Root nodes are not released
+ * here.
+ */
+void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
+{
+       /* Members first, in any order, then the container itself */
+       free(fs_info->super_copy);
+       free(fs_info->csum_root);
+       free(fs_info->chunk_root);
+       free(fs_info->tree_root);
+
+       free(fs_info);
+}
+
+/*
+ * Allocate a zeroed btrfs_fs_info with its tree/chunk/csum root structures
+ * and superblock buffer, and initialise the extent cache, fs root tree,
+ * chunk mapping cache and mutex.
+ *
+ * Returns the new fs_info, or NULL if any allocation fails (anything
+ * already allocated is freed).
+ */
+struct btrfs_fs_info *btrfs_new_fs_info(void)
+{
+       struct btrfs_fs_info *fs_info = calloc(1, sizeof(*fs_info));
+
+       if (!fs_info)
+               return NULL;
+
+       fs_info->tree_root = calloc(1, sizeof(struct btrfs_root));
+       fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root));
+       fs_info->csum_root = calloc(1, sizeof(struct btrfs_root));
+       fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
+
+       if (fs_info->tree_root && fs_info->chunk_root &&
+           fs_info->csum_root && fs_info->super_copy) {
+               extent_io_tree_init(&fs_info->extent_cache);
+
+               fs_info->fs_root_tree = RB_ROOT;
+               cache_tree_init(&fs_info->mapping_tree.cache_tree);
+
+               mutex_init(&fs_info->fs_mutex);
+
+               return fs_info;
+       }
+
+       /* Partial allocation: members left NULL are harmless to free() */
+       btrfs_free_fs_info(fs_info);
+       return NULL;
+}
+
+/*
+ * Load tree @objectid into @info_root via the tree root.
+ *
+ * @str names the tree in the error message. Any failure is reported as
+ * -EIO; returns 0 on success.
+ */
+static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
+                                     struct btrfs_root *info_root,
+                                     u64 objectid, char *str)
+{
+       struct btrfs_root *tree_root = fs_info->tree_root;
+
+       if (!find_and_setup_root(tree_root, fs_info, objectid, info_root))
+               return 0;
+
+       error("could not setup %s tree", str);
+       return -EIO;
+}
+
+/*
+ * Load the tree root, the csum root and the default fs tree.
+ *
+ * The tree root node is read from the bytenr and generation stored in the
+ * superblock copy. The fs tree is obtained through btrfs_read_fs_root()
+ * and stored in fs_info->fs_root.
+ *
+ * Returns 0 on success, -EIO if any root cannot be read.
+ */
+int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_super_block *sb = fs_info->super_copy;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_key fs_tree_key;
+       u64 generation;
+       int ret;
+
+       btrfs_setup_root(tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
+       generation = btrfs_super_generation(sb);
+
+       tree_root->node = read_tree_block(fs_info, btrfs_super_root(sb),
+                                         generation);
+       if (!extent_buffer_uptodate(tree_root->node)) {
+               fprintf(stderr, "Couldn't read tree root\n");
+               return -EIO;
+       }
+
+       ret = setup_root_or_create_block(fs_info, fs_info->csum_root,
+                                        BTRFS_CSUM_TREE_OBJECTID, "csum");
+       if (ret)
+               return ret;
+       fs_info->csum_root->track_dirty = 1;
+
+       fs_info->last_trans_committed = generation;
+
+       /* offset -1 is what btrfs_read_fs_root() requires of its key */
+       fs_tree_key.objectid = BTRFS_FS_TREE_OBJECTID;
+       fs_tree_key.type = BTRFS_ROOT_ITEM_KEY;
+       fs_tree_key.offset = (u64)-1;
+       fs_info->fs_root = btrfs_read_fs_root(fs_info, &fs_tree_key);
+
+       return IS_ERR(fs_info->fs_root) ? -EIO : 0;
+}
+
+/*
+ * Drop the root nodes of the csum, tree and chunk roots (in that order).
+ * The root structures themselves are not freed here.
+ */
+void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_root *roots[] = {
+               fs_info->csum_root,
+               fs_info->tree_root,
+               fs_info->chunk_root,
+       };
+       int i;
+
+       for (i = 0; i < 3; i++) {
+               if (roots[i])
+                       free_extent_buffer(roots[i]->node);
+       }
+}
+
+/* Cache-tree destructor callback: free the map_lookup embedding @ce. */
+static void free_map_lookup(struct cache_extent *ce)
+{
+       kfree(container_of(ce, struct map_lookup, ce));
+}
+
+FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
+
+/*
+ * Tear down the chunk mapping cache first, then the extent buffer cache.
+ */
+void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
+{
+       struct cache_tree *mappings = &fs_info->mapping_tree.cache_tree;
+       struct extent_io_tree *extents = &fs_info->extent_cache;
+
+       free_mapping_cache_tree(mappings);
+       extent_io_tree_cleanup(extents);
+}
+
+/*
+ * Check that the partition can hold the primary superblock, then scan it
+ * for a btrfs filesystem.
+ *
+ * @desc,@part:  block device and partition to scan
+ * @fs_devices:  filled in by btrfs_scan_one_device()
+ *
+ * Returns 0 on success, -EINVAL if the partition ends before the end of
+ * the superblock, or the error from btrfs_scan_one_device().
+ */
+static int btrfs_scan_fs_devices(struct blk_desc *desc,
+                                struct disk_partition *part,
+                                struct btrfs_fs_devices **fs_devices)
+{
+       /*
+        * Widen before shifting: part->size is an lbaint_t, which may be
+        * 32 bits wide, so the byte size of a partition of 4 GiB or more
+        * would otherwise wrap and be wrongly rejected.
+        */
+       u64 part_bytes = (u64)part->size << desc->log2blksz;
+       u64 total_devs;
+       int ret;
+
+       if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
+                    desc->blksz) > part_bytes) {
+               error("superblock end %u is larger than device size %llu",
+                               BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
+                               (unsigned long long)part_bytes);
+               return -EINVAL;
+       }
+
+       ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs);
+       if (ret) {
+               fprintf(stderr, "No valid Btrfs found\n");
+               return ret;
+       }
+       return 0;
+}
+
+/*
+ * Refuse superblocks that carry incompat feature bits outside
+ * BTRFS_FEATURE_INCOMPAT_SUPP.
+ *
+ * As a side effect the MIXED_BACKREF incompat bit is set in @sb when it is
+ * missing.
+ *
+ * Returns 0 if the filesystem can be opened, -ENOTSUPP otherwise.
+ */
+int btrfs_check_fs_compatibility(struct btrfs_super_block *sb)
+{
+       u64 flags = btrfs_super_incompat_flags(sb);
+       u64 unsupported = flags & ~BTRFS_FEATURE_INCOMPAT_SUPP;
+
+       if (unsupported) {
+               printk("couldn't open because of unsupported "
+                      "option features (%llx).\n",
+                      (unsigned long long)unsupported);
+               return -ENOTSUPP;
+       }
+
+       if (!(flags & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF))
+               btrfs_set_super_incompat_flags(sb,
+                               flags | BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF);
+
+       return 0;
+}
+
+/*
+ * Bring up the chunk tree of @fs_info: set up the chunk root, call
+ * btrfs_read_sys_array(), read the chunk root node named by the super block
+ * copy and finally call btrfs_read_chunk_tree().
+ *
+ * Returns 0 on success, -EIO if the chunk root node is not up to date after
+ * reading, or the error from btrfs_read_sys_array()/btrfs_read_chunk_tree().
+ */
+static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_super_block *sb = fs_info->super_copy;
+       u64 bytenr;
+       u64 gen;
+       int ret;
+
+       btrfs_setup_root(fs_info->chunk_root, fs_info,
+                       BTRFS_CHUNK_TREE_OBJECTID);
+
+       ret = btrfs_read_sys_array(fs_info);
+       if (ret)
+               return ret;
+
+       /* Generation and location of the chunk root, from the super block */
+       gen = btrfs_super_chunk_root_generation(sb);
+       bytenr = btrfs_super_chunk_root(sb);
+
+       fs_info->chunk_root->node = read_tree_block(fs_info, bytenr, gen);
+       if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
+               error("cannot read chunk root");
+               return -EIO;
+       }
+
+       ret = btrfs_read_chunk_tree(fs_info);
+       if (ret)
+               fprintf(stderr, "Couldn't read chunk tree\n");
+       return ret;
+}
+
+/*
+ * Open the btrfs filesystem on partition @part of block device @desc.
+ *
+ * Allocates a btrfs_fs_info, scans and opens the device, reads and checks
+ * the super block, then sets up the chunk tree and all remaining roots.
+ *
+ * Returns the new fs_info on success and NULL on failure.  On failure the
+ * labels at the end of the function undo, in reverse order, whatever had
+ * been set up so far.
+ */
+struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
+                                        struct disk_partition *part)
+{
+       struct btrfs_fs_info *fs_info;
+       struct btrfs_super_block *disk_super;
+       struct btrfs_fs_devices *fs_devices = NULL;
+       struct extent_buffer *eb;
+       int ret;
+
+       fs_info = btrfs_new_fs_info();
+       if (!fs_info) {
+               fprintf(stderr, "Failed to allocate memory for fs_info\n");
+               return NULL;
+       }
+
+       ret = btrfs_scan_fs_devices(desc, part, &fs_devices);
+       if (ret)
+               goto out;
+
+       fs_info->fs_devices = fs_devices;
+
+       ret = btrfs_open_devices(fs_devices);
+       if (ret)
+               goto out;
+
+       /* The super block is read straight into fs_info->super_copy */
+       disk_super = fs_info->super_copy;
+       ret = btrfs_read_dev_super(desc, part, disk_super);
+       if (ret) {
+               printk("No valid btrfs found\n");
+               goto out_devices;
+       }
+
+       /* Refuse a filesystem flagged as being in the middle of a fsid change */
+       if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) {
+               fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
+               goto out_devices;
+       }
+
+       /* The super block must carry the same ids as the scanned devices */
+       ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
+       if (btrfs_fs_incompat(fs_info, METADATA_UUID))
+               ASSERT(!memcmp(disk_super->metadata_uuid,
+                              fs_devices->metadata_uuid, BTRFS_FSID_SIZE));
+
+       fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
+       fs_info->nodesize = btrfs_super_nodesize(disk_super);
+       fs_info->stripesize = btrfs_super_stripesize(disk_super);
+
+       ret = btrfs_check_fs_compatibility(fs_info->super_copy);
+       if (ret)
+               goto out_devices;
+
+       ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
+       if (ret)
+               goto out_chunk;
+
+       /* Without a chunk root nothing more can be read, return directly */
+       if (!fs_info->chunk_root)
+               return fs_info;
+
+       /* Copy the chunk tree uuid out of the chunk root node's header */
+       eb = fs_info->chunk_root->node;
+       read_extent_buffer(eb, fs_info->chunk_tree_uuid,
+                          btrfs_header_chunk_tree_uuid(eb),
+                          BTRFS_UUID_SIZE);
+
+       ret = btrfs_setup_all_roots(fs_info);
+       if (ret)
+               goto out_chunk;
+
+       return fs_info;
+
+out_chunk:
+       btrfs_release_all_roots(fs_info);
+       btrfs_cleanup_all_caches(fs_info);
+out_devices:
+       btrfs_close_devices(fs_devices);
+out:
+       btrfs_free_fs_info(fs_info);
+       return NULL;
+}
+
+/*
+ * Tear down @fs_info: free the fs root tree, release all roots, close the
+ * devices, drop the caches and finally free @fs_info itself.
+ *
+ * Returns the result of btrfs_close_devices().
+ */
+int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
+{
+       int close_ret;
+
+       free_fs_roots_tree(&fs_info->fs_root_tree);
+       btrfs_release_all_roots(fs_info);
+
+       close_ret = btrfs_close_devices(fs_info->fs_devices);
+
+       btrfs_cleanup_all_caches(fs_info);
+       btrfs_free_fs_info(fs_info);
+
+       return close_ret;
+}
+
+/*
+ * Tell whether @buf can be used as is.
+ *
+ * Returns 0 if @buf is not flagged up to date.  Otherwise returns non-zero
+ * exactly when verify_parent_transid() reports success (0) for
+ * @parent_transid.
+ */
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
+{
+       if (!extent_buffer_uptodate(buf))
+               return 0;
+
+       return !verify_parent_transid(&buf->fs_info->extent_cache, buf,
+                                     parent_transid, 1);
+}
+
+/* Thin wrapper: forwards @eb to set_extent_buffer_uptodate(). */
+int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
+{
+       int ret = set_extent_buffer_uptodate(eb);
+
+       return ret;
+}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
new file mode 100644 (file)
index 0000000..a347912
--- /dev/null
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0+
+#ifndef __BTRFS_DISK_IO_H__
+#define __BTRFS_DISK_IO_H__
+
+#include <linux/sizes.h>
+#include <fs_internal.h>
+#include "ctree.h"
+
+/* The super block occupies [OFFSET, OFFSET + SIZE) within the partition */
+#define BTRFS_SUPER_INFO_OFFSET SZ_64K
+#define BTRFS_SUPER_INFO_SIZE  SZ_4K
+
+/* From btrfs-progs */
+int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror);
+struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
+               u64 parent_transid);
+
+int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
+                    u64 *len, int mirror);
+struct extent_buffer *btrfs_find_create_tree_block(
+               struct btrfs_fs_info *fs_info, u64 bytenr);
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
+                                           u64 bytenr, u32 blocksize);
+struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
+                                              struct btrfs_key *location);
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
+                                     struct btrfs_key *location);
+
+void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
+                     u64 objectid);
+
+void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
+struct btrfs_fs_info *btrfs_new_fs_info(void);
+
+/*
+ * Returns -ENOTSUPP if @sb carries an incompat flag outside
+ * BTRFS_FEATURE_INCOMPAT_SUPP, 0 otherwise.
+ */
+int btrfs_check_fs_compatibility(struct btrfs_super_block *sb);
+int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info);
+void btrfs_release_all_roots(struct btrfs_fs_info *fs_info);
+
+/* Frees the chunk mapping tree entries and the extent_cache states */
+void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info);
+
+/*
+ * open_ctree_fs_info() returns a ready fs_info for the filesystem on @part,
+ * or NULL on failure; close_ctree_fs_info() releases it again and returns
+ * the result of closing the devices.
+ */
+struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
+                                        struct disk_partition *part);
+int close_ctree_fs_info(struct btrfs_fs_info *fs_info);
+
+int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
+                        struct btrfs_super_block *sb);
+
+/*
+ * Non-zero only if @buf is flagged up to date and verify_parent_transid()
+ * accepts @parent_transid for it.
+ */
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
+int btrfs_set_buffer_uptodate(struct extent_buffer *eb);
+int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len);
+int csum_tree_block_size(struct extent_buffer *buf, u16 csum_sectorsize,
+                        int verify, u16 csum_type);
+#endif
diff --git a/fs/btrfs/extent-cache.c b/fs/btrfs/extent-cache.c
new file mode 100644 (file)
index 0000000..bc8cf3a
--- /dev/null
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Crossported from the same named file of btrfs-progs.
+ *
+ * Minor modification to include headers.
+ */
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <stdlib.h>
+#include "extent-cache.h"
+#include "common/rbtree-utils.h"
+
+/*
+ * Search key handed to the rb-tree comparison callbacks in this file: the
+ * range [start, start + size) and, for the "2" variants, an objectid.
+ */
+struct cache_extent_search_range {
+       u64 objectid;   /* only read by cache_tree_comp_range2() */
+       u64 start;      /* first position of the range */
+       u64 size;       /* length of the range */
+};
+
+/*
+ * rb-tree comparison callback: compare the cache_extent behind @node with
+ * the struct cache_extent_search_range passed in @data.
+ *
+ * Returns 1 if the entry ends at or before the start of the range, -1 if
+ * the range ends at or before the start of the entry, 0 if they overlap.
+ */
+static int cache_tree_comp_range(struct rb_node *node, void *data)
+{
+       struct cache_extent_search_range *key = data;
+       struct cache_extent *ce = rb_entry(node, struct cache_extent, rb_node);
+
+       if (ce->start + ce->size <= key->start)
+               return 1;
+       if (key->start + key->size <= ce->start)
+               return -1;
+       return 0;
+}
+
+/*
+ * Node-versus-node comparison for insertion: the range of @node2 is turned
+ * into a search key and @node1 is compared against it with
+ * cache_tree_comp_range().
+ */
+static int cache_tree_comp_nodes(struct rb_node *node1, struct rb_node *node2)
+{
+       struct cache_extent *other = rb_entry(node2, struct cache_extent,
+                                             rb_node);
+       struct cache_extent_search_range key = {
+               .start = other->start,
+               .size = other->size,
+       };
+
+       return cache_tree_comp_range(node1, &key);
+}
+
+/*
+ * Like cache_tree_comp_range(), but entries are ordered by objectid first.
+ *
+ * Returns 1 if the entry sorts before the key (smaller objectid, or same
+ * objectid and the entry ends at or before the key's start), -1 if it sorts
+ * after, and 0 if the objectids match and the ranges overlap.
+ */
+static int cache_tree_comp_range2(struct rb_node *node, void *data)
+{
+       struct cache_extent_search_range *key = data;
+       struct cache_extent *ce = rb_entry(node, struct cache_extent, rb_node);
+
+       if (ce->objectid != key->objectid)
+               return ce->objectid < key->objectid ? 1 : -1;
+
+       if (ce->start + ce->size <= key->start)
+               return 1;
+       if (key->start + key->size <= ce->start)
+               return -1;
+       return 0;
+}
+
+/*
+ * Node-versus-node comparison for the objectid-aware tree: @node2 supplies
+ * the key (objectid, start, size) that @node1 is compared against.
+ */
+static int cache_tree_comp_nodes2(struct rb_node *node1, struct rb_node *node2)
+{
+       struct cache_extent *other = rb_entry(node2, struct cache_extent,
+                                             rb_node);
+       struct cache_extent_search_range key = {
+               .objectid = other->objectid,
+               .start = other->start,
+               .size = other->size,
+       };
+
+       return cache_tree_comp_range2(node1, &key);
+}
+
+/* Reset @tree to an empty rb-tree. */
+void cache_tree_init(struct cache_tree *tree)
+{
+       tree->root = RB_ROOT;
+}
+
+/*
+ * Allocate a cache_extent for [start, start + size) with objectid 0.
+ * Returns NULL when malloc() fails.
+ */
+static struct cache_extent *alloc_cache_extent(u64 start, u64 size)
+{
+       struct cache_extent *ce;
+
+       ce = malloc(sizeof(*ce));
+       if (ce) {
+               ce->objectid = 0;
+               ce->start = start;
+               ce->size = size;
+       }
+       return ce;
+}
+
+/*
+ * Allocate a cache_extent for [start, start + size) and insert it into
+ * @tree.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
+ * insert_cache_extent(), in which case the new entry is freed again.
+ */
+int add_cache_extent(struct cache_tree *tree, u64 start, u64 size)
+{
+       struct cache_extent *ce;
+       int ret;
+
+       ce = alloc_cache_extent(start, size);
+       if (!ce)
+               return -ENOMEM;
+
+       ret = insert_cache_extent(tree, ce);
+       if (!ret)
+               return 0;
+
+       free(ce);
+       return ret;
+}
+
+/* Insert @pe into @tree, ordered by range only; returns rb_insert()'s result. */
+int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe)
+{
+       struct rb_node *node = &pe->rb_node;
+
+       return rb_insert(&tree->root, node, cache_tree_comp_nodes);
+}
+
+/* Insert @pe into @tree, ordered by objectid and then by range. */
+int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe)
+{
+       struct rb_node *node = &pe->rb_node;
+
+       return rb_insert(&tree->root, node, cache_tree_comp_nodes2);
+}
+
+/*
+ * Look up an entry of @tree overlapping [start, start + size).
+ * Returns NULL when rb_search() finds none.
+ */
+struct cache_extent *lookup_cache_extent(struct cache_tree *tree,
+                                        u64 start, u64 size)
+{
+       struct cache_extent_search_range key = {
+               .start = start,
+               .size = size,
+       };
+       struct rb_node *node;
+
+       node = rb_search(&tree->root, &key, cache_tree_comp_range, NULL);
+       return node ? rb_entry(node, struct cache_extent, rb_node) : NULL;
+}
+
+/*
+ * Look up an entry of @tree with the given @objectid that overlaps
+ * [start, start + size).  Returns NULL when rb_search() finds none.
+ */
+struct cache_extent *lookup_cache_extent2(struct cache_tree *tree,
+                                        u64 objectid, u64 start, u64 size)
+{
+       struct cache_extent_search_range key = {
+               .objectid = objectid,
+               .start = start,
+               .size = size,
+       };
+       struct rb_node *node;
+
+       node = rb_search(&tree->root, &key, cache_tree_comp_range2, NULL);
+       return node ? rb_entry(node, struct cache_extent, rb_node) : NULL;
+}
+
+/*
+ * Find the entry of @tree containing @start.  When there is none, fall back
+ * to the node rb_search() hands back through its last argument; returns
+ * NULL if that is NULL as well.
+ */
+struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start)
+{
+       struct cache_extent_search_range key = {
+               .start = start,
+               .size = 1,
+       };
+       struct rb_node *fallback;
+       struct rb_node *node;
+
+       node = rb_search(&tree->root, &key, cache_tree_comp_range, &fallback);
+       if (!node)
+               node = fallback;
+
+       return node ? rb_entry(node, struct cache_extent, rb_node) : NULL;
+}
+
+/*
+ * Objectid-aware variant of search_cache_extent(): find the entry with
+ * @objectid containing @start, else the node rb_search() hands back through
+ * its last argument, else NULL.
+ */
+struct cache_extent *search_cache_extent2(struct cache_tree *tree,
+                                        u64 objectid, u64 start)
+{
+       struct cache_extent_search_range key = {
+               .objectid = objectid,
+               .start = start,
+               .size = 1,
+       };
+       struct rb_node *fallback;
+       struct rb_node *node;
+
+       node = rb_search(&tree->root, &key, cache_tree_comp_range2, &fallback);
+       if (!node)
+               node = fallback;
+
+       return node ? rb_entry(node, struct cache_extent, rb_node) : NULL;
+}
+
+/* Return the entry for rb_first() of @tree, or NULL if there is none. */
+struct cache_extent *first_cache_extent(struct cache_tree *tree)
+{
+       struct rb_node *node;
+
+       node = rb_first(&tree->root);
+       return node ? rb_entry(node, struct cache_extent, rb_node) : NULL;
+}
+
+/* Return the entry for rb_last() of @tree, or NULL if there is none. */
+struct cache_extent *last_cache_extent(struct cache_tree *tree)
+{
+       struct rb_node *node;
+
+       node = rb_last(&tree->root);
+       return node ? rb_entry(node, struct cache_extent, rb_node) : NULL;
+}
+
+/* Return the entry for rb_prev() of @pe, or NULL if there is none. */
+struct cache_extent *prev_cache_extent(struct cache_extent *pe)
+{
+       struct rb_node *node;
+
+       node = rb_prev(&pe->rb_node);
+       return node ? rb_entry(node, struct cache_extent, rb_node) : NULL;
+}
+
+/* Return the entry for rb_next() of @pe, or NULL if there is none. */
+struct cache_extent *next_cache_extent(struct cache_extent *pe)
+{
+       struct rb_node *node;
+
+       node = rb_next(&pe->rb_node);
+       return node ? rb_entry(node, struct cache_extent, rb_node) : NULL;
+}
+
+/* Unlink @pe from @tree.  The entry itself is not freed. */
+void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe)
+{
+       struct rb_node *node = &pe->rb_node;
+
+       rb_erase(node, &tree->root);
+}
+
+/*
+ * Empty @tree: repeatedly unlink its first entry and pass it to @free_func
+ * until no entry is left.
+ */
+void cache_tree_free_extents(struct cache_tree *tree,
+                            free_cache_extent free_func)
+{
+       for (;;) {
+               struct cache_extent *victim = first_cache_extent(tree);
+
+               if (!victim)
+                       break;
+               remove_cache_extent(tree, victim);
+               free_func(victim);
+       }
+}
+
+/* free_cache_extent callback for entries that are plain allocations. */
+static void free_extent_cache(struct cache_extent *pe)
+{
+       free(pe);
+}
+
+/* Remove every entry from @tree and free() each one. */
+void free_extent_cache_tree(struct cache_tree *tree)
+{
+       free_cache_extent release = free_extent_cache;
+
+       cache_tree_free_extents(tree, release);
+}
+
+/*
+ * Add [start, start + size) to @tree, merging it with a directly adjacent
+ * predecessor and/or successor instead of creating a new entry where
+ * possible.
+ *
+ * The new range must not overlap an existing entry (checked with BUG_ON).
+ * Returns 0 when the range was merged, otherwise the result of
+ * add_cache_extent().
+ */
+int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size)
+{
+       struct cache_extent *hit;
+       struct cache_extent *before = NULL;
+       struct cache_extent *after = NULL;
+       int merged_after = 0;
+       int merged_before = 0;
+
+       if (cache_tree_empty(tree))
+               return add_cache_extent(tree, start, size);
+
+       hit = search_cache_extent(tree, start);
+       if (!hit) {
+               /* Nothing at or after start: the last entry precedes us */
+               before = last_cache_extent(tree);
+       } else if (start <= hit->start) {
+               after = hit;
+               before = prev_cache_extent(hit);
+       } else {
+               before = hit;
+               after = next_cache_extent(hit);
+       }
+
+       /*
+        * The new range must not overlap its neighbours, otherwise an extra
+        * loop would be needed to do the merge.
+        */
+       BUG_ON(after && start + size > after->start);
+       BUG_ON(before && before->start + before->size > start);
+
+       /* Grow the successor downwards if the new range touches it */
+       if (after && start + size == after->start) {
+               merged_after = 1;
+               after->size = after->start + after->size - start;
+               after->start = start;
+       }
+
+       if (before && before->start + before->size == start) {
+               merged_before = 1;
+               if (merged_after) {
+                       /* Both sides touch: fold the predecessor in as well */
+                       after->size = after->start + after->size -
+                                     before->start;
+                       after->start = before->start;
+                       remove_cache_extent(tree, before);
+                       free(before);
+               } else {
+                       before->size = start + size - before->start;
+               }
+       }
+
+       if (merged_before || merged_after)
+               return 0;
+
+       return add_cache_extent(tree, start, size);
+}
diff --git a/fs/btrfs/extent-cache.h b/fs/btrfs/extent-cache.h
new file mode 100644 (file)
index 0000000..2fee81a
--- /dev/null
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Crossported from the same named file of btrfs-progs.
+ *
+ * Minor modification to include headers.
+ */
+#ifndef __BTRFS_EXTENT_CACHE_H__
+#define __BTRFS_EXTENT_CACHE_H__
+
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+/* A tree of cache_extent entries, kept in an rb-tree. */
+struct cache_tree {
+       struct rb_root root;    /* root of the rb-tree linking the entries */
+};
+
+/* One entry of a cache_tree, describing the range [start, start + size). */
+struct cache_extent {
+       struct rb_node rb_node; /* links the entry into cache_tree.root */
+       u64 objectid;           /* extra sort key, used by the "2" helpers */
+       u64 start;              /* first position of the range */
+       u64 size;               /* length of the range */
+};
+
+void cache_tree_init(struct cache_tree *tree);
+
+struct cache_extent *first_cache_extent(struct cache_tree *tree);
+struct cache_extent *last_cache_extent(struct cache_tree *tree);
+struct cache_extent *prev_cache_extent(struct cache_extent *pe);
+struct cache_extent *next_cache_extent(struct cache_extent *pe);
+
+/*
+ * Find a cache_extent which covers start.
+ *
+ * If not found, return next cache_extent if possible.
+ */
+struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start);
+
+/*
+ * Find a cache_extent which overlaps the range [start, start + size).
+ *
+ * If not found, return NULL.
+ */
+struct cache_extent *lookup_cache_extent(struct cache_tree *tree,
+                                        u64 start, u64 size);
+
+/*
+ * Add a non-overlapping extent into the cache tree.
+ *
+ * If [start, start + size) overlaps with an existing one, it will return
+ * -EEXIST.
+ */
+int add_cache_extent(struct cache_tree *tree, u64 start, u64 size);
+
+/*
+ * Same as add_cache_extent, but with a caller-provided cache_extent struct.
+ */
+int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe);
+void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe);
+
+/* Non-zero when @tree holds no entry. */
+static inline int cache_tree_empty(struct cache_tree *tree)
+{
+       struct rb_root *root = &tree->root;
+
+       return RB_EMPTY_ROOT(root);
+}
+
+typedef void (*free_cache_extent)(struct cache_extent *pe);
+
+void cache_tree_free_extents(struct cache_tree *tree,
+                            free_cache_extent free_func);
+
+/*
+ * Define a static helper free_<name>_tree(tree) that empties @tree through
+ * cache_tree_free_extents(), releasing each entry with @free_func.
+ */
+#define FREE_EXTENT_CACHE_BASED_TREE(name, free_func)          \
+static void free_##name##_tree(struct cache_tree *tree)                \
+{                                                              \
+       cache_tree_free_extents(tree, free_func);               \
+}
+
+void free_extent_cache_tree(struct cache_tree *tree);
+
+/*
+ * Search a cache_extent with same objectid, and covers start.
+ *
+ * If not found, return next if possible.
+ */
+struct cache_extent *search_cache_extent2(struct cache_tree *tree,
+                                         u64 objectid, u64 start);
+/*
+ * Search a cache_extent with the same objectid which overlaps the range
+ * [start, start + size).
+ *
+ * If not found, return NULL.
+ */
+struct cache_extent *lookup_cache_extent2(struct cache_tree *tree,
+                                         u64 objectid, u64 start, u64 size);
+int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe);
+
+/*
+ * Insert a cache_extent range [start, start + size).
+ *
+ * This function may merge the new range with adjacent existing cache_extents.
+ * NOTE: the caller must ensure the inserted range won't overlap any existing
+ * range.
+ */
+int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size);
+
+#endif
index 2e4599c..774e29e 100644 (file)
  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
  */
 
-#include "btrfs.h"
+#include <linux/kernel.h>
+#include <linux/bug.h>
 #include <malloc.h>
 #include <memalign.h>
+#include "btrfs.h"
+#include "ctree.h"
+#include "extent-io.h"
+#include "disk-io.h"
+
+/* Start @tree off with empty state and cache trees and a cache size of 0. */
+void extent_io_tree_init(struct extent_io_tree *tree)
+{
+       tree->cache_size = 0;
+       cache_tree_init(&tree->cache);
+       cache_tree_init(&tree->state);
+}
+
+/*
+ * Allocate an extent_state holding one reference, with no bits set, no
+ * private value and objectid 0.  Returns NULL when malloc() fails.
+ */
+static struct extent_state *alloc_extent_state(void)
+{
+       struct extent_state *es = malloc(sizeof(*es));
+
+       if (es) {
+               es->cache_node.objectid = 0;
+               es->refs = 1;
+               es->state = 0;
+               es->xprivate = 0;
+       }
+       return es;
+}
+
+/*
+ * Drop one reference on @state and free it once the count reaches zero.
+ * A count below zero is a BUG.
+ */
+static void btrfs_free_extent_state(struct extent_state *state)
+{
+       state->refs -= 1;
+       BUG_ON(state->refs < 0);
+
+       if (state->refs)
+               return;
+       free(state);
+}
 
-u64 btrfs_read_extent_inline(struct btrfs_path *path,
-                            struct btrfs_file_extent_item *extent, u64 offset,
-                            u64 size, char *out)
+/*
+ * cache_tree_free_extents() callback: drop one reference on the
+ * extent_state that embeds @cache.
+ */
+static void free_extent_state_func(struct cache_extent *cache)
 {
-       u32 clen, dlen, orig_size = size, res;
-       const char *cbuf;
-       char *dbuf;
-       const int data_off = offsetof(struct btrfs_file_extent_item,
-                                     disk_bytenr);
+       struct extent_state *es;
+
+       es = container_of(cache, struct extent_state, cache_node);
+       btrfs_free_extent_state(es);
+}
 
-       clen = btrfs_path_item_size(path) - data_off;
-       cbuf = (const char *) extent + data_off;
-       dlen = extent->ram_bytes;
+static void free_extent_buffer_final(struct extent_buffer *eb);
+
+/*
+ * Empty the state tree of @tree, dropping one reference on every
+ * extent_state it holds.  The cache tree of @tree is not touched here.
+ */
+void extent_io_tree_cleanup(struct extent_io_tree *tree)
+{
+       struct cache_tree *states = &tree->state;
+
+       cache_tree_free_extents(states, free_extent_state_func);
+}
 
-       if (offset > dlen)
-               return -1ULL;
+/*
+ * Mirror the inclusive range [state->start, state->end] into the embedded
+ * cache_extent, which stores it as a start and a size.
+ */
+static inline void update_extent_state(struct extent_state *state)
+{
+       struct cache_extent *ce = &state->cache_node;
+
+       ce->start = state->start;
+       ce->size = state->end + 1 - state->start;
+}
+
+/*
+ * Utility function to look for merge candidates next to a given state.
+ * A directly adjacent neighbour carrying exactly the same state bits is
+ * merged with @state into a single extent in the tree. States with any of
+ * the EXTENT_IOBITS bits set are not merged.
+ *
+ * NOTE: when @state is merged into its successor, @state itself is removed
+ * from the tree and its reference dropped, so callers must not rely on
+ * @state afterwards.
+ *
+ * Always returns 0.
+ */
+static int merge_state(struct extent_io_tree *tree,
+                      struct extent_state *state)
+{
+       struct extent_state *other;
+       struct cache_extent *other_node;
 
-       if (size > dlen - offset)
-               size = dlen - offset;
+       if (state->state & EXTENT_IOBITS)
+               return 0;
 
-       if (extent->compression == BTRFS_COMPRESS_NONE) {
-               memcpy(out, cbuf + offset, size);
-               return size;
+       /* Predecessor: @state grows downwards and swallows it */
+       other_node = prev_cache_extent(&state->cache_node);
+       if (other_node) {
+               other = container_of(other_node, struct extent_state,
+                                    cache_node);
+               if (other->end == state->start - 1 &&
+                   other->state == state->state) {
+                       state->start = other->start;
+                       update_extent_state(state);
+                       remove_cache_extent(&tree->state, &other->cache_node);
+                       btrfs_free_extent_state(other);
+               }
        }
+       /* Successor: it grows downwards and @state is the one dropped */
+       other_node = next_cache_extent(&state->cache_node);
+       if (other_node) {
+               other = container_of(other_node, struct extent_state,
+                                    cache_node);
+               if (other->start == state->end + 1 &&
+                   other->state == state->state) {
+                       other->start = state->start;
+                       update_extent_state(other);
+                       remove_cache_extent(&tree->state, &state->cache_node);
+                       btrfs_free_extent_state(state);
+               }
+       }
+       return 0;
+}
+
+/*
+ * Insert @state into @tree as the inclusive range [@start, @end].  @bits
+ * are OR-ed into the state before it is inserted, and the new entry is then
+ * passed to merge_state(), which may merge it with a neighbour.
+ *
+ * Always returns 0; @end < @start or a failed insertion is a BUG.
+ */
+static int insert_state(struct extent_io_tree *tree,
+                       struct extent_state *state, u64 start, u64 end,
+                       int bits)
+{
+       int err;
+
+       BUG_ON(end < start);
+
+       state->start = start;
+       state->end = end;
+       state->state |= bits;
+       update_extent_state(state);
+
+       err = insert_cache_extent(&tree->state, &state->cache_node);
+       BUG_ON(err);
+
+       merge_state(tree, state);
+       return 0;
+}
+
+/*
+ * Split @orig in two at offset @split.  The preallocated @prealloc becomes
+ * the front part [orig->start, split - 1] with the same state bits and is
+ * inserted into @tree; @orig is shrunk to the back part starting at @split.
+ *
+ * Always returns 0; a failed insertion is a BUG.
+ */
+static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
+                      struct extent_state *prealloc, u64 split)
+{
+       int err;
+
+       /* Front part goes to the new state */
+       prealloc->start = orig->start;
+       prealloc->end = split - 1;
+       prealloc->state = orig->state;
+       update_extent_state(prealloc);
+
+       /* Back part stays with the original state */
+       orig->start = split;
+       update_extent_state(orig);
+
+       err = insert_cache_extent(&tree->state, &prealloc->cache_node);
+       BUG_ON(err);
+       return 0;
+}
+
+/*
+ * clear some bits on a single state in the tree.
+ *
+ * If no bit is left set afterwards, @state is removed from the tree and its
+ * reference dropped; otherwise it is passed to merge_state().
+ *
+ * Returns the subset of @bits that was set in @state before clearing.
+ */
+static int clear_state_bit(struct extent_io_tree *tree,
+                           struct extent_state *state, int bits)
+{
+       int ret = state->state & bits;
 
-       if (dlen > orig_size) {
-               dbuf = malloc(dlen);
-               if (!dbuf)
-                       return -1ULL;
+       state->state &= ~bits;
+       if (state->state == 0) {
+               remove_cache_extent(&tree->state, &state->cache_node);
+               btrfs_free_extent_state(state);
        } else {
-               dbuf = out;
+               merge_state(tree, state);
+       }
+       return ret;
+}
+
+/*
+ * extent_buffer_bitmap_set - set a run of bits in a bitmap
+ * @eb: the extent buffer holding the bitmap
+ * @start: offset of the bitmap inside the extent buffer data
+ * @pos: number of the first bit to set
+ * @len: how many bits to set
+ */
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+                             unsigned long pos, unsigned long len)
+{
+       u8 *byte = (u8 *)eb->data + start + BIT_BYTE(pos);
+       const unsigned int end_bit = pos + len;
+       int chunk = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+       u8 mask = BITMAP_FIRST_BYTE_MASK(pos);
+
+       /* First byte from @pos upwards, then whole bytes */
+       while (len >= chunk) {
+               *byte |= mask;
+               byte++;
+               len -= chunk;
+               chunk = BITS_PER_BYTE;
+               mask = ~0;
+       }
+
+       /* Remaining bits that stop short of a byte boundary */
+       if (len) {
+               mask &= BITMAP_LAST_BYTE_MASK(end_bit);
+               *byte |= mask;
+       }
+}
+
+/*
+ * extent_buffer_bitmap_clear - clear a run of bits in a bitmap
+ * @eb: the extent buffer holding the bitmap
+ * @start: offset of the bitmap inside the extent buffer data
+ * @pos: number of the first bit to clear
+ * @len: how many bits to clear
+ */
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+                               unsigned long pos, unsigned long len)
+{
+       u8 *byte = (u8 *)eb->data + start + BIT_BYTE(pos);
+       const unsigned int end_bit = pos + len;
+       int chunk = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+       u8 mask = BITMAP_FIRST_BYTE_MASK(pos);
+
+       /* First byte from @pos upwards, then whole bytes */
+       while (len >= chunk) {
+               *byte &= ~mask;
+               byte++;
+               len -= chunk;
+               chunk = BITS_PER_BYTE;
+               mask = ~0;
+       }
+
+       /* Remaining bits that stop short of a byte boundary */
+       if (len) {
+               mask &= BITMAP_LAST_BYTE_MASK(end_bit);
+               *byte &= ~mask;
+       }
+}
+
+/*
+ * clear some bits on a range in the tree.
+ *
+ * Clears @bits from every extent_state overlapping [@start, @end] (both
+ * inclusive), splitting states that straddle either boundary so that only
+ * the part inside the range is changed.
+ *
+ * Returns the OR of the requested bits that were set in the states
+ * visited, or -ENOMEM if an extent_state could not be allocated.
+ */
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+{
+       struct extent_state *state;
+       struct extent_state *prealloc = NULL;
+       struct cache_extent *node;
+       u64 last_end;
+       int err;
+       int set = 0;
+
+again:
+       /* Keep a spare state at hand in case this pass needs to split */
+       if (!prealloc) {
+               prealloc = alloc_extent_state();
+               if (!prealloc)
+                       return -ENOMEM;
+       }
+
+       /*
+        * this search will find the extents that end after
+        * our range starts
+        */
+       node = search_cache_extent(&tree->state, start);
+       if (!node)
+               goto out;
+       state = container_of(node, struct extent_state, cache_node);
+       if (state->start > end)
+               goto out;
+       /* Saved now: clear_state_bit() below may free @state */
+       last_end = state->end;
+
+       /*
+        *     | ---- desired range ---- |
+        *  | state | or
+        *  | ------------- state -------------- |
+        *
+        * We need to split the extent we found, and may flip
+        * bits on second half.
+        *
+        * If the extent we found extends past our range, we
+        * just split and search again.  It'll get split again
+        * the next time though.
+        *
+        * If the extent we found is inside our range, we clear
+        * the desired bit on it.
+        */
+       if (state->start < start) {
+               err = split_state(tree, state, prealloc, start);
+               BUG_ON(err == -EEXIST);
+               prealloc = NULL;
+               if (err)
+                       goto out;
+               if (state->end <= end) {
+                       set |= clear_state_bit(tree, state, bits);
+                       if (last_end == (u64)-1)
+                               goto out;
+                       start = last_end + 1;
+               } else {
+                       start = state->start;
+               }
+               goto search_again;
+       }
+       /*
+        * | ---- desired range ---- |
+        *                        | state |
+        * We need to split the extent, and clear the bit
+        * on the first half
+        */
+       if (state->start <= end && state->end > end) {
+               err = split_state(tree, state, prealloc, end + 1);
+               BUG_ON(err == -EEXIST);
+
+               set |= clear_state_bit(tree, prealloc, bits);
+               prealloc = NULL;
+               goto out;
+       }
+
+       /* The state lies entirely inside the range: clear and move on */
+       start = state->end + 1;
+       set |= clear_state_bit(tree, state, bits);
+       if (last_end == (u64)-1)
+               goto out;
+       start = last_end + 1;
+       goto search_again;
+out:
+       if (prealloc)
+               btrfs_free_extent_state(prealloc);
+       return set;
+
+search_again:
+       if (start > end)
+               goto out;
+       goto again;
+}
+
+/*
+ * Set @bits on the inclusive range [@start, @end] of @tree.
+ *
+ * Existing states that only partly overlap the range are split at the range
+ * boundaries, holes inside the range get new states inserted, and every state
+ * that received the bits is handed to merge_state().
+ *
+ * Return 0 on success, -ENOMEM if no extent_state could be allocated, or the
+ * error returned by insert_state() / split_state().
+ */
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+{
+       struct extent_state *state;
+       struct extent_state *prealloc = NULL;
+       struct cache_extent *node;
+       int err = 0;
+       u64 last_start;
+       u64 last_end;
+again:
+       /* Every pass may consume one spare state for an insert or a split. */
+       if (!prealloc) {
+               prealloc = alloc_extent_state();
+               if (!prealloc)
+                       return -ENOMEM;
+       }
+
+       /*
+        * this search will find the extents that end after
+        * our range starts
+        */
+       node = search_cache_extent(&tree->state, start);
+       if (!node) {
+               /* Nothing at or after @start: cover the whole rest at once. */
+               err = insert_state(tree, prealloc, start, end, bits);
+               BUG_ON(err == -EEXIST);
+               prealloc = NULL;
+               goto out;
+       }
+
+       state = container_of(node, struct extent_state, cache_node);
+       last_start = state->start;
+       last_end = state->end;
+
+       /*
+        * | ---- desired range ---- |
+        * | state |
+        *
+        * Just set the bits on what we found and keep going
+        */
+       if (state->start == start && state->end <= end) {
+               state->state |= bits;
+               merge_state(tree, state);
+               /* last_end + 1 would wrap around to 0 */
+               if (last_end == (u64)-1)
+                       goto out;
+               start = last_end + 1;
+               goto search_again;
+       }
+       /*
+        *     | ---- desired range ---- |
+        * | state |
+        *   or
+        * | ------------- state -------------- |
+        *
+        * We need to split the extent we found, and may flip bits on
+        * second half.
+        *
+        * If the extent we found extends past our
+        * range, we just split and search again.  It'll get split
+        * again the next time though.
+        *
+        * If the extent we found is inside our range, we set the
+        * desired bit on it.
+        */
+       if (state->start < start) {
+               err = split_state(tree, state, prealloc, start);
+               BUG_ON(err == -EEXIST);
+               prealloc = NULL;
+               if (err)
+                       goto out;
+               if (state->end <= end) {
+                       state->state |= bits;
+                       merge_state(tree, state);
+                       if (last_end == (u64)-1)
+                               goto out;
+                       start = last_end + 1;
+               } else {
+                       start = state->start;
+               }
+               goto search_again;
+       }
+       /*
+        * | ---- desired range ---- |
+        *     | state | or               | state |
+        *
+        * There's a hole, we need to insert something in it and
+        * ignore the extent we found.
+        */
+       if (state->start > start) {
+               u64 this_end;
+               if (end < last_start)
+                       this_end = end;
+               else
+                       this_end = last_start -1;
+               err = insert_state(tree, prealloc, start, this_end,
+                               bits);
+               BUG_ON(err == -EEXIST);
+               prealloc = NULL;
+               if (err)
+                       goto out;
+               start = this_end + 1;
+               goto search_again;
+       }
+       /*
+        * | ---- desired range ---- |
+        * | ---------- state ---------- |
+        * We need to split the extent, and set the bit
+        * on the first half
+        */
+       err = split_state(tree, state, prealloc, end + 1);
+       BUG_ON(err == -EEXIST);
+
+       /*
+        * After the split @prealloc describes the first half [start, end] and
+        * @state the tail beginning at end + 1 (the partial-overlap branch
+        * above relies on the same layout), so the bits must go to @prealloc;
+        * setting them on @state would mark bytes outside the requested range.
+        */
+       prealloc->state |= bits;
+       merge_state(tree, prealloc);
+       prealloc = NULL;
+out:
+       if (prealloc)
+               btrfs_free_extent_state(prealloc);
+       return err;
+search_again:
+       if (start > end)
+               goto out;
+       goto again;
+}
+
+/*
+ * Set EXTENT_DIRTY on the range [@start, @end] of @tree.
+ * Returns whatever set_extent_bits() returns.
+ */
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
+{
+       return set_extent_bits(tree, start, end, EXTENT_DIRTY);
+}
+
+/*
+ * Clear EXTENT_DIRTY on the range [@start, @end] of @tree.
+ * Returns whatever clear_extent_bits() returns.
+ */
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
+{
+       return clear_extent_bits(tree, start, end, EXTENT_DIRTY);
+}
+
+/*
+ * Find the first state, searching from @start, that ends at or after @start
+ * and has at least one of @bits set.
+ *
+ * Return 0 and store the state's range in @start_ret / @end_ret when found.
+ * Return 1 (outputs untouched) when no such state exists.
+ */
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+                         u64 *start_ret, u64 *end_ret, int bits)
+{
+       struct cache_extent *cur;
+
+       /*
+        * The search yields the extents that end after our range starts;
+        * walk forward from there until one carries the wanted bits.
+        */
+       for (cur = search_cache_extent(&tree->state, start); cur;
+            cur = next_cache_extent(cur)) {
+               struct extent_state *es;
+
+               es = container_of(cur, struct extent_state, cache_node);
+               if (es->end < start || !(es->state & bits))
+                       continue;
+
+               *start_ret = es->start;
+               *end_ret = es->end;
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Test whether the states found for the inclusive range [@start, @end]
+ * carry any of @bits.
+ *
+ * @filled != 0: return 1 only if the range is covered without holes by states
+ *               that each have at least one of @bits set.
+ * @filled == 0: return 1 as soon as one state found inside the range has at
+ *               least one of @bits set.
+ *
+ * Return 0 otherwise, including when the search finds no state at all.
+ */
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                  int bits, int filled)
+{
+       struct extent_state *state = NULL;
+       struct cache_extent *node;
+       int bitset = 0;
+
+       node = search_cache_extent(&tree->state, start);
+       while (node && start <= end) {
+               state = container_of(node, struct extent_state, cache_node);
+
+               /* A gap before this state means the range is not filled. */
+               if (filled && state->start > start) {
+                       bitset = 0;
+                       break;
+               }
+               /* This state lies entirely behind the range. */
+               if (state->start > end)
+                       break;
+               if (state->state & bits) {
+                       bitset = 1;
+                       if (!filled)
+                               break;
+               } else if (filled) {
+                       bitset = 0;
+                       break;
+               }
+               /* Continue right behind the state just checked. */
+               start = state->end + 1;
+               if (start > end)
+                       break;
+               node = next_cache_extent(node);
+               if (!node) {
+                       /* Ran out of states before reaching @end. */
+                       if (filled)
+                               bitset = 0;
+                       break;
+               }
+       }
+       return bitset;
+}
+
+/*
+ * Store @private in the state of @tree that begins exactly at @start.
+ *
+ * Return 0 on success, -ENOENT when no state starts at @start.
+ */
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+{
+       struct cache_extent *found = search_cache_extent(&tree->state, start);
+       struct extent_state *es;
+
+       if (!found)
+               return -ENOENT;
+
+       es = container_of(found, struct extent_state, cache_node);
+       if (es->start != start)
+               return -ENOENT;
+
+       es->xprivate = private;
+       return 0;
+}
+
+/*
+ * Fetch into @private the value stored in the state of @tree that begins
+ * exactly at @start.
+ *
+ * Return 0 on success, -ENOENT (with @private untouched) when no state
+ * starts at @start.
+ */
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
+{
+       struct cache_extent *found = search_cache_extent(&tree->state, start);
+       struct extent_state *es;
+
+       if (!found)
+               return -ENOENT;
+
+       es = container_of(found, struct extent_state, cache_node);
+       if (es->start != start)
+               return -ENOENT;
+
+       *private = es->xprivate;
+       return 0;
+}
+
+/*
+ * Allocate an extent buffer describing @blocksize bytes at @bytenr.
+ *
+ * The data area comes from malloc_cache_aligned() and is zero-filled; the
+ * returned buffer holds one reference and is not inserted into any cache
+ * tree here.
+ *
+ * Return NULL if either allocation fails.
+ */
+static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *info,
+                                                  u64 bytenr, u32 blocksize)
+{
+       struct extent_buffer *buf = calloc(1, sizeof(struct extent_buffer));
+
+       if (!buf)
+               return NULL;
+
+       buf->data = malloc_cache_aligned(blocksize);
+       if (!buf->data) {
+               free(buf);
+               return NULL;
+       }
+       memset_extent_buffer(buf, 0, 0, blocksize);
+
+       buf->fs_info = info;
+       buf->start = bytenr;
+       buf->len = blocksize;
+       buf->cache_node.start = bytenr;
+       buf->cache_node.size = blocksize;
+       buf->flags = 0;
+       buf->refs = 1;
+
+       return buf;
+}
+
+/*
+ * Create a private copy of @src: same fs_info, start and length, with the
+ * data duplicated. The copy is flagged EXTENT_BUFFER_DUMMY and is not put
+ * into the extent buffer cache.
+ *
+ * Return NULL if the allocation fails.
+ */
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
+{
+       struct extent_buffer *clone = __alloc_extent_buffer(src->fs_info,
+                                                           src->start,
+                                                           src->len);
+
+       if (clone) {
+               copy_extent_buffer(clone, src, 0, 0, src->len);
+               clone->flags |= EXTENT_BUFFER_DUMMY;
+       }
+
+       return clone;
+}
+
+/*
+ * Release the memory of @eb, whose reference count must already be zero.
+ *
+ * Buffers without EXTENT_BUFFER_DUMMY are first removed from the extent
+ * buffer cache of their fs_info, and their length is subtracted from the
+ * cache size accounting.
+ */
+static void free_extent_buffer_final(struct extent_buffer *eb)
+{
+       BUG_ON(eb->refs);
+       if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
+               struct extent_io_tree *tree = &eb->fs_info->extent_cache;
+
+               remove_cache_extent(&tree->cache, &eb->cache_node);
+               BUG_ON(tree->cache_size < eb->len);
+               tree->cache_size -= eb->len;
        }
+       /* The data area is a separate allocation from the eb itself. */
+       free(eb->data);
+       free(eb);
+}
 
-       res = btrfs_decompress(extent->compression, cbuf, clen, dbuf, dlen);
-       if (res == -1 || res != dlen)
-               goto err;
+/*
+ * Drop one reference of @eb; NULL and error pointers are ignored.
+ *
+ * When the last reference is gone, a still-dirty buffer is reported, and the
+ * buffer is freed if it is a dummy buffer or @free_now is set.
+ */
+static void free_extent_buffer_internal(struct extent_buffer *eb, bool free_now)
+{
+       if (!eb || IS_ERR(eb))
+               return;
 
-       if (dlen > orig_size) {
-               memcpy(out, dbuf + offset, size);
-               free(dbuf);
-       } else if (offset) {
-               memmove(out, dbuf + offset, size);
+       eb->refs--;
+       BUG_ON(eb->refs < 0);
+       if (eb->refs == 0) {
+               if (eb->flags & EXTENT_DIRTY) {
+                       error(
+                       "dirty eb leak (aborted trans): start %llu len %u",
+                               eb->start, eb->len);
+               }
+               if (eb->flags & EXTENT_BUFFER_DUMMY || free_now)
+                       free_extent_buffer_final(eb);
        }
+}
+
+/*
+ * Drop one reference of @eb and free it as soon as the count reaches zero.
+ * NULL and error pointers are accepted and ignored.
+ */
+void free_extent_buffer(struct extent_buffer *eb)
+{
+       free_extent_buffer_internal(eb, 1);
+}
 
-       return size;
+/*
+ * Look up the cached extent buffer that exactly matches @bytenr and
+ * @blocksize in @tree.
+ *
+ * On a hit the buffer's reference count is incremented and the buffer is
+ * returned; the caller drops it with free_extent_buffer(). Return NULL when
+ * nothing is cached there or the cached entry has a different start or size.
+ */
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+                                        u64 bytenr, u32 blocksize)
+{
+       struct extent_buffer *eb = NULL;
+       struct cache_extent *cache;
 
-err:
-       if (dlen > orig_size)
-               free(dbuf);
-       return -1ULL;
+       cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
+       if (cache && cache->start == bytenr &&
+           cache->size == blocksize) {
+               eb = container_of(cache, struct extent_buffer, cache_node);
+               eb->refs++;
+       }
+       return eb;
 }
 
-u64 btrfs_read_extent_reg(struct btrfs_path *path,
-                         struct btrfs_file_extent_item *extent, u64 offset,
-                         u64 size, char *out)
+/*
+ * Return the cached extent buffer that search_cache_extent() finds for
+ * @start in @tree, with its reference count incremented, or NULL when the
+ * search finds nothing. Unlike find_extent_buffer() no exact match of start
+ * or size is required.
+ */
+struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
+                                              u64 start)
 {
-       u64 physical, clen, dlen, orig_size = size;
-       u32 res;
-       char *cbuf, *dbuf;
+       struct extent_buffer *eb = NULL;
+       struct cache_extent *cache;
 
-       clen = extent->disk_num_bytes;
-       dlen = extent->num_bytes;
+       cache = search_cache_extent(&tree->cache, start);
+       if (cache) {
+               eb = container_of(cache, struct extent_buffer, cache_node);
+               eb->refs++;
+       }
+       return eb;
+}
 
-       if (offset > dlen)
-               return -1ULL;
+/*
+ * Get the cached extent buffer for [@bytenr, @bytenr + @blocksize), creating
+ * and caching a new zero-filled one when no exact match exists.
+ *
+ * An exact cache hit is returned with its reference count incremented. A
+ * cached buffer that merely overlaps the range has one reference dropped
+ * before the new buffer is inserted.
+ *
+ * Return NULL if allocation or insertion into the cache fails. The caller
+ * releases the buffer with free_extent_buffer().
+ */
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
+                                         u64 bytenr, u32 blocksize)
+{
+       struct extent_buffer *eb;
+       struct extent_io_tree *tree = &fs_info->extent_cache;
+       struct cache_extent *cache;
 
-       if (size > dlen - offset)
-               size = dlen - offset;
+       cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
+       if (cache && cache->start == bytenr &&
+           cache->size == blocksize) {
+               eb = container_of(cache, struct extent_buffer, cache_node);
+               eb->refs++;
+       } else {
+               int ret;
 
-       /* sparse extent */
-       if (extent->disk_bytenr == 0) {
-               memset(out, 0, size);
-               return size;
+               if (cache) {
+                       eb = container_of(cache, struct extent_buffer,
+                                         cache_node);
+                       free_extent_buffer(eb);
+               }
+               eb = __alloc_extent_buffer(fs_info, bytenr, blocksize);
+               if (!eb)
+                       return NULL;
+               ret = insert_cache_extent(&tree->cache, &eb->cache_node);
+               if (ret) {
+                       /*
+                        * The eb never made it into the cache, so release it
+                        * by hand; its data area is a separate allocation
+                        * and must be freed as well.
+                        */
+                       free(eb->data);
+                       free(eb);
+                       return NULL;
+               }
+               tree->cache_size += blocksize;
        }
+       return eb;
+}
 
-       physical = btrfs_map_logical_to_physical(extent->disk_bytenr);
-       if (physical == -1ULL)
-               return -1ULL;
+/*
+ * Allocate a dummy extent buffer which won't be inserted into the extent
+ * buffer cache.
+ *
+ * This mostly allows super block read/write using the existing eb
+ * infrastructure without polluting the eb cache.
+ *
+ * This is especially important to avoid injecting eb->start == SZ_64K, as a
+ * fuzzed image could have an invalid tree bytenr that covers the super block
+ * range and cause a ref count underflow.
+ *
+ * Return the new buffer flagged EXTENT_BUFFER_DUMMY, or NULL if the
+ * allocation fails.
+ */
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+                                               u64 bytenr, u32 blocksize)
+{
+       struct extent_buffer *ret;
 
-       if (extent->compression == BTRFS_COMPRESS_NONE) {
-               physical += extent->offset + offset;
-               if (!btrfs_devread(physical, size, out))
-                       return -1ULL;
+       ret = __alloc_extent_buffer(fs_info, bytenr, blocksize);
+       if (!ret)
+               return NULL;
+
+       ret->flags |= EXTENT_BUFFER_DUMMY;
+
+       return ret;
+}
+
+/*
+ * Read @len bytes located at @physical on @desc / @part into the data area
+ * of @eb, starting at byte @offset of that area.
+ *
+ * Return 0 when exactly @len bytes were read, the negative value returned by
+ * __btrfs_devread() on failure, or -EIO on a short read.
+ */
+int read_extent_from_disk(struct blk_desc *desc, struct disk_partition *part,
+                         u64 physical, struct extent_buffer *eb,
+                         unsigned long offset, unsigned long len)
+{
+       int ret;
 
-               return size;
+       ret = __btrfs_devread(desc, part, eb->data + offset, len, physical);
+       if (ret < 0)
+               goto out;
+       if (ret != len) {
+               ret = -EIO;
+               goto out;
        }
+       ret = 0;
+out:
+       return ret;
+}
+
+/*
+ * memcmp() @len bytes of @eb's data, beginning at byte offset @start, with
+ * @ptrv. The result has memcmp() semantics; no bounds check is done here.
+ */
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+                        unsigned long start, unsigned long len)
+{
+       return memcmp(eb->data + start, ptrv, len);
+}
 
-       cbuf = malloc_cache_aligned(dlen > size ? clen + dlen : clen);
-       if (!cbuf)
-               return -1ULL;
+/*
+ * Copy @len bytes out of @eb's data, beginning at byte offset @start, into
+ * @dst. No bounds check is done here.
+ */
+void read_extent_buffer(const struct extent_buffer *eb, void *dst,
+                       unsigned long start, unsigned long len)
+{
+       memcpy(dst, eb->data + start, len);
+}
 
-       if (dlen > orig_size)
-               dbuf = cbuf + clen;
-       else
-               dbuf = out;
+/*
+ * Copy @len bytes from @src into @eb's data at byte offset @start.
+ * No bounds check is done here.
+ */
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+                        unsigned long start, unsigned long len)
+{
+       memcpy(eb->data + start, src, len);
+}
+
+/*
+ * Copy @len bytes from @src's data at @src_offset to @dst's data at
+ * @dst_offset. Uses memcpy(), so the two regions must not overlap; no bounds
+ * check is done here.
+ */
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+                       unsigned long dst_offset, unsigned long src_offset,
+                       unsigned long len)
+{
+       memcpy(dst->data + dst_offset, src->data + src_offset, len);
+}
 
-       if (!btrfs_devread(physical, clen, cbuf))
-               goto err;
+/*
+ * Move @len bytes inside @dst's data from @src_offset to @dst_offset.
+ * Uses memmove(), so the regions may overlap; no bounds check is done here.
+ */
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+                          unsigned long src_offset, unsigned long len)
+{
+       memmove(dst->data + dst_offset, dst->data + src_offset, len);
+}
 
-       res = btrfs_decompress(extent->compression, cbuf, clen, dbuf, dlen);
-       if (res == -1)
-               goto err;
+/*
+ * Fill @len bytes of @eb's data, beginning at byte offset @start, with @c.
+ * No bounds check is done here.
+ */
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+                         unsigned long start, unsigned long len)
+{
+       memset(eb->data + start, c, len);
+}
 
-       if (dlen > orig_size)
-               memcpy(out, dbuf + offset, size);
-       else
-               memmove(out, dbuf + offset, size);
+/*
+ * Return bit @nr of the bitmap that begins at byte offset @start of @eb's
+ * data, as computed by le_test_bit().
+ */
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+                          unsigned long nr)
+{
+       return le_test_bit(nr, (u8 *)eb->data + start);
+}
 
-       free(cbuf);
-       return res;
+/*
+ * Mark @eb dirty if it is not dirty yet: set EXTENT_DIRTY in its flags,
+ * mark its byte range dirty in the fs_info extent cache tree and take an
+ * extra reference on the buffer.
+ *
+ * Always returns 0; the result of set_extent_dirty() is not checked.
+ */
+int set_extent_buffer_dirty(struct extent_buffer *eb)
+{
+       struct extent_io_tree *tree = &eb->fs_info->extent_cache;
+       if (!(eb->flags & EXTENT_DIRTY)) {
+               eb->flags |= EXTENT_DIRTY;
+               set_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
+               /* Dropped again by clear_extent_buffer_dirty(). */
+               extent_buffer_get(eb);
+       }
+       return 0;
+}
 
-err:
-       free(cbuf);
-       return -1ULL;
+/*
+ * Undo set_extent_buffer_dirty() if @eb is dirty: clear EXTENT_DIRTY in its
+ * flags, clear the dirty range in the fs_info extent cache tree and drop one
+ * reference, which may free the buffer.
+ *
+ * Always returns 0; the result of clear_extent_dirty() is not checked.
+ */
+int clear_extent_buffer_dirty(struct extent_buffer *eb)
+{
+       struct extent_io_tree *tree = &eb->fs_info->extent_cache;
+       if (eb->flags & EXTENT_DIRTY) {
+               eb->flags &= ~EXTENT_DIRTY;
+               clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
+               free_extent_buffer(eb);
+       }
+       return 0;
 }
diff --git a/fs/btrfs/extent-io.h b/fs/btrfs/extent-io.h
new file mode 100644 (file)
index 0000000..6b0c87d
--- /dev/null
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Crossported from btrfs-progs/extent_io.h
+ *
+ * Modification includes:
+ * - extent_buffer:data
+ *   Use pointer to provide better alignment.
+ * - Remove max_cache_size related interfaces
+ *   Includes free_extent_buffer_nocache()
+ *   As we don't cache eb in U-boot.
+ * - Include headers
+ *
+ * Write related functions are kept as we still need to modify dummy extent
+ * buffers even in RO environment.
+ */
+#ifndef __BTRFS_EXTENT_IO_H__
+#define __BTRFS_EXTENT_IO_H__
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <linux/bitops.h>
+#include <fs_internal.h>
+#include "extent-cache.h"
+
+#define EXTENT_DIRTY           (1U << 0)
+#define EXTENT_WRITEBACK       (1U << 1)
+#define EXTENT_UPTODATE                (1U << 2)
+#define EXTENT_LOCKED          (1U << 3)
+#define EXTENT_NEW             (1U << 4)
+#define EXTENT_DELALLOC                (1U << 5)
+#define EXTENT_DEFRAG          (1U << 6)
+#define EXTENT_DEFRAG_DONE     (1U << 7)
+#define EXTENT_BUFFER_FILLED   (1U << 8)
+#define EXTENT_CSUM            (1U << 9)
+#define EXTENT_BAD_TRANSID     (1U << 10)
+#define EXTENT_BUFFER_DUMMY    (1U << 11)
+#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+
+#define BLOCK_GROUP_DATA       (1U << 1)
+#define BLOCK_GROUP_METADATA   (1U << 2)
+#define BLOCK_GROUP_SYSTEM     (1U << 4)
+
+/*
+ * The extent buffer bitmap operations are done with byte granularity instead of
+ * word granularity for two reasons:
+ * 1. The bitmaps must be little-endian on disk.
+ * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
+ *    single word in a bitmap may straddle two pages in the extent buffer.
+ */
+#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BITMAP_FIRST_BYTE_MASK(start) \
+       ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+#define BITMAP_LAST_BYTE_MASK(nbits) \
+       (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+/*
+ * Return bit @nr (0 or 1) of the byte-granular bitmap at @addr: the byte is
+ * addr[nr / BITS_PER_BYTE] and the bit within it is nr % BITS_PER_BYTE,
+ * counted from the least significant bit.
+ */
+static inline int le_test_bit(int nr, const u8 *addr)
+{
+       return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
+}
+
+struct btrfs_fs_info;
+
+/* Per-filesystem container for extent states and cached extent buffers. */
+struct extent_io_tree {
+       /* Tree of struct extent_state, linked through their cache_node */
+       struct cache_tree state;
+       /* Tree of struct extent_buffer, linked through their cache_node */
+       struct cache_tree cache;
+       /* Sum of the lengths of all extent buffers held in @cache */
+       u64 cache_size;
+};
+
+/* One byte range of an extent_io_tree together with its EXTENT_* bits. */
+struct extent_state {
+       /* Links the state into extent_io_tree::state */
+       struct cache_extent cache_node;
+       /* First byte of the range */
+       u64 start;
+       /* Last byte of the range (inclusive) */
+       u64 end;
+       int refs;
+       /* Bitmask of EXTENT_* flags set on the range */
+       unsigned long state;
+       /* Caller-owned value, see set_state_private()/get_state_private() */
+       u64 xprivate;
+};
+
+/* In-memory copy of one on-disk block, reference counted. */
+struct extent_buffer {
+       /* Links the buffer into extent_io_tree::cache */
+       struct cache_extent cache_node;
+       /* Byte number the buffer was allocated for */
+       u64 start;
+       /* Size of @data in bytes */
+       u32 len;
+       /* Reference count, see extent_buffer_get()/free_extent_buffer() */
+       int refs;
+       /* Bitmask of EXTENT_* flags (e.g. EXTENT_UPTODATE, EXTENT_BUFFER_DUMMY) */
+       u32 flags;
+       struct btrfs_fs_info *fs_info;
+       /* Block content; a pointer (not an embedded array) for alignment */
+       char *data;
+};
+
+/* Take an additional reference on @eb; @eb must not be NULL. */
+static inline void extent_buffer_get(struct extent_buffer *eb)
+{
+       eb->refs++;
+}
+
+void extent_io_tree_init(struct extent_io_tree *tree);
+void extent_io_tree_cleanup(struct extent_io_tree *tree);
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits);
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits);
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+                         u64 *start_ret, u64 *end_ret, int bits);
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                  int bits, int filled);
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end);
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end);
+/* Set EXTENT_UPTODATE in the flags of @eb. Always returns 0. */
+static inline int set_extent_buffer_uptodate(struct extent_buffer *eb)
+{
+       eb->flags |= EXTENT_UPTODATE;
+       return 0;
+}
+
+/* Clear EXTENT_UPTODATE in the flags of @eb. Always returns 0. */
+static inline int clear_extent_buffer_uptodate(struct extent_buffer *eb)
+{
+       eb->flags &= ~EXTENT_UPTODATE;
+       return 0;
+}
+
+/*
+ * Return 1 if @eb is a valid buffer with EXTENT_UPTODATE set, 0 otherwise.
+ * NULL and error pointers are accepted and count as not up to date.
+ */
+static inline int extent_buffer_uptodate(struct extent_buffer *eb)
+{
+       if (eb && !IS_ERR(eb) && (eb->flags & EXTENT_UPTODATE))
+               return 1;
+
+       return 0;
+}
+
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 xprivate);
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *xprivate);
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+                                        u64 bytenr, u32 blocksize);
+struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
+                                              u64 start);
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
+                                         u64 bytenr, u32 blocksize);
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+                                               u64 bytenr, u32 blocksize);
+void free_extent_buffer(struct extent_buffer *eb);
+int read_extent_from_disk(struct blk_desc *desc, struct disk_partition *part,
+                         u64 physical, struct extent_buffer *eb,
+                         unsigned long offset, unsigned long len);
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+                        unsigned long start, unsigned long len);
+void read_extent_buffer(const struct extent_buffer *eb, void *dst,
+                       unsigned long start, unsigned long len);
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+                        unsigned long start, unsigned long len);
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+                       unsigned long dst_offset, unsigned long src_offset,
+                       unsigned long len);
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+                          unsigned long src_offset, unsigned long len);
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+                         unsigned long start, unsigned long len);
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+                          unsigned long nr);
+int set_extent_buffer_dirty(struct extent_buffer *eb);
+int clear_extent_buffer_dirty(struct extent_buffer *eb);
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+                               unsigned long pos, unsigned long len);
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+                             unsigned long pos, unsigned long len);
+
+#endif
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
deleted file mode 100644 (file)
index 52a8cea..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include "btrfs.h"
-#include <u-boot/crc.h>
-#include <asm/unaligned.h>
-
-static u32 btrfs_crc32c_table[256];
-
-void btrfs_hash_init(void)
-{
-       static int inited = 0;
-
-       if (!inited) {
-               crc32c_init(btrfs_crc32c_table, 0x82F63B78);
-               inited = 1;
-       }
-}
-
-u32 btrfs_crc32c(u32 crc, const void *data, size_t length)
-{
-       return crc32c_cal(crc, (const char *) data, length,
-                         btrfs_crc32c_table);
-}
-
-u32 btrfs_csum_data(char *data, u32 seed, size_t len)
-{
-       return btrfs_crc32c(seed, data, len);
-}
-
-void btrfs_csum_final(u32 crc, void *result)
-{
-       put_unaligned(cpu_to_le32(~crc), (u32 *)result);
-}
index 991c2f6..ff33028 100644 (file)
  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
  */
 
-#include "btrfs.h"
+#include <linux/kernel.h>
 #include <malloc.h>
+#include <memalign.h>
+#include "btrfs.h"
+#include "disk-io.h"
+#include "volumes.h"
 
-u64 btrfs_lookup_inode_ref(struct btrfs_root *root, u64 inr,
-                          struct btrfs_inode_ref *refp, char *name)
+/*
+ * Read the content of symlink inode @ino of @root, into @target.
+ * NOTE: @target will not be \0 terminated, caller should handle it properly.
+ *
+ * The link target must be stored as one uncompressed INLINE file extent that
+ * is smaller than the filesystem sectorsize; anything else is -EUCLEAN.
+ *
+ * Return the number of bytes copied into @target.
+ * Return <0 for error (-ENOENT if the inode has no file extent at offset 0).
+ */
+int btrfs_readlink(struct btrfs_root *root, u64 ino, char *target)
 {
        struct btrfs_path path;
-       struct btrfs_key *key;
-       struct btrfs_inode_ref *ref;
-       u64 res = -1ULL;
-
-       key = btrfs_search_tree_key_type(root, inr, BTRFS_INODE_REF_KEY,
-                                              &path);
-
-       if (!key)
-               return -1ULL;
-
-       ref = btrfs_path_item_ptr(&path, struct btrfs_inode_ref);
-       btrfs_inode_ref_to_cpu(ref);
-
-       if (refp)
-               *refp = *ref;
+       struct btrfs_key key;
+       struct btrfs_file_extent_item *fi;
+       int ret;
 
-       if (name) {
-               if (ref->name_len > BTRFS_NAME_MAX) {
-                       printf("%s: inode name too long: %u\n", __func__,
-                               ref->name_len);
-                       goto out;
-               }
+       key.objectid = ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = 0;
+       btrfs_init_path(&path);
 
-               memcpy(name, ref + 1, ref->name_len);
+       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+       /*
+        * Leave through "out" on failure too, so that whatever the search
+        * attached to the path is released like on every other exit.
+        */
+       if (ret < 0)
+               goto out;
+       if (ret > 0) {
+               ret = -ENOENT;
+               goto out;
        }
-
-       res = key->offset;
-out:
-       btrfs_free_path(&path);
-       return res;
-}
-
-int btrfs_lookup_inode(const struct btrfs_root *root,
-                      struct btrfs_key *location,
-                      struct btrfs_inode_item *item,
-                      struct btrfs_root *new_root)
-{
-       struct btrfs_root tmp_root = *root;
-       struct btrfs_path path;
-       int res = -1;
-
-       if (location->type == BTRFS_ROOT_ITEM_KEY) {
-               if (btrfs_find_root(location->objectid, &tmp_root, NULL))
-                       return -1;
-
-               location->objectid = tmp_root.root_dirid;
-               location->type = BTRFS_INODE_ITEM_KEY;
-               location->offset = 0;
+       fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+                           struct btrfs_file_extent_item);
+       if (btrfs_file_extent_type(path.nodes[0], fi) !=
+           BTRFS_FILE_EXTENT_INLINE) {
+               ret = -EUCLEAN;
+               error("Extent for symlink %llu must be INLINE type!", ino);
+               goto out;
        }
-
-       if (btrfs_search_tree(&tmp_root, location, &path))
-               return res;
-
-       if (btrfs_comp_keys(location, btrfs_path_leaf_key(&path)))
+       if (btrfs_file_extent_compression(path.nodes[0], fi) !=
+           BTRFS_COMPRESS_NONE) {
+               ret = -EUCLEAN;
+               error("Extent for symlink %llu must not be compressed!", ino);
                goto out;
-
-       if (item) {
-               *item = *btrfs_path_item_ptr(&path, struct btrfs_inode_item);
-               btrfs_inode_item_to_cpu(item);
        }
-
-       if (new_root)
-               *new_root = tmp_root;
-
-       res = 0;
-
+       if (btrfs_file_extent_ram_bytes(path.nodes[0], fi) >=
+           root->fs_info->sectorsize) {
+               ret = -EUCLEAN;
+               error("Symlink %llu extent data too large (%llu)!\n",
+                       ino, btrfs_file_extent_ram_bytes(path.nodes[0], fi));
+               goto out;
+       }
+       read_extent_buffer(path.nodes[0], target,
+                       btrfs_file_extent_inline_start(fi),
+                       btrfs_file_extent_ram_bytes(path.nodes[0], fi));
+       ret = btrfs_file_extent_ram_bytes(path.nodes[0], fi);
 out:
-       btrfs_free_path(&path);
-       return res;
+       btrfs_release_path(&path);
+       return ret;
 }
 
-int btrfs_readlink(const struct btrfs_root *root, u64 inr, char *target)
+/*
+ * Find the ROOT_BACKREF item of subvolume @rootid in the tree root.
+ *
+ * On success @root_ret receives the offset of the backref key and @dir_ret
+ * the dirid stored in the root_ref item.
+ *
+ * Return 0 on success, -ENOENT if no backref exists, -EUCLEAN if a key with
+ * offset (u64)-1 is unexpectedly found, or another <0 error from the tree
+ * search.
+ */
+static int lookup_root_ref(struct btrfs_fs_info *fs_info,
+                          u64 rootid, u64 *root_ret, u64 *dir_ret)
 {
+       struct btrfs_root *root = fs_info->tree_root;
+       struct btrfs_root_ref *root_ref;
        struct btrfs_path path;
        struct btrfs_key key;
-       struct btrfs_file_extent_item *extent;
-       const char *data_ptr;
-       int res = -1;
-
-       key.objectid = inr;
-       key.type = BTRFS_EXTENT_DATA_KEY;
-       key.offset = 0;
-
-       if (btrfs_search_tree(root, &key, &path))
-               return -1;
-
-       if (btrfs_comp_keys(&key, btrfs_path_leaf_key(&path)))
-               goto out;
-
-       extent = btrfs_path_item_ptr(&path, struct btrfs_file_extent_item);
-       if (extent->type != BTRFS_FILE_EXTENT_INLINE) {
-               printf("%s: Extent for symlink %llu not of INLINE type\n",
-                      __func__, inr);
+       int ret;
+
+       btrfs_init_path(&path);
+       /* Search past the last possible backref, then step back one item. */
+       key.objectid = rootid;
+       key.type = BTRFS_ROOT_BACKREF_KEY;
+       key.offset = (u64)-1;
+
+       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+       /*
+        * Leave through "out" on failure too, so that whatever the search
+        * attached to the path is released like on every other exit.
+        */
+       if (ret < 0)
+               goto out;
+       /* Should not happen */
+       if (ret == 0) {
+               ret = -EUCLEAN;
                goto out;
        }
-
-       btrfs_file_extent_item_to_cpu_inl(extent);
-
-       if (extent->compression != BTRFS_COMPRESS_NONE) {
-               printf("%s: Symlink %llu extent data compressed!\n", __func__,
-                      inr);
+       ret = btrfs_previous_item(root, &path, rootid, BTRFS_ROOT_BACKREF_KEY);
+       if (ret < 0)
                goto out;
-       } else if (extent->encryption != 0) {
-               printf("%s: Symlink %llu extent data encrypted!\n", __func__,
-                      inr);
-               goto out;
-       } else if (extent->ram_bytes >= btrfs_info.sb.sectorsize) {
-               printf("%s: Symlink %llu extent data too long (%llu)!\n",
-                      __func__, inr, extent->ram_bytes);
+       if (ret > 0) {
+               ret = -ENOENT;
                goto out;
        }
-
-       data_ptr = (const char *) extent
-                  + offsetof(struct btrfs_file_extent_item, disk_bytenr);
-
-       memcpy(target, data_ptr, extent->ram_bytes);
-       target[extent->ram_bytes] = '\0';
-       res = 0;
+       btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+       root_ref = btrfs_item_ptr(path.nodes[0], path.slots[0],
+                                 struct btrfs_root_ref);
+       *root_ret = key.offset;
+       *dir_ret = btrfs_root_ref_dirid(path.nodes[0], root_ref);
 out:
-       btrfs_free_path(&path);
-       return res;
+       btrfs_release_path(&path);
+       return ret;
 }
 
-/* inr must be a directory (for regular files with multiple hard links this
-   function returns only one of the parents of the file) */
-static u64 get_parent_inode(struct btrfs_root *root, u64 inr,
-                           struct btrfs_inode_item *inode_item)
+/*
+ * To get the parent inode of @ino of @root.
+ *
+ * @root_ret and @ino_ret will be filled.
+ *
+ * For the top directory of the default fs tree the directory itself is
+ * returned as its own parent. For the top directory of any other subvolume
+ * the parent is taken from the subvolume's ROOT_BACKREF. Otherwise it is
+ * taken from the last INODE_REF key of @ino.
+ *
+ * NOTE: This function is not reliable. It can only get one parent inode.
+ * To get the proper parent inode, we need a full VFS inodes stack to
+ * resolve properly.
+ *
+ * Return 0 on success, <0 on error.
+ */
+static int get_parent_inode(struct btrfs_root *root, u64 ino,
+                           struct btrfs_root **root_ret, u64 *ino_ret)
 {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_path path;
        struct btrfs_key key;
-       u64 res;
-
-       if (inr == BTRFS_FIRST_FREE_OBJECTID) {
-               if (root->objectid != btrfs_info.fs_root.objectid) {
-                       u64 parent;
-                       struct btrfs_root_ref ref;
-
-                       parent = btrfs_lookup_root_ref(root->objectid, &ref,
-                                                      NULL);
-                       if (parent == -1ULL)
-                               return -1ULL;
+       int ret;
 
-                       if (btrfs_find_root(parent, root, NULL))
-                               return -1ULL;
+       if (ino == BTRFS_FIRST_FREE_OBJECTID) {
+               u64 parent_root = -1;
 
-                       inr = ref.dirid;
+               /* It's top level already, no more parent */
+               if (root->root_key.objectid == BTRFS_FS_TREE_OBJECTID) {
+                       *root_ret = fs_info->fs_root;
+                       *ino_ret = BTRFS_FIRST_FREE_OBJECTID;
+                       return 0;
                }
 
-               if (inode_item) {
-                       key.objectid = inr;
-                       key.type = BTRFS_INODE_ITEM_KEY;
-                       key.offset = 0;
+               ret = lookup_root_ref(fs_info, root->root_key.objectid,
+                                     &parent_root, ino_ret);
+               if (ret < 0)
+                       return ret;
 
-                       if (btrfs_lookup_inode(root, &key, inode_item, NULL))
-                               return -1ULL;
-               }
+               key.objectid = parent_root;
+               key.type = BTRFS_ROOT_ITEM_KEY;
+               key.offset = (u64)-1;
+               *root_ret = btrfs_read_fs_root(fs_info, &key);
+               if (IS_ERR(*root_ret))
+                       return PTR_ERR(*root_ret);
 
-               return inr;
+               return 0;
        }
 
-       res = btrfs_lookup_inode_ref(root, inr, NULL, NULL);
-       if (res == -1ULL)
-               return -1ULL;
-
-       if (inode_item) {
-               key.objectid = res;
-               key.type = BTRFS_INODE_ITEM_KEY;
-               key.offset = 0;
-
-               if (btrfs_lookup_inode(root, &key, inode_item, NULL))
-                       return -1ULL;
+       btrfs_init_path(&path);
+       /* Search past the last possible INODE_REF, then step back one item. */
+       key.objectid = ino;
+       key.type = BTRFS_INODE_REF_KEY;
+       key.offset = (u64)-1;
+
+       ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+       /*
+        * Leave through "out" on failure too, so that whatever the search
+        * attached to the path is released like on every other exit.
+        */
+       if (ret < 0)
+               goto out;
+       /* Should not happen */
+       if (ret == 0) {
+               ret = -EUCLEAN;
+               goto out;
        }
-
-       return res;
+       ret = btrfs_previous_item(root, &path, ino, BTRFS_INODE_REF_KEY);
+       if (ret < 0)
+               goto out;
+       if (ret > 0) {
+               ret = -ENOENT;
+               goto out;
+       }
+       btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+       *root_ret = root;
+       *ino_ret = key.offset;
+out:
+       btrfs_release_path(&path);
+       return ret;
 }
 
+/*
+ * Return the length of the path component at the start of @path, i.e. the
+ * number of characters before the next '/' or the terminating '\0'.
+ *
+ * Counting stops once the length exceeds BTRFS_NAME_LEN, so a result larger
+ * than BTRFS_NAME_LEN means the component is too long.
+ */
 static inline int next_length(const char *path)
 {
        int res = 0;
-       while (*path != '\0' && *path != '/' && res <= BTRFS_NAME_LEN)
-               ++res, ++path;
+       while (*path != '\0' && *path != '/') {
+               ++res;
+               ++path;
+               if (res > BTRFS_NAME_LEN)
+                       break;
+       }
        return res;
 }
 
@@ -209,175 +203,549 @@ static inline const char *skip_current_directories(const char *cur)
        return cur;
 }
 
-u64 btrfs_lookup_path(struct btrfs_root *root, u64 inr, const char *path,
-                     u8 *type_p, struct btrfs_inode_item *inode_item_p,
-                     int symlink_limit)
+/*
+ * Resolve one filename of @ino of @root.
+ *
+ * key_ret:    The child key (either INODE_ITEM or ROOT_ITEM type)
+ * type_ret:   BTRFS_FT_* of the child inode.
+ *
+ * Return 0 with above members filled.
+ * Return <0 for error.
+ */
+static int resolve_one_filename(struct btrfs_root *root, u64 ino,
+                               const char *name, int namelen,
+                               struct btrfs_key *key_ret, u8 *type_ret)
 {
-       struct btrfs_dir_item item;
-       struct btrfs_inode_item inode_item;
-       u8 type = BTRFS_FT_DIR;
-       int len, have_inode = 0;
-       const char *cur = path;
+       struct btrfs_dir_item *dir_item;
+       struct btrfs_path path;
+       int ret = 0;
+
+       btrfs_init_path(&path);
+
+       dir_item = btrfs_lookup_dir_item(NULL, root, &path, ino, name,
+                                        namelen, 0);
+       if (IS_ERR(dir_item)) {
+               ret = PTR_ERR(dir_item);
+               goto out;
+       }
 
+       btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret);
+       *type_ret = btrfs_dir_type(path.nodes[0], dir_item);
+out:
+       btrfs_release_path(&path);
+       return ret;
+}
+
+/*
+ * Resolve a full path @filename. The start point is @ino of @root.
+ *
+ * The result will be filled into @root_ret, @ino_ret and @type_ret.
+ */
+int btrfs_lookup_path(struct btrfs_root *root, u64 ino, const char *filename,
+                       struct btrfs_root **root_ret, u64 *ino_ret,
+                       u8 *type_ret, int symlink_limit)
+{
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_root *next_root;
+       struct btrfs_key key;
+       const char *cur = filename;
+       u64 next_ino;
+       u8 next_type;
+       u8 type;
+       int len;
+       int ret = 0;
+
+       /* If the path is absolute, start the search from the fs root */
        if (*cur == '/') {
-               ++cur;
-               inr = root->root_dirid;
+               root = fs_info->fs_root;
+               ino = btrfs_root_dirid(&root->root_item);
+               type = BTRFS_FT_DIR;
        }
 
-       do {
+       while (*cur != '\0') {
                cur = skip_current_directories(cur);
 
                len = next_length(cur);
                if (len > BTRFS_NAME_LEN) {
-                       printf("%s: Name too long at \"%.*s\"\n", __func__,
+                       error("%s: Name too long at \"%.*s\"", __func__,
                               BTRFS_NAME_LEN, cur);
-                       return -1ULL;
+                       return -ENAMETOOLONG;
                }
 
                if (len == 1 && cur[0] == '.')
                        break;
 
                if (len == 2 && cur[0] == '.' && cur[1] == '.') {
-                       cur += 2;
-                       inr = get_parent_inode(root, inr, &inode_item);
-                       if (inr == -1ULL)
-                               return -1ULL;
-
-                       type = BTRFS_FT_DIR;
-                       continue;
+                       /* Go one level up */
+                       ret = get_parent_inode(root, ino, &next_root, &next_ino);
+                       if (ret < 0)
+                               return ret;
+                       root = next_root;
+                       ino = next_ino;
+                       goto next;
                }
 
                if (!*cur)
                        break;
-               
-               if (btrfs_lookup_dir_item(root, inr, cur, len, &item))
-                       return -1ULL;
 
-               type = item.type;
-               have_inode = 1;
-               if (btrfs_lookup_inode(root, &item.location, &inode_item, root))
-                       return -1ULL;
+               ret = resolve_one_filename(root, ino, cur, len, &key, &type);
+               if (ret < 0)
+                       return ret;
+
+               if (key.type == BTRFS_ROOT_ITEM_KEY) {
+                       /* Child inode is a subvolume */
+
+                       next_root = btrfs_read_fs_root(fs_info, &key);
+                       if (IS_ERR(next_root))
+                               return PTR_ERR(next_root);
+                       root = next_root;
+                       ino = btrfs_root_dirid(&root->root_item);
+               } else if (type == BTRFS_FT_SYMLINK && symlink_limit >= 0) {
+                       /* Child inode is a symlink */
 
-               if (item.type == BTRFS_FT_SYMLINK && symlink_limit >= 0) {
                        char *target;
 
-                       if (!symlink_limit) {
-                               printf("%s: Too much symlinks!\n", __func__);
-                               return -1ULL;
+                       if (symlink_limit == 0) {
+                               error("%s: Too much symlinks!", __func__);
+                               return -EMLINK;
                        }
-
-                       target = malloc(min(inode_item.size + 1,
-                                           (u64) btrfs_info.sb.sectorsize));
+                       target = malloc(fs_info->sectorsize);
                        if (!target)
-                               return -1ULL;
-
-                       if (btrfs_readlink(root, item.location.objectid,
-                                          target)) {
+                               return -ENOMEM;
+                       ret = btrfs_readlink(root, key.objectid, target);
+                       if (ret < 0) {
                                free(target);
-                               return -1ULL;
+                               return ret;
                        }
+                       target[ret] = '\0';
+
+                       ret = btrfs_lookup_path(root, ino, target, &next_root,
+                                               &next_ino, &next_type,
+                                               symlink_limit);
+                       if (ret < 0)
+                               return ret;
+                       root = next_root;
+                       ino = next_ino;
+                       type = next_type;
+               } else {
+                       /* Child inode is an inode */
+                       ino = key.objectid;
+               }
+next:
+               cur += len;
+       }
 
-                       inr = btrfs_lookup_path(root, inr, target, &type,
-                                               &inode_item, symlink_limit - 1);
+       if (!ret) {
+               *root_ret = root;
+               *ino_ret = ino;
+               *type_ret = type;
+       }
 
-                       free(target);
+       return ret;
+}
 
-                       if (inr == -1ULL)
-                               return -1ULL;
-               } else if (item.type != BTRFS_FT_DIR && cur[len]) {
-                       printf("%s: \"%.*s\" not a directory\n", __func__,
-                              (int) (cur - path + len), path);
-                       return -1ULL;
-               } else {
-                       inr = item.location.objectid;
-               }
+/*
+ * Read out inline extent.
+ *
+ * Since inline extent should only exist for offset 0, no need for extra
+ * parameters.
+ * Truncating should be handled by the caller.
+ *
+ * Return the number of bytes read.
+ * Return <0 for error.
+ */
+int btrfs_read_extent_inline(struct btrfs_path *path,
+                            struct btrfs_file_extent_item *fi, char *dest)
+{
+       struct extent_buffer *leaf = path->nodes[0];
+       int slot = path->slots[0];
+       char *cbuf = NULL;
+       char *dbuf = NULL;
+       u32 csize;
+       u32 dsize;
+       int ret;
+
+       csize = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(slot));
+       if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
+               /* Uncompressed, just read it out */
+               read_extent_buffer(leaf, dest,
+                               btrfs_file_extent_inline_start(fi),
+                               csize);
+               return csize;
+       }
 
-               cur += len;
-       } while (*cur);
+       /* Compressed extent, prepare the compressed and data buffer */
+       dsize = btrfs_file_extent_ram_bytes(leaf, fi);
+       cbuf = malloc(csize);
+       dbuf = malloc(dsize);
+       if (!cbuf || !dbuf) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       read_extent_buffer(leaf, cbuf, btrfs_file_extent_inline_start(fi),
+                          csize);
+       ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi),
+                              cbuf, csize, dbuf, dsize);
+       if (ret < 0 || ret != dsize) {
+               ret = -EIO;
+               goto out;
+       }
+       memcpy(dest, dbuf, dsize);
+       ret = dsize;
+out:
+       free(cbuf);
+       free(dbuf);
+       return ret;
+}
 
-       if (type_p)
-               *type_p = type;
+/*
+ * Read out regular extent.
+ *
+ * Truncating should be handled by the caller.
+ *
+ * @offset and @len should not cross the extent boundary.
+ * Return the number of bytes read.
+ * Return <0 for error.
+ */
+int btrfs_read_extent_reg(struct btrfs_path *path,
+                         struct btrfs_file_extent_item *fi, u64 offset,
+                         int len, char *dest)
+{
+       struct extent_buffer *leaf = path->nodes[0];
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
+       struct btrfs_key key;
+       u64 extent_num_bytes;
+       u64 disk_bytenr;
+       u64 read;
+       char *cbuf = NULL;
+       char *dbuf = NULL;
+       u32 csize;
+       u32 dsize;
+       bool finished = false;
+       int num_copies;
+       int i;
+       int slot = path->slots[0];
+       int ret;
+
+       btrfs_item_key_to_cpu(leaf, &key, slot);
+       extent_num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+       ASSERT(IS_ALIGNED(offset, fs_info->sectorsize) &&
+              IS_ALIGNED(len, fs_info->sectorsize));
+       ASSERT(offset >= key.offset &&
+              offset + len <= key.offset + extent_num_bytes);
+
+       /* Preallocated or hole, fill @dest with zero */
+       if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_PREALLOC ||
+           btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
+               memset(dest, 0, len);
+               return len;
+       }
 
-       if (inode_item_p) {
-               if (!have_inode) {
-                       struct btrfs_key key;
+       if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
+               u64 logical;
 
-                       key.objectid = inr;
-                       key.type = BTRFS_INODE_ITEM_KEY;
-                       key.offset = 0;
+               logical = btrfs_file_extent_disk_bytenr(leaf, fi) +
+                         btrfs_file_extent_offset(leaf, fi) +
+                         offset - key.offset;
+               read = len;
 
-                       if (btrfs_lookup_inode(root, &key, &inode_item, NULL))
-                               return -1ULL;
+               num_copies = btrfs_num_copies(fs_info, logical, len);
+               for (i = 1; i <= num_copies; i++) {
+                       ret = read_extent_data(fs_info, dest, logical, &read, i);
+                       if (ret < 0 || read != len)
+                               continue;
+                       finished = true;
+                       break;
                }
+               if (!finished)
+                       return -EIO;
+               return len;
+       }
+
+       csize = btrfs_file_extent_disk_num_bytes(leaf, fi);
+       dsize = btrfs_file_extent_ram_bytes(leaf, fi);
+       disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+       num_copies = btrfs_num_copies(fs_info, disk_bytenr, csize);
 
-               *inode_item_p = inode_item;
+       cbuf = malloc_cache_aligned(csize);
+       dbuf = malloc_cache_aligned(dsize);
+       if (!cbuf || !dbuf) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       /* For compressed extent, we must read the whole on-disk extent */
+       for (i = 1; i <= num_copies; i++) {
+               read = csize;
+               ret = read_extent_data(fs_info, cbuf, disk_bytenr,
+                                      &read, i);
+               if (ret < 0 || read != csize)
+                       continue;
+               finished = true;
+               break;
+       }
+       if (!finished) {
+               ret = -EIO;
+               goto out;
        }
 
-       return inr;
+       ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi), cbuf,
+                              csize, dbuf, dsize);
+       if (ret != dsize) {
+               ret = -EIO;
+               goto out;
+       }
+       /* Then copy the needed part */
+       memcpy(dest, dbuf + btrfs_file_extent_offset(leaf, fi), len);
+       ret = len;
+out:
+       free(cbuf);
+       free(dbuf);
+       return ret;
 }
 
-u64 btrfs_file_read(const struct btrfs_root *root, u64 inr, u64 offset,
-                   u64 size, char *buf)
+/*
+ * Get the first file extent that covers bytenr @file_offset.
+ *
+ * @file_offset must be aligned to sectorsize.
+ *
+ * return 0 for found, and path points to the file extent.
+ * return >0 for not found, and fill @next_offset.
+ * @next_offset can be 0 if there is no next file extent.
+ * return <0 for error.
+ */
+static int lookup_data_extent(struct btrfs_root *root, struct btrfs_path *path,
+                             u64 ino, u64 file_offset, u64 *next_offset)
 {
-       struct btrfs_path path;
        struct btrfs_key key;
-       struct btrfs_file_extent_item *extent;
-       int res = 0;
-       u64 rd, rd_all = -1ULL;
+       struct btrfs_file_extent_item *fi;
+       u8 extent_type;
+       int ret = 0;
 
-       key.objectid = inr;
+       ASSERT(IS_ALIGNED(file_offset, root->fs_info->sectorsize));
+       key.objectid = ino;
        key.type = BTRFS_EXTENT_DATA_KEY;
-       key.offset = offset;
-
-       if (btrfs_search_tree(root, &key, &path))
-               return -1ULL;
-
-       if (btrfs_comp_keys(&key, btrfs_path_leaf_key(&path)) < 0) {
-               if (btrfs_prev_slot(&path))
-                       goto out;
+       key.offset = file_offset;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       /* Error or we're already at the file extent */
+       if (ret <= 0)
+               return ret;
+       if (ret > 0) {
+               /* Check previous file extent */
+               ret = btrfs_previous_item(root, path, ino,
+                                         BTRFS_EXTENT_DATA_KEY);
+               if (ret < 0)
+                       return ret;
+               if (ret > 0)
+                       goto check_next;
+       }
+       /* Now the key.offset must be smaller than @file_offset */
+       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+       if (key.objectid != ino ||
+           key.type != BTRFS_EXTENT_DATA_KEY)
+               goto check_next;
+
+       fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                           struct btrfs_file_extent_item);
+       extent_type = btrfs_file_extent_type(path->nodes[0], fi);
+       if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+               if (file_offset == 0)
+                       return 0;
+               /* Inline extent should be the only extent, no next extent. */
+               *next_offset = 0;
+               return 1;
+       }
 
-               if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path)))
-                       goto out;
+       /* This file extent covers @file_offset */
+       if (key.offset <= file_offset && key.offset +
+           btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset)
+               return 0;
+check_next:
+       ret = btrfs_next_item(root, path);
+       if (ret < 0)
+               return ret;
+       if (ret > 0) {
+               *next_offset = 0;
+               return 1;
        }
 
-       rd_all = 0;
+       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+       fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                           struct btrfs_file_extent_item);
+       /* No next data extent */
+       if (key.objectid != ino ||
+           key.type != BTRFS_EXTENT_DATA_KEY) {
+               *next_offset = 0;
+               return 1;
+       }
+       /* Current file extent already beyond @file_offset */
+       if (key.offset > file_offset) {
+               *next_offset = key.offset;
+               return 1;
+       }
+       /* This file extent covers @file_offset */
+       if (key.offset <= file_offset && key.offset +
+           btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset)
+               return 0;
+       /* This file extent ends before @file_offset, check next */
+       ret = btrfs_next_item(root, path);
+       if (ret < 0)
+               return ret;
+       if (ret > 0) {
+               *next_offset = 0;
+               return 1;
+       }
+       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+       if (key.type != BTRFS_EXTENT_DATA_KEY || key.objectid != ino) {
+               *next_offset = 0;
+               return 1;
+       }
+       *next_offset = key.offset;
+       return 1;
+}
 
-       do {
-               if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path)))
-                       break;
+static int read_and_truncate_page(struct btrfs_path *path,
+                                 struct btrfs_file_extent_item *fi,
+                                 int start, int len, char *dest)
+{
+       struct extent_buffer *leaf = path->nodes[0];
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
+       u64 aligned_start = round_down(start, fs_info->sectorsize);
+       u8 extent_type;
+       char *buf;
+       int page_off = start - aligned_start;
+       int page_len = fs_info->sectorsize - page_off;
+       int ret;
+
+       ASSERT(start + len <= aligned_start + fs_info->sectorsize);
+       buf = malloc_cache_aligned(fs_info->sectorsize);
+       if (!buf)
+               return -ENOMEM;
+
+       extent_type = btrfs_file_extent_type(leaf, fi);
+       if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+               ret = btrfs_read_extent_inline(path, fi, buf);
+               memcpy(dest, buf + page_off, min(page_len, ret));
+               free(buf);
+               return len;
+       }
 
-               extent = btrfs_path_item_ptr(&path,
-                                            struct btrfs_file_extent_item);
+       ret = btrfs_read_extent_reg(path, fi,
+                       round_down(start, fs_info->sectorsize),
+                       fs_info->sectorsize, buf);
+       if (ret < 0) {
+               free(buf);
+               return ret;
+       }
+       memcpy(dest, buf + page_off, page_len);
+       free(buf);
+       return len;
+}
 
-               if (extent->type == BTRFS_FILE_EXTENT_INLINE) {
-                       btrfs_file_extent_item_to_cpu_inl(extent);
-                       rd = btrfs_read_extent_inline(&path, extent, offset,
-                                                     size, buf);
+int btrfs_file_read(struct btrfs_root *root, u64 ino, u64 file_offset, u64 len,
+                   char *dest)
+{
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_file_extent_item *fi;
+       struct btrfs_path path;
+       struct btrfs_key key;
+       u64 aligned_start = round_down(file_offset, fs_info->sectorsize);
+       u64 aligned_end = round_down(file_offset + len, fs_info->sectorsize);
+       u64 next_offset;
+       u64 cur = aligned_start;
+       int ret = 0;
+
+       btrfs_init_path(&path);
+
+       /* Zero the whole dest so that holes need no special handling */
+       memset(dest, 0, len);
+
+       /* Read out the leading unaligned part */
+       if (aligned_start != file_offset) {
+               ret = lookup_data_extent(root, &path, ino, aligned_start,
+                                        &next_offset);
+               if (ret < 0)
+                       goto out;
+               if (ret == 0) {
+                       /* Read the unaligned part out */
+                       fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+                                       struct btrfs_file_extent_item);
+                       ret = read_and_truncate_page(&path, fi, file_offset,
+                                       round_up(file_offset, fs_info->sectorsize) -
+                                       file_offset, dest);
+                       if (ret < 0)
+                               goto out;
+                       cur += fs_info->sectorsize;
                } else {
-                       btrfs_file_extent_item_to_cpu(extent);
-                       rd = btrfs_read_extent_reg(&path, extent, offset, size,
-                                                  buf);
+                       /* The whole file is a hole */
+                       if (!next_offset) {
+                               memset(dest, 0, len);
+                               return len;
+                       }
+                       cur = next_offset;
                }
+       }
+
+       /* Read the aligned part */
+       while (cur < aligned_end) {
+               u64 extent_num_bytes;
+               u8 type;
 
-               if (rd == -1ULL) {
-                       printf("%s: Error reading extent\n", __func__);
-                       rd_all = -1;
+               btrfs_release_path(&path);
+               ret = lookup_data_extent(root, &path, ino, cur, &next_offset);
+               if (ret < 0)
                        goto out;
+               if (ret > 0) {
+                       /* No next, direct exit */
+                       if (!next_offset) {
+                               ret = 0;
+                               goto out;
+                       }
+               }
+               fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+                                   struct btrfs_file_extent_item);
+               btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+               type = btrfs_file_extent_type(path.nodes[0], fi);
+               if (type == BTRFS_FILE_EXTENT_INLINE) {
+                       ret = btrfs_read_extent_inline(&path, fi, dest);
+                       goto out;
+               }
+               /* Skip holes, as we have zeroed the dest */
+               if (type == BTRFS_FILE_EXTENT_PREALLOC ||
+                   btrfs_file_extent_disk_bytenr(path.nodes[0], fi) == 0) {
+                       cur = key.offset + btrfs_file_extent_num_bytes(
+                                       path.nodes[0], fi);
+                       continue;
                }
 
-               offset = 0;
-               buf += rd;
-               rd_all += rd;
-               size -= rd;
-
-               if (!size)
-                       break;
-       } while (!(res = btrfs_next_slot(&path)));
-
-       if (res)
-               return -1ULL;
+               /* Read the remaining part of the extent */
+               extent_num_bytes = btrfs_file_extent_num_bytes(path.nodes[0],
+                                                              fi);
+               ret = btrfs_read_extent_reg(&path, fi, cur,
+                               min(extent_num_bytes, aligned_end - cur),
+                               dest + cur - file_offset);
+               if (ret < 0)
+                       goto out;
+               cur += min(extent_num_bytes, aligned_end - cur);
+       }
 
+       /* Read the trailing unaligned part */
+       if (file_offset + len != aligned_end) {
+               btrfs_release_path(&path);
+               ret = lookup_data_extent(root, &path, ino, aligned_end,
+                                        &next_offset);
+               /* <0 is error, >0 means no extent */
+               if (ret)
+                       goto out;
+               fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+                                   struct btrfs_file_extent_item);
+               ret = read_and_truncate_page(&path, fi, aligned_end,
+                               file_offset + len - aligned_end,
+                               dest + aligned_end - file_offset);
+       }
 out:
-       btrfs_free_path(&path);
-       return rd_all;
+       btrfs_release_path(&path);
+       if (ret < 0)
+               return ret;
+       return len;
 }
diff --git a/fs/btrfs/kernel-shared/btrfs_tree.h b/fs/btrfs/kernel-shared/btrfs_tree.h
new file mode 100644 (file)
index 0000000..6a76d1e
--- /dev/null
@@ -0,0 +1,1333 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copied from kernel/include/uapi/linux/btrfs_tree.h.
+ *
+ * Only modified the header.
+ */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __BTRFS_TREE_H__
+#define __BTRFS_TREE_H__
+
+#include <linux/types.h>
+
+#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
+
+/*
+ * The max metadata block size (node size).
+ *
+ * This limit is somewhat artificial. The memmove and tree block locking cost
+ * go up with larger node size.
+ */
+#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
+
+/*
+ * We can actually store much bigger names, but let's not confuse the rest
+ * of linux.
+ *
+ * btrfs_dir_item::name_len follows this limitation.
+ */
+#define BTRFS_NAME_LEN 255
+
+/*
+ * Objectids start from here.
+ *
+ * Check btrfs_disk_key for the meaning of objectids.
+ */
+
+/*
+ * Root tree holds pointers to all of the tree roots.
+ * Without special mention, the root tree contains the root bytenr of all other
+ * trees, except the chunk tree and the log tree.
+ *
+ * The super block contains the root bytenr of this tree.
+ */
+#define BTRFS_ROOT_TREE_OBJECTID 1ULL
+
+/*
+ * Extent tree stores information about which extents are in use, and backrefs
+ * for each extent.
+ */
+#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+
+/*
+ * Chunk tree stores btrfs logical address -> physical address mapping.
+ *
+ * The super block contains part of chunk tree for bootstrap, and contains
+ * the root bytenr of this tree.
+ */
+#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
+
+/*
+ * Device tree stores info about which areas of a given device are in use,
+ * and physical address -> btrfs logical address mapping.
+ */
+#define BTRFS_DEV_TREE_OBJECTID 4ULL
+
+/* The fs tree is the first subvolume tree, storing files and directories. */
+#define BTRFS_FS_TREE_OBJECTID 5ULL
+
+/* Shows the directory objectid inside the root tree. */
+#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
+
+/* Csum tree holds checksums of all the data extents. */
+#define BTRFS_CSUM_TREE_OBJECTID 7ULL
+
+/* Quota tree holds quota configuration and tracking. */
+#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+
+/* UUID tree stores items that use the BTRFS_UUID_KEY* types. */
+#define BTRFS_UUID_TREE_OBJECTID 9ULL
+
+/* Free space cache tree (v2 space cache) tracks free space in block groups. */
+#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+
+/* Indicates device stats in the device tree. */
+#define BTRFS_DEV_STATS_OBJECTID 0ULL
+
+/* For storing balance parameters in the root tree. */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
+/* Orphan objectid for tracking unlinked/truncated files. */
+#define BTRFS_ORPHAN_OBJECTID -5ULL
+
+/* Does write ahead logging to speed up fsyncs. */
+#define BTRFS_TREE_LOG_OBJECTID -6ULL
+#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+
+/* For space balancing. */
+#define BTRFS_TREE_RELOC_OBJECTID -8ULL
+#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
+
+/* Extent checksums, shared between the csum tree and log trees. */
+#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+
+/* For storing free space cache (v1 space cache). */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
+/* The inode number assigned to the special inode for storing free ino cache. */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
+/* Dummy objectid represents multiple objectids. */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
+/* All files have objectids in this range. */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
+
+
+/*
+ * The device items go into the chunk tree.
+ *
+ * The key is in the form
+ * (BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY,  <device_id>)
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
+#define BTRFS_BTREE_INODE_OBJECTID 1
+
+#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
+
+#define BTRFS_DEV_REPLACE_DEVID 0ULL
+
+/*
+ * Types start from here.
+ *
+ * Check btrfs_disk_key for details about types.
+ */
+
+/*
+ * Inode items have the data typically returned from stat and store other
+ * info about object characteristics.
+ *
+ * There is one for every file and dir in the FS.
+ */
+#define BTRFS_INODE_ITEM_KEY           1
+/* reserve 2-11 close to the inode for later flexibility */
+#define BTRFS_INODE_REF_KEY            12
+#define BTRFS_INODE_EXTREF_KEY         13
+#define BTRFS_XATTR_ITEM_KEY           24
+#define BTRFS_ORPHAN_ITEM_KEY          48
+
+/*
+ * Dir items are the name -> inode pointers in a directory.
+ *
+ * There is one for every name in a directory.
+ */
+#define BTRFS_DIR_LOG_ITEM_KEY  60
+#define BTRFS_DIR_LOG_INDEX_KEY 72
+#define BTRFS_DIR_ITEM_KEY     84
+#define BTRFS_DIR_INDEX_KEY    96
+
+/* Stores info (position, size ...) about a data extent of a file */
+#define BTRFS_EXTENT_DATA_KEY  108
+
+/*
+ * Extent csums are stored in a separate tree and hold csums for
+ * an entire extent on disk.
+ */
+#define BTRFS_EXTENT_CSUM_KEY  128
+
+/*
+ * Root items point to tree roots.
+ *
+ * They are typically in the root tree used by the super block to find all the
+ * other trees.
+ */
+#define BTRFS_ROOT_ITEM_KEY    132
+
+/*
+ * Root backrefs tie subvols and snapshots to the directory entries that
+ * reference them.
+ */
+#define BTRFS_ROOT_BACKREF_KEY 144
+
+/*
+ * Root refs make a fast index for listing all of the snapshots and
+ * subvolumes referenced by a given root.  They point directly to the
+ * directory item in the root that references the subvol.
+ */
+#define BTRFS_ROOT_REF_KEY     156
+
+/*
+ * Extent items are in the extent tree.
+ *
+ * These record which blocks are used, and how many references there are.
+ */
+#define BTRFS_EXTENT_ITEM_KEY  168
+
+/*
+ * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
+ * the length, so we save the level in key->offset instead of the length.
+ */
+#define BTRFS_METADATA_ITEM_KEY        169
+
+#define BTRFS_TREE_BLOCK_REF_KEY       176
+
+#define BTRFS_EXTENT_DATA_REF_KEY      178
+
+#define BTRFS_EXTENT_REF_V0_KEY                180
+
+#define BTRFS_SHARED_BLOCK_REF_KEY     182
+
+#define BTRFS_SHARED_DATA_REF_KEY      184
+
+/*
+ * Block groups give us hints into the extent allocation trees.
+ *
+ * Stores how much free space there is in a block group.
+ */
+#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
+
+/*
+ * Every block group is represented in the free space tree by a free space info
+ * item, which stores some accounting information. It is keyed on
+ * (block_group_start, FREE_SPACE_INFO, block_group_length).
+ */
+#define BTRFS_FREE_SPACE_INFO_KEY 198
+
+/*
+ * A free space extent tracks an extent of space that is free in a block group.
+ * It is keyed on (start, FREE_SPACE_EXTENT, length).
+ */
+#define BTRFS_FREE_SPACE_EXTENT_KEY 199
+
+/*
+ * When a block group becomes very fragmented, we convert it to use bitmaps
+ * instead of extents.
+ *
+ * A free space bitmap is keyed on (start, FREE_SPACE_BITMAP, length).
+ * The corresponding item is a bitmap with (length / sectorsize) bits.
+ */
+#define BTRFS_FREE_SPACE_BITMAP_KEY 200
+
+#define BTRFS_DEV_EXTENT_KEY   204
+#define BTRFS_DEV_ITEM_KEY     216
+#define BTRFS_CHUNK_ITEM_KEY   228
+
+/*
+ * Records the overall state of the qgroups.
+ *
+ * There's only one instance of this key present,
+ * (0, BTRFS_QGROUP_STATUS_KEY, 0)
+ */
+#define BTRFS_QGROUP_STATUS_KEY         240
+/*
+ * Records the currently used space of the qgroup.
+ *
+ * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_INFO_KEY           242
+
+/*
+ * Contains the user configured limits for the qgroup.
+ *
+ * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_LIMIT_KEY          244
+
+/*
+ * Records the child-parent relationship of qgroups. For
+ * each relation, 2 keys are present:
+ * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
+ * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
+ */
+#define BTRFS_QGROUP_RELATION_KEY       246
+
+/* Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. */
+#define BTRFS_BALANCE_ITEM_KEY 248
+
+/*
+ * The key type for tree items that are stored persistently, but do not need to
+ * exist for extended period of time. The items can exist in any tree.
+ *
+ * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - balance status item
+ *   (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
+ */
+#define BTRFS_TEMPORARY_ITEM_KEY       248
+
+/* Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY */
+#define BTRFS_DEV_STATS_KEY            249
+
+/*
+ * The key type for tree items that are stored persistently and usually exist
+ * for a long period, eg. filesystem lifetime. The item kinds can be status
+ * information, stats or preference values. The item can exist in any tree.
+ *
+ * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - device statistics, store IO stats in the device tree, one key for all
+ *   stats
+ *   (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
+ */
+#define BTRFS_PERSISTENT_ITEM_KEY      249
+
+/*
+ * Persistently stores the device replace state in the device tree.
+ *
+ * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
+ */
+#define BTRFS_DEV_REPLACE_KEY  250
+
+/*
+ * Stores items that allow quickly mapping UUIDs to something else.
+ *
+ * These items are part of the filesystem UUID tree.
+ * The key is built like this:
+ * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
+ */
+#define BTRFS_UUID_KEY_SUBVOL  251     /* for UUIDs assigned to subvols */
+#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252     /* for UUIDs assigned to
+                                                * received subvols */
+
+/*
+ * String items are for debugging.
+ *
+ * They just store a short string of data in the FS.
+ */
+#define BTRFS_STRING_ITEM_KEY  253
+
+
+
+/* 32 bytes in various csum fields */
+#define BTRFS_CSUM_SIZE 32
+
+/* Csum types */
+enum btrfs_csum_type {
+       BTRFS_CSUM_TYPE_CRC32   = 0,
+       BTRFS_CSUM_TYPE_XXHASH  = 1,
+       BTRFS_CSUM_TYPE_SHA256  = 2,
+       BTRFS_CSUM_TYPE_BLAKE2  = 3,
+};
+
+/*
+ * Flags definitions for directory entry item type.
+ *
+ * Used by:
+ * struct btrfs_dir_item.type
+ *
+ * Values 0..7 must match common file type values in fs_types.h.
+ */
+#define BTRFS_FT_UNKNOWN       0
+#define BTRFS_FT_REG_FILE      1
+#define BTRFS_FT_DIR           2
+#define BTRFS_FT_CHRDEV                3
+#define BTRFS_FT_BLKDEV                4
+#define BTRFS_FT_FIFO          5
+#define BTRFS_FT_SOCK          6
+#define BTRFS_FT_SYMLINK       7
+#define BTRFS_FT_XATTR         8
+#define BTRFS_FT_MAX           9
+
+#define BTRFS_FSID_SIZE 16
+#define BTRFS_UUID_SIZE 16
+
+/*
+ * The key defines the order in the tree, and so it also defines (optimal)
+ * block layout.
+ *
+ * Objectid and offset are interpreted based on type.
+ * Normally, objectid represents either a root number or an
+ * inode number.
+ *
+ * Type tells us things about the object, and is a kind of stream selector.
+ * Check the following URL for full references about btrfs_disk_key/btrfs_key:
+ * https://btrfs.wiki.kernel.org/index.php/Btree_Items
+ *
+ * btrfs_disk_key is in disk byte order.  struct btrfs_key is always
+ * in cpu native order.  Otherwise they are identical and their sizes
+ * should be the same (ie both packed)
+ */
+struct btrfs_disk_key {
+       __le64 objectid;
+       __u8 type;
+       __le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_key {
+       __u64 objectid;
+       __u8 type;
+       __u64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_dev_item {
+       /* The internal btrfs device id */
+       __le64 devid;
+
+       /* Size of the device */
+       __le64 total_bytes;
+
+       /* Bytes used */
+       __le64 bytes_used;
+
+       /* Optimal io alignment for this device */
+       __le32 io_align;
+
+       /* Optimal io width for this device */
+       __le32 io_width;
+
+       /* Minimal io size for this device */
+       __le32 sector_size;
+
+       /* Type and info about this device */
+       __le64 type;
+
+       /* Expected generation for this device */
+       __le64 generation;
+
+       /*
+        * Starting byte of this partition on the device,
+        * to allow for stripe alignment in the future.
+        */
+       __le64 start_offset;
+
+       /* Grouping information for allocation decisions */
+       __le32 dev_group;
+
+       /* Optimal seek speed 0-100 where 100 is fastest */
+       __u8 seek_speed;
+
+       /* Optimal bandwidth 0-100 where 100 is fastest */
+       __u8 bandwidth;
+
+       /* Btrfs generated uuid for this device */
+       __u8 uuid[BTRFS_UUID_SIZE];
+
+       /* UUID of FS who owns this device */
+       __u8 fsid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+       __le64 devid;
+       __le64 offset;
+       __u8 dev_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+       /* Size of this chunk in bytes */
+       __le64 length;
+
+       /* Objectid of the root referencing this chunk */
+       __le64 owner;
+
+       __le64 stripe_len;
+       __le64 type;
+
+       /* Optimal io alignment for this chunk */
+       __le32 io_align;
+
+       /* Optimal io width for this chunk */
+       __le32 io_width;
+
+       /* Minimal io size for this chunk */
+       __le32 sector_size;
+
+       /*
+        * 2^16 stripes is quite a lot, a second limit is the size of a single
+        * item in the btree.
+        */
+       __le16 num_stripes;
+
+       /* Sub stripes only matter for raid10 */
+       __le16 sub_stripes;
+       struct btrfs_stripe stripe;
+       /* additional stripes go here */
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_EXTENT        1
+#define BTRFS_FREE_SPACE_BITMAP        2
+
+struct btrfs_free_space_entry {
+       __le64 offset;
+       __le64 bytes;
+       __u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+       struct btrfs_disk_key location;
+       __le64 generation;
+       __le64 num_entries;
+       __le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
+#define BTRFS_HEADER_FLAG_WRITTEN      (1ULL << 0)
+#define BTRFS_HEADER_FLAG_RELOC                (1ULL << 1)
+
+/* Super block flags */
+/* Errors detected */
+#define BTRFS_SUPER_FLAG_ERROR         (1ULL << 2)
+
+#define BTRFS_SUPER_FLAG_SEEDING       (1ULL << 32)
+#define BTRFS_SUPER_FLAG_METADUMP      (1ULL << 33)
+#define BTRFS_SUPER_FLAG_METADUMP_V2   (1ULL << 34)
+#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
+#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)
+
+
+/*
+ * Items in the extent tree are used to record the objectid of the
+ * owner of the block and the number of references.
+ */
+struct btrfs_extent_item {
+       __le64 refs;
+       __le64 generation;
+       __le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_item_v0 {
+       __le32 refs;
+} __attribute__ ((__packed__));
+
+
+#define BTRFS_EXTENT_FLAG_DATA         (1ULL << 0)
+#define BTRFS_EXTENT_FLAG_TREE_BLOCK   (1ULL << 1)
+
+/* Use full backrefs for extent pointers in the block */
+#define BTRFS_BLOCK_FLAG_FULL_BACKREF  (1ULL << 8)
+
+/*
+ * This flag is only used internally by scrub and may be changed at any time;
+ * it is only declared here to avoid collisions.
+ */
+#define BTRFS_EXTENT_FLAG_SUPER                (1ULL << 48)
+
+struct btrfs_tree_block_info {
+       struct btrfs_disk_key key;
+       __u8 level;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_data_ref {
+       __le64 root;
+       __le64 objectid;
+       __le64 offset;
+       __le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_shared_data_ref {
+       __le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_inline_ref {
+       __u8 type;
+       __le64 offset;
+} __attribute__ ((__packed__));
+
+/* Old style backrefs item */
+struct btrfs_extent_ref_v0 {
+       __le64 root;
+       __le64 generation;
+       __le64 objectid;
+       __le32 count;
+} __attribute__ ((__packed__));
+
+
+/* Dev extents record used space on individual devices.
+ *
+ * The owner field points back to the chunk allocation mapping tree that
+ * allocated the extent.
+ * The chunk tree uuid field is a way to double check the owner.
+ */
+struct btrfs_dev_extent {
+       __le64 chunk_tree;
+       __le64 chunk_objectid;
+       __le64 chunk_offset;
+       __le64 length;
+       __u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_ref {
+       __le64 index;
+       __le16 name_len;
+       /* Name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_extref {
+       __le64 parent_objectid;
+       __le64 index;
+       __le16 name_len;
+       /* The name bytes start here; name[] itself occupies no space */
+       __u8   name[];
+} __attribute__ ((__packed__));
+
+struct btrfs_timespec {
+       __le64 sec;
+       __le32 nsec;
+} __attribute__ ((__packed__));
+
+/* Inode flags */
+#define BTRFS_INODE_NODATASUM          (1 << 0)
+#define BTRFS_INODE_NODATACOW          (1 << 1)
+#define BTRFS_INODE_READONLY           (1 << 2)
+#define BTRFS_INODE_NOCOMPRESS         (1 << 3)
+#define BTRFS_INODE_PREALLOC           (1 << 4)
+#define BTRFS_INODE_SYNC               (1 << 5)
+#define BTRFS_INODE_IMMUTABLE          (1 << 6)
+#define BTRFS_INODE_APPEND             (1 << 7)
+#define BTRFS_INODE_NODUMP             (1 << 8)
+#define BTRFS_INODE_NOATIME            (1 << 9)
+#define BTRFS_INODE_DIRSYNC            (1 << 10)
+#define BTRFS_INODE_COMPRESS           (1 << 11)
+/* Unsigned: (1 << 31) overflows int and would sign-extend in 64-bit masks */
+#define BTRFS_INODE_ROOT_ITEM_INIT     (1U << 31)
+
+#define BTRFS_INODE_FLAG_MASK                                          \
+       (BTRFS_INODE_NODATASUM |                                        \
+        BTRFS_INODE_NODATACOW |                                        \
+        BTRFS_INODE_READONLY |                                         \
+        BTRFS_INODE_NOCOMPRESS |                                       \
+        BTRFS_INODE_PREALLOC |                                         \
+        BTRFS_INODE_SYNC |                                             \
+        BTRFS_INODE_IMMUTABLE |                                        \
+        BTRFS_INODE_APPEND |                                           \
+        BTRFS_INODE_NODUMP |                                           \
+        BTRFS_INODE_NOATIME |                                          \
+        BTRFS_INODE_DIRSYNC |                                          \
+        BTRFS_INODE_COMPRESS |                                         \
+        BTRFS_INODE_ROOT_ITEM_INIT)
+
+struct btrfs_inode_item {
+       /* Nfs style generation number */
+       __le64 generation;
+       /* Transid that last touched this inode */
+       __le64 transid;
+       __le64 size;
+       __le64 nbytes;
+       __le64 block_group;
+       __le32 nlink;
+       __le32 uid;
+       __le32 gid;
+       __le32 mode;
+       __le64 rdev;
+       __le64 flags;
+
+       /* Modification sequence number for NFS */
+       __le64 sequence;
+
+       /*
+        * A little future expansion, for more than this we can just grow the
+        * inode item and version it
+        */
+       __le64 reserved[4];
+       struct btrfs_timespec atime;
+       struct btrfs_timespec ctime;
+       struct btrfs_timespec mtime;
+       struct btrfs_timespec otime;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_log_item {
+       __le64 end;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_item {
+       struct btrfs_disk_key location;
+       __le64 transid;
+       __le16 data_len;
+       __le16 name_len;
+       __u8 type;
+} __attribute__ ((__packed__));
+
+#define BTRFS_ROOT_SUBVOL_RDONLY       (1ULL << 0)
+
+/*
+ * Internal in-memory flag that a subvolume has been marked for deletion but
+ * still visible as a directory
+ */
+#define BTRFS_ROOT_SUBVOL_DEAD         (1ULL << 48)
+
+struct btrfs_root_item {
+       struct btrfs_inode_item inode;
+       __le64 generation;
+       __le64 root_dirid;
+       __le64 bytenr;
+       __le64 byte_limit;
+       __le64 bytes_used;
+       __le64 last_snapshot;
+       __le64 flags;
+       __le32 refs;
+       struct btrfs_disk_key drop_progress;
+       __u8 drop_level;
+       __u8 level;
+
+       /*
+        * The following fields appear after subvol_uuids+subvol_times
+        * were introduced.
+        */
+
+       /*
+        * This generation number is used to test if the new fields are valid
+        * and up to date while reading the root item. Every time the root item
+        * is written out, the "generation" field is copied into this field. If
+        * anyone ever mounted the fs with an older kernel, we will have
+        * mismatching generation values here and thus must invalidate the
+        * new fields. See btrfs_update_root and btrfs_find_last_root for
+        * details.
+        * The offset of generation_v2 is also used as the start for the memset
+        * when invalidating the fields.
+        */
+       __le64 generation_v2;
+       __u8 uuid[BTRFS_UUID_SIZE];
+       __u8 parent_uuid[BTRFS_UUID_SIZE];
+       __u8 received_uuid[BTRFS_UUID_SIZE];
+       __le64 ctransid; /* Updated when an inode changes */
+       __le64 otransid; /* Trans when created */
+       __le64 stransid; /* Trans when sent. Non-zero for received subvol. */
+       __le64 rtransid; /* Trans when received. Non-zero for received subvol.*/
+       struct btrfs_timespec ctime;
+       struct btrfs_timespec otime;
+       struct btrfs_timespec stime;
+       struct btrfs_timespec rtime;
+       __le64 reserved[8]; /* For future */
+} __attribute__ ((__packed__));
+
+/* This is used for both forward and backward root refs */
+struct btrfs_root_ref {
+       __le64 dirid;
+       __le64 sequence;
+       __le16 name_len;
+} __attribute__ ((__packed__));
+
+struct btrfs_disk_balance_args {
+       /*
+        * Profiles to operate on.
+        *
+        * SINGLE is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE.
+        */
+       __le64 profiles;
+
+       /*
+        * Usage filter
+        * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N'
+        * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max
+        */
+       union {
+               __le64 usage;
+               struct {
+                       __le32 usage_min;
+                       __le32 usage_max;
+               };
+       };
+
+       /* Devid filter */
+       __le64 devid;
+
+       /* Devid subset filter [pstart..pend) */
+       __le64 pstart;
+       __le64 pend;
+
+       /* Btrfs virtual address space subset filter [vstart..vend) */
+       __le64 vstart;
+       __le64 vend;
+
+       /*
+        * Profile to convert to.
+        *
+        * SINGLE is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE.
+        */
+       __le64 target;
+
+       /* BTRFS_BALANCE_ARGS_* */
+       __le64 flags;
+
+       /*
+        * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'.
+        * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extended version can use minimum
+        * and maximum.
+        */
+       union {
+               __le64 limit;
+               struct {
+                       __le32 limit_min;
+                       __le32 limit_max;
+               };
+       };
+
+       /*
+        * Process chunks that cross stripes_min..stripes_max devices,
+        * BTRFS_BALANCE_ARGS_STRIPES_RANGE.
+        */
+       __le32 stripes_min;
+       __le32 stripes_max;
+
+       __le64 unused[6];
+} __attribute__ ((__packed__));
+
+/*
+ * Stores balance parameters to disk so that balance can be properly
+ * resumed after crash or unmount.
+ */
+struct btrfs_balance_item {
+       /* BTRFS_BALANCE_* */
+       __le64 flags;
+
+       struct btrfs_disk_balance_args data;
+       struct btrfs_disk_balance_args meta;
+       struct btrfs_disk_balance_args sys;
+
+       __le64 unused[4];
+} __attribute__ ((__packed__));
+
+enum {
+       BTRFS_FILE_EXTENT_INLINE   = 0,
+       BTRFS_FILE_EXTENT_REG      = 1,
+       BTRFS_FILE_EXTENT_PREALLOC = 2,
+       BTRFS_NR_FILE_EXTENT_TYPES = 3,
+};
+
+enum btrfs_compression_type {
+       BTRFS_COMPRESS_NONE  = 0,
+       BTRFS_COMPRESS_ZLIB  = 1,
+       BTRFS_COMPRESS_LZO   = 2,
+       BTRFS_COMPRESS_ZSTD  = 3,
+       BTRFS_NR_COMPRESS_TYPES = 4,
+};
+
+struct btrfs_file_extent_item {
+       /* Transaction id that created this extent */
+       __le64 generation;
+       /*
+        * Max number of bytes to hold this extent in ram.
+        *
+        * When we split a compressed extent we can't know how big each of the
+        * resulting pieces will be.  So, this is an upper limit on the size of
+        * the extent in ram instead of an exact limit.
+        */
+       __le64 ram_bytes;
+
+       /*
+        * 32 bits for the various ways we might encode the data,
+        * including compression and encryption.  If any of these
+        * are set to something a given disk format doesn't understand
+        * it is treated like an incompat flag for reading and writing,
+        * but not for stat.
+        */
+       __u8 compression;
+       __u8 encryption;
+       __le16 other_encoding; /* Spare for later use */
+
+       /* Are we inline data or a real extent? */
+       __u8 type;
+
+       /*
+        * Disk space consumed by the extent, checksum blocks are not included
+        * in these numbers
+        *
+        * At this offset in the structure, the inline extent data start.
+        */
+       __le64 disk_bytenr;
+       __le64 disk_num_bytes;
+
+       /*
+        * The logical offset inside the file extent.
+        *
+        * This allows a file extent to point into the middle of an existing
+        * extent on disk, sharing it between two snapshots (useful if some
+        * bytes in the middle of the extent have changed).
+        */
+       __le64 offset;
+
+       /*
+        * The logical number of bytes this file extent is referencing (no
+        * csums included).
+        *
+        * This always reflects the size uncompressed and without encoding.
+        */
+       __le64 num_bytes;
+
+} __attribute__ ((__packed__));
+
+struct btrfs_csum_item {
+       __u8 csum;
+} __attribute__ ((__packed__));
+
+enum btrfs_dev_stat_values {
+       /* Disk I/O failure stats */
+       BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
+       BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
+       BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
+
+       /* Stats for indirect indications for I/O failures */
+       BTRFS_DEV_STAT_CORRUPTION_ERRS, /* Checksum error, bytenr error or
+                                        * contents is illegal: this is an
+                                        * indication that the block was damaged
+                                        * during read or write, or written to
+                                        * wrong location or read from wrong
+                                        * location */
+       BTRFS_DEV_STAT_GENERATION_ERRS, /* An indication that blocks have not
+                                        * been written */
+
+       BTRFS_DEV_STAT_VALUES_MAX
+};
+
+struct btrfs_dev_stats_item {
+       /*
+        * Grow this item struct at the end for future enhancements and keep
+        * the existing values unchanged.
+        */
+       __le64 values[BTRFS_DEV_STAT_VALUES_MAX];
+} __attribute__ ((__packed__));
+
+#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS    0
+#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID     1
+
+struct btrfs_dev_replace_item {
+       /*
+        * Grow this item struct at the end for future enhancements and keep
+        * the existing values unchanged.
+        */
+       __le64 src_devid;
+       __le64 cursor_left;
+       __le64 cursor_right;
+       __le64 cont_reading_from_srcdev_mode;
+
+       __le64 replace_state;
+       __le64 time_started;
+       __le64 time_stopped;
+       __le64 num_write_errors;
+       __le64 num_uncorrectable_read_errors;
+} __attribute__ ((__packed__));
+
+/* Different types of block groups (and chunks) */
+#define BTRFS_BLOCK_GROUP_DATA         (1ULL << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM       (1ULL << 1)
+#define BTRFS_BLOCK_GROUP_METADATA     (1ULL << 2)
+#define BTRFS_BLOCK_GROUP_RAID0                (1ULL << 3)
+#define BTRFS_BLOCK_GROUP_RAID1                (1ULL << 4)
+#define BTRFS_BLOCK_GROUP_DUP          (1ULL << 5)
+#define BTRFS_BLOCK_GROUP_RAID10       (1ULL << 6)
+#define BTRFS_BLOCK_GROUP_RAID5         (1ULL << 7)
+#define BTRFS_BLOCK_GROUP_RAID6         (1ULL << 8)
+#define BTRFS_BLOCK_GROUP_RAID1C3       (1ULL << 9)
+#define BTRFS_BLOCK_GROUP_RAID1C4       (1ULL << 10)
+#define BTRFS_BLOCK_GROUP_RESERVED     (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
+                                        BTRFS_SPACE_INFO_GLOBAL_RSV)
+
+enum btrfs_raid_types {
+       BTRFS_RAID_RAID10,
+       BTRFS_RAID_RAID1,
+       BTRFS_RAID_DUP,
+       BTRFS_RAID_RAID0,
+       BTRFS_RAID_SINGLE,
+       BTRFS_RAID_RAID5,
+       BTRFS_RAID_RAID6,
+       BTRFS_RAID_RAID1C3,
+       BTRFS_RAID_RAID1C4,
+       BTRFS_NR_RAID_TYPES
+};
+
+#define BTRFS_BLOCK_GROUP_TYPE_MASK    (BTRFS_BLOCK_GROUP_DATA |    \
+                                        BTRFS_BLOCK_GROUP_SYSTEM |  \
+                                        BTRFS_BLOCK_GROUP_METADATA)
+
+#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 |   \
+                                        BTRFS_BLOCK_GROUP_RAID1 |   \
+                                        BTRFS_BLOCK_GROUP_RAID1C3 | \
+                                        BTRFS_BLOCK_GROUP_RAID1C4 | \
+                                        BTRFS_BLOCK_GROUP_RAID5 |   \
+                                        BTRFS_BLOCK_GROUP_RAID6 |   \
+                                        BTRFS_BLOCK_GROUP_DUP |     \
+                                        BTRFS_BLOCK_GROUP_RAID10)
+#define BTRFS_BLOCK_GROUP_RAID56_MASK  (BTRFS_BLOCK_GROUP_RAID5 |   \
+                                        BTRFS_BLOCK_GROUP_RAID6)
+
+#define BTRFS_BLOCK_GROUP_RAID1_MASK   (BTRFS_BLOCK_GROUP_RAID1 |   \
+                                        BTRFS_BLOCK_GROUP_RAID1C3 | \
+                                        BTRFS_BLOCK_GROUP_RAID1C4)
+
+/*
+ * We need a bit for restriper to be able to tell when chunks of type
+ * SINGLE are available.  This "extended" profile format is used in
+ * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
+ * (on-disk).  The corresponding on-disk bit in chunk.type is reserved
+ * to avoid remappings between two formats in future.
+ */
+#define BTRFS_AVAIL_ALLOC_BIT_SINGLE   (1ULL << 48)
+
+/*
+ * A fake block group type that is used to communicate global block reserve
+ * size to userspace via the SPACE_INFO ioctl.
+ */
+#define BTRFS_SPACE_INFO_GLOBAL_RSV    (1ULL << 49)
+
+#define BTRFS_EXTENDED_PROFILE_MASK    (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
+                                        BTRFS_AVAIL_ALLOC_BIT_SINGLE)
+
+/* Convert between on-disk chunk flags and the extended (SINGLE-bit) format */
+static inline __u64 chunk_to_extended(__u64 flags)
+{
+       if (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
+               return flags;
+       return flags | BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+}
+static inline __u64 extended_to_chunk(__u64 flags)
+{
+       return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+}
+
+struct btrfs_block_group_item {
+       __le64 used;
+       __le64 chunk_objectid;
+       __le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_info {
+       __le32 extent_count;
+       __le32 flags;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
+
+#define BTRFS_QGROUP_LEVEL_SHIFT               48
+static inline __u64 btrfs_qgroup_level(__u64 qgroupid)
+{
+       return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+}
+
+/* Is subvolume quota turned on? */
+#define BTRFS_QGROUP_STATUS_FLAG_ON            (1ULL << 0)
+
+/* Is qgroup rescan running? */
+#define BTRFS_QGROUP_STATUS_FLAG_RESCAN                (1ULL << 1)
+
+/*
+ * Some qgroup entries are known to be out of date, either because the
+ * configuration has changed in a way that makes a rescan necessary, or
+ * because the fs has been mounted with a non-qgroup-aware version.
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT  (1ULL << 2)
+
+#define BTRFS_QGROUP_STATUS_VERSION        1
+
+struct btrfs_qgroup_status_item {
+       __le64 version;
+       /*
+        * The generation is updated during every commit. As older
+        * versions of btrfs are not aware of qgroups, it will be
+        * possible to detect inconsistencies by checking the
+        * generation on mount time.
+        */
+       __le64 generation;
+
+       /* Flag definitions see above */
+       __le64 flags;
+
+       /*
+        * Only used during scanning to record the progress of the scan.
+        * It contains a logical address.
+        */
+       __le64 rescan;
+} __attribute__ ((__packed__));
+
+struct btrfs_qgroup_info_item {
+       __le64 generation;
+       __le64 rfer;
+       __le64 rfer_cmpr;
+       __le64 excl;
+       __le64 excl_cmpr;
+} __attribute__ ((__packed__));
+
+/*
+ * Flags definition for qgroup limits
+ *
+ * Used by:
+ * struct btrfs_qgroup_limit.flags
+ * struct btrfs_qgroup_limit_item.flags
+ */
+#define BTRFS_QGROUP_LIMIT_MAX_RFER    (1ULL << 0)
+#define BTRFS_QGROUP_LIMIT_MAX_EXCL    (1ULL << 1)
+#define BTRFS_QGROUP_LIMIT_RSV_RFER    (1ULL << 2)
+#define BTRFS_QGROUP_LIMIT_RSV_EXCL    (1ULL << 3)
+#define BTRFS_QGROUP_LIMIT_RFER_CMPR   (1ULL << 4)
+#define BTRFS_QGROUP_LIMIT_EXCL_CMPR   (1ULL << 5)
+
+struct btrfs_qgroup_limit_item {
+       /* Only updated when any of the other values change. */
+       __le64 flags;
+       __le64 max_rfer;
+       __le64 max_excl;
+       __le64 rsv_rfer;
+       __le64 rsv_excl;
+} __attribute__ ((__packed__));
+
+/*
+ * Just in case we somehow lose the roots and are not able to mount,
+ * we store an array of the roots from previous transactions in the super.
+ */
+#define BTRFS_NUM_BACKUP_ROOTS 4
+struct btrfs_root_backup {
+       __le64 tree_root;
+       __le64 tree_root_gen;
+
+       __le64 chunk_root;
+       __le64 chunk_root_gen;
+
+       __le64 extent_root;
+       __le64 extent_root_gen;
+
+       __le64 fs_root;
+       __le64 fs_root_gen;
+
+       __le64 dev_root;
+       __le64 dev_root_gen;
+
+       __le64 csum_root;
+       __le64 csum_root_gen;
+
+       __le64 total_bytes;
+       __le64 bytes_used;
+       __le64 num_devices;
+       /* future */
+       __le64 unused_64[4];
+
+       u8 tree_root_level;
+       u8 chunk_root_level;
+       u8 extent_root_level;
+       u8 fs_root_level;
+       u8 dev_root_level;
+       u8 csum_root_level;
+       /* future and to align */
+       u8 unused_8[10];
+} __attribute__ ((__packed__));
+
+/*
+ * This is a very generous portion of the super block, giving us room to
+ * translate 14 chunks with 3 stripes each.
+ */
+#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+
+#define BTRFS_LABEL_SIZE 256
+
+/* The super block basically lists the main trees of the FS. */
+struct btrfs_super_block {
+       /* The first 4 fields must match struct btrfs_header */
+       u8 csum[BTRFS_CSUM_SIZE];
+       /* FS specific UUID, visible to user */
+       u8 fsid[BTRFS_FSID_SIZE];
+       __le64 bytenr; /* this block number */
+       __le64 flags;
+
+       /* Allowed to be different from the btrfs_header from here on down. */
+       __le64 magic;
+       __le64 generation;
+       __le64 root;
+       __le64 chunk_root;
+       __le64 log_root;
+
+       /* This will help find the new super based on the log root. */
+       __le64 log_root_transid;
+       __le64 total_bytes;
+       __le64 bytes_used;
+       __le64 root_dir_objectid;
+       __le64 num_devices;
+       __le32 sectorsize;
+       __le32 nodesize;
+       __le32 __unused_leafsize;
+       __le32 stripesize;
+       __le32 sys_chunk_array_size;
+       __le64 chunk_root_generation;
+       __le64 compat_flags;
+       __le64 compat_ro_flags;
+       __le64 incompat_flags;
+       __le16 csum_type;
+       u8 root_level;
+       u8 chunk_root_level;
+       u8 log_root_level;
+       struct btrfs_dev_item dev_item;
+
+       char label[BTRFS_LABEL_SIZE];
+
+       __le64 cache_generation;
+       __le64 uuid_tree_generation;
+
+       /* The UUID written into btree blocks */
+       u8 metadata_uuid[BTRFS_FSID_SIZE];
+
+       /* Future expansion */
+       __le64 reserved[28];
+       u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
+       struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
+} __attribute__ ((__packed__));
+
+/*
+ * Feature flags
+ *
+ * Used by:
+ * struct btrfs_super_block::(compat|compat_ro|incompat)_flags
+ * struct btrfs_ioctl_feature_flags
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE                (1ULL << 0)
+
+/*
+ * Older kernels (< 4.9) on big-endian systems produced broken free space tree
+ * bitmaps, and btrfs-progs also used to corrupt the free space tree (versions
+ * < 4.7.3).  If this bit is clear, then the free space tree cannot be trusted.
+ * btrfs-progs can also intentionally clear this bit to ask the kernel to
+ * rebuild the free space tree, however this might not work on older kernels
+ * that do not know about this bit. If not sure, clear the cache manually on
+ * first mount when booting older kernel versions.
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID  (1ULL << 1)
+
+#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF   (1ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL  (1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS    (1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO    (1ULL << 3)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD   (1ULL << 4)
+
+/*
+ * Older kernels tried to do bigger metadata blocks, but the
+ * code was pretty buggy.  Let's not let them try anymore.
+ */
+#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA    (1ULL << 5)
+
+#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF   (1ULL << 6)
+#define BTRFS_FEATURE_INCOMPAT_RAID56          (1ULL << 7)
+#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
+#define BTRFS_FEATURE_INCOMPAT_NO_HOLES                (1ULL << 9)
+#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID   (1ULL << 10)
+#define BTRFS_FEATURE_INCOMPAT_RAID1C34                (1ULL << 11)
+
+/*
+ * Compat flags that we support.
+ *
+ * If any incompat flags are set other than the ones specified below then we
+ * will fail to mount.
+ */
+#define BTRFS_FEATURE_COMPAT_SUPP              0ULL
+#define BTRFS_FEATURE_COMPAT_SAFE_SET          0ULL
+#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR                0ULL
+
+#define BTRFS_FEATURE_COMPAT_RO_SUPP                   \
+       (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |      \
+        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
+
+#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET       0ULL
+#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR     0ULL
+
+#define BTRFS_FEATURE_INCOMPAT_SUPP                    \
+       (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |         \
+        BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |        \
+        BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |          \
+        BTRFS_FEATURE_INCOMPAT_BIG_METADATA |          \
+        BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |          \
+        BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |         \
+        BTRFS_FEATURE_INCOMPAT_RAID56 |                \
+        BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |         \
+        BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |       \
+        BTRFS_FEATURE_INCOMPAT_NO_HOLES        |       \
+        BTRFS_FEATURE_INCOMPAT_METADATA_UUID   |       \
+        BTRFS_FEATURE_INCOMPAT_RAID1C34)
+
+#define BTRFS_FEATURE_INCOMPAT_SAFE_SET                        \
+       (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
+#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR              0ULL
+
+#define BTRFS_BACKREF_REV_MAX          256
+#define BTRFS_BACKREF_REV_SHIFT                56
+#define BTRFS_BACKREF_REV_MASK         (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
+                                        BTRFS_BACKREF_REV_SHIFT)
+
+#define BTRFS_OLD_BACKREF_REV          0
+#define BTRFS_MIXED_BACKREF_REV                1
+
+#define BTRFS_MAX_LEVEL 8
+
+/* Every tree block (leaf or node) starts with this header. */
+struct btrfs_header {
+       /* These first four must match the super block */
+       u8 csum[BTRFS_CSUM_SIZE];
+       u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+       __le64 bytenr; /* Which block this node is supposed to live in */
+       __le64 flags;
+
+       /* Allowed to be different from the super from here on down. */
+       u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+       __le64 generation;
+       __le64 owner;
+       __le32 nritems;
+       u8 level;
+} __attribute__ ((__packed__));
+
+/*
+ * A leaf is full of items. Offset and size tell us where to find
+ * the item in the leaf (relative to the start of the data area).
+ */
+struct btrfs_item {
+       struct btrfs_disk_key key;
+       __le32 offset;
+       __le32 size;
+} __attribute__ ((__packed__));
+
+/*
+ * leaves have an item area and a data area:
+ * [item0, item1....itemN] [free space] [dataN...data1, data0]
+ *
+ * The data is separate from the items to get the keys closer together
+ * during searches.
+ */
+struct btrfs_leaf {
+       struct btrfs_header header;
+       struct btrfs_item items[];
+} __attribute__ ((__packed__));
+
+/*
+ * All non-leaf blocks are nodes, they hold only keys and pointers to children
+ * blocks.
+ */
+struct btrfs_key_ptr {
+       struct btrfs_disk_key key;
+       __le64 blockptr;
+       __le64 generation;
+} __attribute__ ((__packed__));
+
+struct btrfs_node {
+       struct btrfs_header header;
+       struct btrfs_key_ptr ptrs[];
+} __attribute__ ((__packed__));
+
+#endif /* __BTRFS_TREE_H__ */
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
new file mode 100644 (file)
index 0000000..a39ad72
--- /dev/null
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include "ctree.h"
+
+/*
+ * Find the last ROOT_ITEM of @objectid in @root.
+ *
+ * The tree is searched for the key (@objectid, BTRFS_ROOT_ITEM_KEY, (u64)-1)
+ * and the item in the slot just before the returned position is examined.
+ * If it is a ROOT_ITEM of @objectid, sizeof(*item) bytes of it are copied to
+ * @item and its key is copied to @key.
+ *
+ * Return: 0 on success, -ENOMEM if no path could be allocated, -ENOENT if no
+ * matching item precedes the search position, or the negative error returned
+ * by btrfs_search_slot().
+ */
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
+                       struct btrfs_root_item *item, struct btrfs_key *key)
+{
+       struct btrfs_path *path = btrfs_alloc_path();
+       struct extent_buffer *leaf;
+       struct btrfs_key target;
+       struct btrfs_key prev_key;
+       int prev_slot;
+       int ret;
+
+       if (!path)
+               return -ENOMEM;
+
+       target.offset = (u64)-1;
+       target.type = BTRFS_ROOT_ITEM_KEY;
+       target.objectid = objectid;
+
+       ret = btrfs_search_slot(NULL, root, &target, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       /* Nothing in front of the search position in this leaf */
+       if (!path->slots[0]) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       /* An exact match for offset (u64)-1 is not expected */
+       BUG_ON(ret == 0);
+       leaf = path->nodes[0];
+       prev_slot = path->slots[0] - 1;
+       btrfs_item_key_to_cpu(leaf, &prev_key, prev_slot);
+
+       ret = -ENOENT;
+       if (prev_key.objectid == objectid &&
+           prev_key.type == BTRFS_ROOT_ITEM_KEY) {
+               read_extent_buffer(leaf, item,
+                                  btrfs_item_ptr_offset(leaf, prev_slot),
+                                  sizeof(*item));
+               memcpy(key, &prev_key, sizeof(prev_key));
+               ret = 0;
+       }
+out:
+       btrfs_free_path(path);
+       return ret;
+}
diff --git a/fs/btrfs/root.c b/fs/btrfs/root.c
deleted file mode 100644 (file)
index 127b67f..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include "btrfs.h"
-
-static void read_root_item(struct btrfs_path *p, struct btrfs_root_item *item)
-{
-       u32 len;
-       int reset = 0;
-
-       len = btrfs_path_item_size(p);
-       memcpy(item, btrfs_path_item_ptr(p, struct btrfs_root_item), len);
-       btrfs_root_item_to_cpu(item);
-
-       if (len < sizeof(*item))
-               reset = 1;
-       if (!reset && item->generation != item->generation_v2) {
-               if (item->generation_v2 != 0)
-                       printf("%s: generation != generation_v2 in root item",
-                              __func__);
-               reset = 1;
-       }
-       if (reset) {
-               memset(&item->generation_v2, 0,
-                      sizeof(*item) - offsetof(struct btrfs_root_item,
-                                               generation_v2));
-       }
-}
-
-int btrfs_find_root(u64 objectid, struct btrfs_root *root,
-                   struct btrfs_root_item *root_item)
-{
-       struct btrfs_path path;
-       struct btrfs_root_item my_root_item;
-
-       if (!btrfs_search_tree_key_type(&btrfs_info.tree_root, objectid,
-                                       BTRFS_ROOT_ITEM_KEY, &path))
-               return -1;
-
-       if (!root_item)
-               root_item = &my_root_item;
-       read_root_item(&path, root_item);
-
-       if (root) {
-               root->objectid = objectid;
-               root->bytenr = root_item->bytenr;
-               root->root_dirid = root_item->root_dirid;
-       }
-
-       btrfs_free_path(&path);
-       return 0;
-}
-
-u64 btrfs_lookup_root_ref(u64 subvolid, struct btrfs_root_ref *refp, char *name)
-{
-       struct btrfs_path path;
-       struct btrfs_key *key;
-       struct btrfs_root_ref *ref;
-       u64 res = -1ULL;
-
-       key = btrfs_search_tree_key_type(&btrfs_info.tree_root, subvolid,
-                                              BTRFS_ROOT_BACKREF_KEY, &path);
-
-       if (!key)
-               return -1ULL;
-
-       ref = btrfs_path_item_ptr(&path, struct btrfs_root_ref);
-       btrfs_root_ref_to_cpu(ref);
-
-       if (refp)
-               *refp = *ref;
-
-       if (name) {
-               if (ref->name_len > BTRFS_VOL_NAME_MAX) {
-                       printf("%s: volume name too long: %u\n", __func__,
-                              ref->name_len);
-                       goto out;
-               }
-
-               memcpy(name, ref + 1, ref->name_len);
-       }
-
-       res = key->offset;
-out:
-       btrfs_free_path(&path);
-       return res;
-}
-
index 06e54f3..2815673 100644 (file)
  * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
  */
 
-#include "btrfs.h"
 #include <malloc.h>
+#include "ctree.h"
+#include "btrfs.h"
+#include "disk-io.h"
 
-static int get_subvol_name(u64 subvolid, char *name, int max_len)
+/*
+ * Resolve the path of ino inside subvolume @root into @path_ret.
+ *
+ * Starting at @ino, the INODE_REF item of the current inode is looked up, its
+ * name followed by '/' is prepended to whatever @path_ret already holds, and
+ * the key offset of that item becomes the next inode to look up. The walk
+ * stops once BTRFS_FIRST_FREE_OBJECTID is reached.
+ *
+ * @path_ret must hold a NUL-terminated string on entry and must be at least
+ * PATH_MAX size.
+ *
+ * Return: 0 on success, -ENAMETOOLONG if a name exceeds BTRFS_NAME_LEN or the
+ * result would not fit in PATH_MAX bytes, -ENOENT if no INODE_REF is found,
+ * -EUCLEAN on an unexpected exact key match, or the error returned by the
+ * tree search.
+ */
+static int get_path_in_subvol(struct btrfs_root *root, u64 ino, char *path_ret)
 {
-       struct btrfs_root_ref rref;
-       struct btrfs_inode_ref iref;
-       struct btrfs_root root;
-       u64 dir;
-       char tmp[max(BTRFS_VOL_NAME_MAX, BTRFS_NAME_MAX)];
-       char *ptr;
-
-       ptr = name + max_len - 1;
-       *ptr = '\0';
-
-       while (subvolid != BTRFS_FS_TREE_OBJECTID) {
-               subvolid = btrfs_lookup_root_ref(subvolid, &rref, tmp);
-
-               if (subvolid == -1ULL)
-                       return -1;
-
-               ptr -= rref.name_len + 1;
-               if (ptr < name)
-                       goto too_long;
-
-               memcpy(ptr + 1, tmp, rref.name_len);
-               *ptr = '/';
-
-               if (btrfs_find_root(subvolid, &root, NULL))
-                       return -1;
-
-               dir = rref.dirid;
-
-               while (dir != BTRFS_FIRST_FREE_OBJECTID) {
-                       dir = btrfs_lookup_inode_ref(&root, dir, &iref, tmp);
-
-                       if (dir == -1ULL)
-                               return -1;
-
-                       ptr -= iref.name_len + 1;
-                       if (ptr < name)
-                               goto too_long;
-
-                       memcpy(ptr + 1, tmp, iref.name_len);
-                       *ptr = '/';
+       struct btrfs_path path;
+       struct btrfs_key key;
+       u64 cur = ino;
+       int ret = 0;
+
+       btrfs_init_path(&path);
+       while (cur != BTRFS_FIRST_FREE_OBJECTID) {
+               struct btrfs_inode_ref *iref;
+               size_t cur_len;
+               int name_len;
+
+               btrfs_release_path(&path);
+               key.objectid = cur;
+               key.type = BTRFS_INODE_REF_KEY;
+               key.offset = (u64)-1;
+
+               ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+               /* Impossible */
+               if (ret == 0)
+                       ret = -EUCLEAN;
+               if (ret < 0)
+                       goto out;
+               ret = btrfs_previous_item(root, &path, cur,
+                                         BTRFS_INODE_REF_KEY);
+               if (ret > 0)
+                       ret = -ENOENT;
+               if (ret < 0)
+                       goto out;
+
+               iref = btrfs_item_ptr(path.nodes[0], path.slots[0],
+                                     struct btrfs_inode_ref);
+               name_len = btrfs_inode_ref_name_len(path.nodes[0],
+                                                   iref);
+               cur_len = strlen(path_ret);
+               /*
+                * The new string is name + '/' + old string + NUL; refuse it
+                * unless all of that fits into the PATH_MAX sized buffer.
+                */
+               if (name_len > BTRFS_NAME_LEN ||
+                   cur_len + name_len + 1 >= PATH_MAX) {
+                       ret = -ENAMETOOLONG;
+                       goto out;
                 }
+               /* Make room in front (regions overlap, hence memmove) */
+               memmove(path_ret + name_len + 1, path_ret, cur_len + 1);
+               read_extent_buffer(path.nodes[0], path_ret,
+                                  (unsigned long)(iref + 1), name_len);
+               path_ret[name_len] = '/';
+
+               btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+               cur = key.offset;
         }
-
-       if (ptr == name + max_len - 1) {
-               name[0] = '/';
-               name[1] = '\0';
-       } else {
-               memmove(name, ptr, name + max_len - ptr);
-       }
-
-       return 0;
-
-too_long:
-       printf("%s: subvolume name too long\n", __func__);
-       return -1;
+out:
+       btrfs_release_path(&path);
+       return ret;
 }
 
-u64 btrfs_get_default_subvol_objectid(void)
+/*
+ * Build the path of subvolume @root into @path_ret.
+ *
+ * Starting from the objectid of @root, the ROOT_BACKREF item of the current
+ * subvolume is looked up in the tree root. Its name followed by '/' is
+ * prepended to @path_ret, then get_path_in_subvol() prepends the path of the
+ * referencing directory inside the subvolume named by the item's key offset,
+ * which becomes the next subvolume to process. The walk ends at
+ * BTRFS_FS_TREE_OBJECTID, after which a leading '/' is added.
+ *
+ * @path_ret must be at least PATH_MAX size; its previous content is discarded.
+ *
+ * Return: 0 on success, -ENAMETOOLONG if a name exceeds BTRFS_NAME_LEN or the
+ * result would not fit in PATH_MAX bytes, -ENOENT if no ROOT_BACKREF is
+ * found, -EUCLEAN on an unexpected exact key match, or the error returned by
+ * the tree search, btrfs_read_fs_root() or get_path_in_subvol().
+ */
+static int list_one_subvol(struct btrfs_root *root, char *path_ret)
 {
-       struct btrfs_dir_item item;
-
-       if (btrfs_lookup_dir_item(&btrfs_info.tree_root,
-                                 btrfs_info.sb.root_dir_objectid, "default", 7,
-                                 &item))
-               return BTRFS_FS_TREE_OBJECTID;
-       return item.location.objectid;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_path path;
+       struct btrfs_key key;
+       size_t cur_len;
+       u64 cur = root->root_key.objectid;
+       int ret = 0;
+
+       path_ret[0] = '\0';
+       btrfs_init_path(&path);
+       while (cur != BTRFS_FS_TREE_OBJECTID) {
+               struct btrfs_root_ref *rr;
+               struct btrfs_key location;
+               int name_len;
+               u64 ino;
+
+               key.objectid = cur;
+               key.type = BTRFS_ROOT_BACKREF_KEY;
+               key.offset = (u64)-1;
+               btrfs_release_path(&path);
+
+               ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
+               if (ret == 0)
+                       ret = -EUCLEAN;
+               if (ret < 0)
+                       goto out;
+               ret = btrfs_previous_item(tree_root, &path, cur,
+                                         BTRFS_ROOT_BACKREF_KEY);
+               if (ret > 0)
+                       ret = -ENOENT;
+               if (ret < 0)
+                       goto out;
+
+               /* Get the subvolume name */
+               rr = btrfs_item_ptr(path.nodes[0], path.slots[0],
+                                   struct btrfs_root_ref);
+               name_len = btrfs_root_ref_name_len(path.nodes[0], rr);
+               cur_len = strlen(path_ret);
+               /*
+                * The new string is name + '/' + old string + NUL; refuse it
+                * unless all of that fits into the PATH_MAX sized buffer.
+                */
+               if (name_len > BTRFS_NAME_LEN ||
+                   cur_len + name_len + 1 >= PATH_MAX) {
+                       ret = -ENAMETOOLONG;
+                       goto out;
+               }
+               ino = btrfs_root_ref_dirid(path.nodes[0], rr);
+               /* Make room in front (regions overlap, hence memmove) */
+               memmove(path_ret + name_len + 1, path_ret, cur_len + 1);
+               read_extent_buffer(path.nodes[0], path_ret,
+                                  (unsigned long)(rr + 1), name_len);
+               path_ret[name_len] = '/';
+
+               /* Get the path inside the parent subvolume */
+               btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+               location.objectid = key.offset;
+               location.type = BTRFS_ROOT_ITEM_KEY;
+               location.offset = (u64)-1;
+               root = btrfs_read_fs_root(fs_info, &location);
+               if (IS_ERR(root)) {
+                       ret = PTR_ERR(root);
+                       goto out;
+               }
+               ret = get_path_in_subvol(root, ino, path_ret);
+               if (ret < 0)
+                       goto out;
+               cur = key.offset;
+       }
+       /* Add the leading '/' */
+       cur_len = strlen(path_ret);
+       if (cur_len + 1 >= PATH_MAX) {
+               ret = -ENAMETOOLONG;
+               goto out;
+       }
+       memmove(path_ret + 1, path_ret, cur_len + 1);
+       path_ret[0] = '/';
+out:
+       btrfs_release_path(&path);
+       return ret;
 }
 
-static void list_subvols(u64 tree, char *nameptr, int max_name_len, int level)
+/*
+ * Print one "ID ... gen ... path ..." line for fs_info->fs_root and for every
+ * ROOT_ITEM in the tree root whose objectid lies between
+ * BTRFS_FIRST_FREE_OBJECTID and BTRFS_LAST_FREE_OBJECTID.
+ *
+ * Roots for which btrfs_read_fs_root() reports -ENOENT are skipped; any other
+ * failure aborts the listing.
+ *
+ * Return: a negative error code on failure, a non-negative value otherwise.
+ */
+static int list_subvolums(struct btrfs_fs_info *fs_info)
 {
-       struct btrfs_key key, *found_key;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_root *root;
         struct btrfs_path path;
-       struct btrfs_root_ref *ref;
-       int res;
-
-       key.objectid = tree;
-       key.type = BTRFS_ROOT_REF_KEY;
+       struct btrfs_key key;
+       char *result;
+       int ret = 0;
+
+       result = malloc(PATH_MAX);
+       if (!result)
+               return -ENOMEM;
+       /* Initialised up front so that every "goto out" may release it */
+       btrfs_init_path(&path);
+
+       ret = list_one_subvol(fs_info->fs_root, result);
+       if (ret < 0)
+               goto out;
+       root = fs_info->fs_root;
+       printf("ID %llu gen %llu path %.*s\n",
+               root->root_key.objectid, btrfs_root_generation(&root->root_item),
+               PATH_MAX, result);
+
+       key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+       key.type = BTRFS_ROOT_ITEM_KEY;
         key.offset = 0;
-
-       if (btrfs_search_tree(&btrfs_info.tree_root, &key, &path))
-               return;
-
-       do {
-               found_key = btrfs_path_leaf_key(&path);
-               if (btrfs_comp_keys_type(&key, found_key))
+       ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
+       if (ret < 0)
+               goto out;
+       while (1) {
+               /* Slot is past the last item of this leaf: advance first */
+               if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
+                       goto next;
+
+               btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+               if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
                         break;
-
-               ref = btrfs_path_item_ptr(&path, struct btrfs_root_ref);
-               btrfs_root_ref_to_cpu(ref);
-
-               printf("ID %llu parent %llu name ", found_key->offset, tree);
-               if (nameptr && !get_subvol_name(found_key->offset, nameptr,
-                                               max_name_len))
-                       printf("%s\n", nameptr);
-               else
-                       printf("%.*s\n", (int) ref->name_len,
-                              (const char *) (ref + 1));
-
-               if (level > 0)
-                       list_subvols(found_key->offset, nameptr, max_name_len,
-                                    level - 1);
-               else
-                       printf("%s: Too much recursion, maybe skipping some "
-                              "subvolumes\n", __func__);
-       } while (!(res = btrfs_next_slot(&path)));
-
-       btrfs_free_path(&path);
+               if (key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
+                   key.type != BTRFS_ROOT_ITEM_KEY)
+                       goto next;
+               key.offset = (u64)-1;
+               root = btrfs_read_fs_root(fs_info, &key);
+               if (IS_ERR(root)) {
+                       ret = PTR_ERR(root);
+                       if (ret == -ENOENT)
+                               goto next;
+                       /*
+                        * Any other failure leaves an error pointer in @root,
+                        * which must not be handed to list_one_subvol().
+                        */
+                       goto out;
+               }
+               ret = list_one_subvol(root, result);
+               if (ret < 0)
+                       goto out;
+               printf("ID %llu gen %llu path %.*s\n",
+                       root->root_key.objectid,
+                       btrfs_root_generation(&root->root_item),
+                       PATH_MAX, result);
+next:
+               ret = btrfs_next_item(tree_root, &path);
+               if (ret < 0)
+                       goto out;
+               if (ret > 0) {
+                       ret = 0;
+                       break;
+               }
+       }
+out:
+       /* Drop whatever the search still holds in @path */
+       btrfs_release_path(&path);
+       free(result);
+       return ret;
 }
 
+/*
+ * List the subvolumes of the filesystem referenced by current_fs_info.
+ *
+ * Does nothing when current_fs_info is NULL. A failure of list_subvolums()
+ * is reported through error(); nothing is returned to the caller.
+ */
 void btrfs_list_subvols(void)
 {
-       char *nameptr = malloc(4096);
+       struct btrfs_fs_info *fs_info = current_fs_info;
+       int ret;
 
-       list_subvols(BTRFS_FS_TREE_OBJECTID, nameptr, nameptr ? 4096 : 0, 40);
-
-       if (nameptr)
-               free(nameptr);
+       if (!fs_info)
+               return;
+       ret = list_subvolums(fs_info);
+       if (ret < 0)
+               error("failed to list subvolume: %d", ret);
 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
deleted file mode 100644 (file)
index 913a4d4..0000000
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include <common.h>
-#include <log.h>
-#include <memalign.h>
-#include <part.h>
-#include <linux/compat.h>
-#include "btrfs.h"
-
-#define BTRFS_SUPER_FLAG_SUPP  (BTRFS_HEADER_FLAG_WRITTEN      \
-                                | BTRFS_HEADER_FLAG_RELOC      \
-                                | BTRFS_SUPER_FLAG_ERROR       \
-                                | BTRFS_SUPER_FLAG_SEEDING     \
-                                | BTRFS_SUPER_FLAG_METADUMP)
-
-#define BTRFS_SUPER_INFO_SIZE  4096
-
-/*
- * checks if a valid root backup is present.
- * considers the case when all root backups empty valid.
- * returns -1 in case of invalid root backup and 0 for valid.
- */
-static int btrfs_check_super_roots(struct btrfs_super_block *sb)
-{
-       struct btrfs_root_backup *root_backup;
-       int i, newest = -1;
-       int num_empty = 0;
-
-       for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; ++i) {
-               root_backup = sb->super_roots + i;
-
-               if (root_backup->tree_root == 0 && root_backup->tree_root_gen == 0)
-                       num_empty++;
-
-               if (root_backup->tree_root_gen == sb->generation)
-                       newest = i;
-       }
-
-       if (num_empty == BTRFS_NUM_BACKUP_ROOTS) {
-               return 0;
-       } else if (newest >= 0) {
-               return 0;
-       }
-
-       return -1;
-}
-
-static inline int is_power_of_2(u64 x)
-{
-       return !(x & (x - 1));
-}
-
-static int btrfs_check_super_csum(char *raw_disk_sb)
-{
-       struct btrfs_super_block *disk_sb =
-               (struct btrfs_super_block *) raw_disk_sb;
-       u16 csum_type = le16_to_cpu(disk_sb->csum_type);
-
-       if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
-               u32 crc = ~(u32) 0;
-               const int csum_size = sizeof(crc);
-               char result[csum_size];
-
-               crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, crc,
-                                     BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
-               btrfs_csum_final(crc, result);
-
-               if (memcmp(raw_disk_sb, result, csum_size))
-                       return -1;
-       } else {
-               return -1;
-       }
-
-       return 0;
-}
-
-static int btrfs_check_super(struct btrfs_super_block *sb)
-{
-       int ret = 0;
-
-       if (sb->flags & ~BTRFS_SUPER_FLAG_SUPP) {
-               printf("%s: Unsupported flags: %llu\n", __func__,
-                      sb->flags & ~BTRFS_SUPER_FLAG_SUPP);
-       }
-
-       if (sb->root_level > BTRFS_MAX_LEVEL) {
-               printf("%s: tree_root level too big: %d >= %d\n", __func__,
-                      sb->root_level, BTRFS_MAX_LEVEL);
-               ret = -1;
-       }
-
-       if (sb->chunk_root_level > BTRFS_MAX_LEVEL) {
-               printf("%s: chunk_root level too big: %d >= %d\n", __func__,
-                      sb->chunk_root_level, BTRFS_MAX_LEVEL);
-               ret = -1;
-       }
-
-       if (sb->log_root_level > BTRFS_MAX_LEVEL) {
-               printf("%s: log_root level too big: %d >= %d\n", __func__,
-                      sb->log_root_level, BTRFS_MAX_LEVEL);
-               ret = -1;
-       }
-
-       if (!is_power_of_2(sb->sectorsize) || sb->sectorsize < 4096 ||
-           sb->sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
-               printf("%s: invalid sectorsize %u\n", __func__,
-                      sb->sectorsize);
-               ret = -1;
-       }
-
-       if (!is_power_of_2(sb->nodesize) || sb->nodesize < sb->sectorsize ||
-           sb->nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
-               printf("%s: invalid nodesize %u\n", __func__, sb->nodesize);
-               ret = -1;
-       }
-
-       if (sb->nodesize != sb->__unused_leafsize) {
-               printf("%s: invalid leafsize %u, should be %u\n", __func__,
-                      sb->__unused_leafsize, sb->nodesize);
-               ret = -1;
-       }
-
-       if (!IS_ALIGNED(sb->root, sb->sectorsize)) {
-               printf("%s: tree_root block unaligned: %llu\n", __func__,
-                      sb->root);
-               ret = -1;
-       }
-
-       if (!IS_ALIGNED(sb->chunk_root, sb->sectorsize)) {
-               printf("%s: chunk_root block unaligned: %llu\n", __func__,
-                      sb->chunk_root);
-               ret = -1;
-       }
-
-       if (!IS_ALIGNED(sb->log_root, sb->sectorsize)) {
-               printf("%s: log_root block unaligned: %llu\n", __func__,
-                      sb->log_root);
-               ret = -1;
-       }
-
-       if (memcmp(sb->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
-               printf("%s: dev_item UUID does not match fsid\n", __func__);
-               ret = -1;
-       }
-
-       if (sb->bytes_used < 6*sb->nodesize) {
-               printf("%s: bytes_used is too small %llu\n", __func__,
-                      sb->bytes_used);
-               ret = -1;
-       }
-
-       if (!is_power_of_2(sb->stripesize)) {
-               printf("%s: invalid stripesize %u\n", __func__, sb->stripesize);
-               ret = -1;
-       }
-
-       if (sb->sys_chunk_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
-               printf("%s: system chunk array too big %u > %u\n", __func__,
-                      sb->sys_chunk_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
-               ret = -1;
-       }
-
-       if (sb->sys_chunk_array_size < sizeof(struct btrfs_key) +
-           sizeof(struct btrfs_chunk)) {
-               printf("%s: system chunk array too small %u < %zu\n", __func__,
-                      sb->sys_chunk_array_size, sizeof(struct btrfs_key)
-                      + sizeof(struct btrfs_chunk));
-               ret = -1;
-       }
-
-       return ret;
-}
-
-int btrfs_read_superblock(void)
-{
-       const u64 superblock_offsets[4] = {
-               0x10000ull,
-               0x4000000ull,
-               0x4000000000ull,
-               0x4000000000000ull
-       };
-       ALLOC_CACHE_ALIGN_BUFFER(char, raw_sb, BTRFS_SUPER_INFO_SIZE);
-       struct btrfs_super_block *sb = (struct btrfs_super_block *) raw_sb;
-       u64 dev_total_bytes;
-       int i;
-
-       dev_total_bytes = (u64) btrfs_part_info->size * btrfs_part_info->blksz;
-
-       btrfs_info.sb.generation = 0;
-
-       for (i = 0; i < 4; ++i) {
-               if (superblock_offsets[i] + sizeof(sb) > dev_total_bytes)
-                       break;
-
-               if (!btrfs_devread(superblock_offsets[i], BTRFS_SUPER_INFO_SIZE,
-                                  raw_sb))
-                       break;
-
-               if (btrfs_check_super_csum(raw_sb)) {
-                       debug("%s: invalid checksum at superblock mirror %i\n",
-                             __func__, i);
-                       continue;
-               }
-
-               btrfs_super_block_to_cpu(sb);
-
-               if (sb->magic != BTRFS_MAGIC) {
-                       debug("%s: invalid BTRFS magic 0x%016llX at "
-                             "superblock mirror %i\n", __func__, sb->magic, i);
-               } else if (sb->bytenr != superblock_offsets[i]) {
-                       printf("%s: invalid bytenr 0x%016llX (expected "
-                              "0x%016llX) at superblock mirror %i\n",
-                              __func__, sb->bytenr, superblock_offsets[i], i);
-               } else if (btrfs_check_super(sb)) {
-                       printf("%s: Checking superblock mirror %i failed\n",
-                              __func__, i);
-               } else if (sb->generation > btrfs_info.sb.generation) {
-                       memcpy(&btrfs_info.sb, sb, sizeof(*sb));
-               } else {
-                       /* Nothing */
-               }
-       }
-
-       if (!btrfs_info.sb.generation) {
-               debug("%s: No valid BTRFS superblock found!\n", __func__);
-               return -1;
-       }
-
-       if (btrfs_check_super_roots(&btrfs_info.sb)) {
-               printf("%s: No valid root_backup found!\n", __func__);
-               return -1;
-       }
-
-       if (sb->sectorsize != PAGE_SIZE) {
-               printf(
-       "%s: Unsupported sector size (%u), only supports %u as sector size\n",
-                       __func__, sb->sectorsize, PAGE_SIZE);
-               return -1;
-       }
-
-       if (btrfs_info.sb.num_devices != 1) {
-               printf("%s: Unsupported number of devices (%lli). This driver "
-                      "only supports filesystem on one device.\n", __func__,
-                      btrfs_info.sb.num_devices);
-               return -1;
-       }
-
-       debug("Chosen superblock with generation = %llu\n",
-             btrfs_info.sb.generation);
-
-       return 0;
-}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
new file mode 100644 (file)
index 0000000..fcf52d4
--- /dev/null
@@ -0,0 +1,1173 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <stdlib.h>
+#include <common.h>
+#include <fs_internal.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "volumes.h"
+#include "extent-io.h"
+
+/*
+ * Per-profile RAID attributes, indexed by the BTRFS_RAID_* constants.
+ *
+ * Every entry is written with designated initialisers, so any member that is
+ * not listed for a profile is zero.
+ */
+const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+       [BTRFS_RAID_RAID10] = {
+               .sub_stripes    = 2,
+               .dev_stripes    = 1,
+               .devs_max       = 0,    /* 0 == as many as possible */
+               .devs_min       = 4,
+               .tolerated_failures = 1,
+               .devs_increment = 2,
+               .ncopies        = 2,
+               .nparity        = 0,
+               .raid_name      = "raid10",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID10,
+       },
+       [BTRFS_RAID_RAID1] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 2,
+               .devs_min       = 2,
+               .tolerated_failures = 1,
+               .devs_increment = 2,
+               .ncopies        = 2,
+               .nparity        = 0,
+               .raid_name      = "raid1",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID1,
+       },
+       [BTRFS_RAID_RAID1C3] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 3,
+               .devs_min       = 3,
+               .tolerated_failures = 2,
+               .devs_increment = 3,
+               .ncopies        = 3,
+               /* .nparity is not listed here: implicitly 0 */
+               .raid_name      = "raid1c3",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID1C3,
+       },
+       [BTRFS_RAID_RAID1C4] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 4,
+               .devs_min       = 4,
+               .tolerated_failures = 3,
+               .devs_increment = 4,
+               .ncopies        = 4,
+               /* .nparity is not listed here: implicitly 0 */
+               .raid_name      = "raid1c4",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID1C4,
+       },
+       [BTRFS_RAID_DUP] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 2,
+               .devs_max       = 1,
+               .devs_min       = 1,
+               .tolerated_failures = 0,
+               .devs_increment = 1,
+               .ncopies        = 2,
+               .nparity        = 0,
+               .raid_name      = "dup",
+               .bg_flag        = BTRFS_BLOCK_GROUP_DUP,
+       },
+       [BTRFS_RAID_RAID0] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 0,
+               .devs_min       = 2,
+               .tolerated_failures = 0,
+               .devs_increment = 1,
+               .ncopies        = 1,
+               .nparity        = 0,
+               .raid_name      = "raid0",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID0,
+       },
+       [BTRFS_RAID_SINGLE] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 1,
+               .devs_min       = 1,
+               .tolerated_failures = 0,
+               .devs_increment = 1,
+               .ncopies        = 1,
+               .nparity        = 0,
+               .raid_name      = "single",
+               .bg_flag        = 0,
+       },
+       [BTRFS_RAID_RAID5] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 0,
+               .devs_min       = 2,
+               .tolerated_failures = 1,
+               .devs_increment = 1,
+               .ncopies        = 1,
+               .nparity        = 1,
+               .raid_name      = "raid5",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID5,
+       },
+       [BTRFS_RAID_RAID6] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 0,
+               .devs_min       = 3,
+               .tolerated_failures = 2,
+               .devs_increment = 1,
+               .ncopies        = 1,
+               .nparity        = 2,
+               .raid_name      = "raid6",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID6,
+       },
+};
+
+/*
+ * A device paired with a 64-bit "physical" value.
+ * NOTE(review): presumably a byte offset on that device -- confirm against
+ * the code that fills this struct in.
+ */
+struct stripe {
+       struct btrfs_device *dev;
+       u64 physical;
+};
+
+/*
+ * Number of parity stripes implied by the profile bits in map->type:
+ * 1 when the RAID5 bit is set, otherwise 2 when the RAID6 bit is set,
+ * otherwise 0.
+ */
+static inline int nr_parity_stripes(struct map_lookup *map)
+{
+       int parity = 0;
+
+       if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+               parity = 1;
+       else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+               parity = 2;
+
+       return parity;
+}
+
+/* Stripes of @map that are not parity: num_stripes minus the parity count. */
+static inline int nr_data_stripes(struct map_lookup *map)
+{
+       int parity = nr_parity_stripes(map);
+
+       return map->num_stripes - parity;
+}
+
+/*
+ * True when @x equals one of the two parity markers, BTRFS_RAID5_P_STRIPE or
+ * BTRFS_RAID6_Q_STRIPE. Note that @x is evaluated twice.
+ */
+#define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) )
+
+/* File-local list of the known btrfs_fs_devices, linked through ->list. */
+static LIST_HEAD(fs_uuids);
+
+/*
+ * Look up a device in @fs_devices.
+ *
+ * A device matches when its devid equals @devid and, if @uuid is not NULL,
+ * its uuid equals the BTRFS_UUID_SIZE bytes at @uuid as well. With a NULL
+ * @uuid only the devid is compared.
+ *
+ * Return: the first matching device on the list, or NULL if there is none.
+ */
+static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
+               u64 devid, u8 *uuid)
+{
+       struct btrfs_device *cur;
+
+       list_for_each_entry(cur, &fs_devices->devices, dev_list) {
+               if (cur->devid != devid)
+                       continue;
+               if (uuid && memcmp(cur->uuid, uuid, BTRFS_UUID_SIZE) != 0)
+                       continue;
+               return cur;
+       }
+       return NULL;
+}
+
+/*
+ * Find the registered btrfs_fs_devices entry for @fsid on the fs_uuids list.
+ *
+ * @metadata_uuid may be NULL. When it is given, the first branch asks for both
+ * the fsid and the metadata_uuid to match.
+ *
+ * NOTE(review): the else-branch returns on an fsid match alone, so as written
+ * a non-NULL @metadata_uuid that differs from the entry's metadata_uuid does
+ * not prevent the entry from being returned -- confirm that this fallback is
+ * intended.
+ *
+ * Return: the first entry whose fsid matches, or NULL if there is none.
+ */
+static struct btrfs_fs_devices *find_fsid(u8 *fsid, u8 *metadata_uuid)
+{
+       struct btrfs_fs_devices *fs_devices;
+
+       list_for_each_entry(fs_devices, &fs_uuids, list) {
+               if (metadata_uuid && (memcmp(fsid, fs_devices->fsid,
+                                            BTRFS_FSID_SIZE) == 0) &&
+                   (memcmp(metadata_uuid, fs_devices->metadata_uuid,
+                           BTRFS_FSID_SIZE) == 0)) {
+                       return fs_devices;
+               } else if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0){
+                       return fs_devices;
+               }
+       }
+       return NULL;
+}
+
+/*
+ * Record the device described by @disk_super in the global device lists.
+ *
+ * The owning fs_devices entry is looked up by fsid and created on the fs_uuids
+ * list when it does not exist yet.  Inside it, the device is looked up by
+ * @devid and the device uuid from the superblock, and created when missing.
+ * An existing device that has no @desc or @part attached yet gets them from
+ * this call, unless its recorded generation is newer than the superblock's.
+ *
+ * On success *@fs_devices_ret points at the owning fs_devices entry.
+ *
+ * Return: 0 on success, -ENOMEM on allocation failure, -EEXIST when the
+ * superblock is older than the generation already recorded for the device.
+ */
+static int device_list_add(struct btrfs_super_block *disk_super,
+                          u64 devid, struct blk_desc *desc,
+                          struct disk_partition *part,
+                          struct btrfs_fs_devices **fs_devices_ret)
+{
+       const u64 super_gen = btrfs_super_generation(disk_super);
+       const bool has_metadata_uuid =
+               (btrfs_super_incompat_flags(disk_super) &
+                BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+       struct btrfs_fs_devices *fs_devices;
+       struct btrfs_device *device = NULL;
+
+       fs_devices = find_fsid(disk_super->fsid,
+                              has_metadata_uuid ? disk_super->metadata_uuid :
+                                                  NULL);
+       if (fs_devices) {
+               device = find_device(fs_devices, devid,
+                                    disk_super->dev_item.uuid);
+       } else {
+               /* First device seen for this filesystem: create its entry */
+               fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
+               if (!fs_devices)
+                       return -ENOMEM;
+               INIT_LIST_HEAD(&fs_devices->devices);
+               list_add(&fs_devices->list, &fs_uuids);
+               memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
+               /* Without the incompat flag the metadata uuid is the fsid */
+               memcpy(fs_devices->metadata_uuid,
+                      has_metadata_uuid ? disk_super->metadata_uuid :
+                                          fs_devices->fsid,
+                      BTRFS_FSID_SIZE);
+
+               fs_devices->latest_devid = devid;
+               fs_devices->latest_trans = super_gen;
+               fs_devices->lowest_devid = (u64)-1;
+       }
+
+       if (!device) {
+               device = kzalloc(sizeof(*device), GFP_NOFS);
+               /* On failure the fs_devices entry is simply left around */
+               if (!device)
+                       return -ENOMEM;
+
+               device->devid = devid;
+               device->desc = desc;
+               device->part = part;
+               device->generation = super_gen;
+               device->fs_devices = fs_devices;
+               device->total_devs = btrfs_super_num_devices(disk_super);
+               device->super_bytes_used = btrfs_super_bytes_used(disk_super);
+               device->total_bytes =
+                       btrfs_stack_device_total_bytes(&disk_super->dev_item);
+               device->bytes_used =
+                       btrfs_stack_device_bytes_used(&disk_super->dev_item);
+               memcpy(device->uuid, disk_super->dev_item.uuid,
+                      BTRFS_UUID_SIZE);
+               list_add(&device->dev_list, &fs_devices->devices);
+       } else if (!device->desc || !device->part) {
+               /*
+                * A known device without backing storage attached.  If it
+                * already carries a newer generation, this superblock may be
+                * stale, so refuse to attach it.
+                */
+               if (super_gen < device->generation) {
+                       error(
+       "adding devid %llu gen %llu but found an existing device gen %llu",
+                               device->devid, super_gen,
+                               device->generation);
+                       return -EEXIST;
+               }
+               device->desc = desc;
+               device->part = part;
+       }
+
+       /* Keep track of the newest generation and the lowest devid seen */
+       if (super_gen > fs_devices->latest_trans) {
+               fs_devices->latest_devid = devid;
+               fs_devices->latest_trans = super_gen;
+       }
+       if (devid < fs_devices->lowest_devid)
+               fs_devices->lowest_devid = devid;
+
+       *fs_devices_ret = fs_devices;
+       return 0;
+}
+
+/*
+ * Free @fs_devices and every fs_devices entry reachable through its seed
+ * pointers.  For each entry all devices on its list are freed, then the entry
+ * is unlinked from its list and freed.
+ *
+ * Return: always 0.  A NULL @fs_devices is accepted.
+ */
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+{
+       while (fs_devices) {
+               struct btrfs_fs_devices *next_seed;
+               struct btrfs_device *dev;
+
+               /* Drop every device that hangs off this entry */
+               while (!list_empty(&fs_devices->devices)) {
+                       dev = list_entry(fs_devices->devices.next,
+                                        struct btrfs_device, dev_list);
+                       list_del(&dev->dev_list);
+                       free(dev);
+               }
+
+               /* Remember the seed before the entry itself goes away */
+               next_seed = fs_devices->seed;
+               fs_devices->seed = NULL;
+
+               list_del(&fs_devices->list);
+               free(fs_devices);
+
+               fs_devices = next_seed;
+       }
+
+       return 0;
+}
+
+/*
+ * Close every fs_devices entry on the global fs_uuids list, always taking the
+ * first entry until the list is empty.
+ */
+void btrfs_close_all_devices(void)
+{
+       while (!list_empty(&fs_uuids))
+               btrfs_close_devices(list_entry(fs_uuids.next,
+                                              struct btrfs_fs_devices,
+                                              list));
+}
+
+/*
+ * Walk the devices of @fs_devices and print a notice for each one that has no
+ * block descriptor or partition attached.  Nothing else is done here.
+ *
+ * Return: always 0.
+ */
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices)
+{
+       struct btrfs_device *dev;
+
+       list_for_each_entry(dev, &fs_devices->devices, dev_list) {
+               if (dev->desc && dev->part)
+                       continue;
+
+               printf("no device found for devid %llu, skip it \n",
+                       dev->devid);
+       }
+       return 0;
+}
+
+/*
+ * Read the superblock of @desc/@part and register the device it describes.
+ *
+ * *@total_devs receives the device count from the superblock, or 1 when the
+ * superblock carries the METADUMP flag.  *@fs_devices_ret is filled in by
+ * device_list_add() on success.
+ *
+ * Return: -EIO when the superblock cannot be read, otherwise the result of
+ * device_list_add().
+ */
+int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part,
+                         struct btrfs_fs_devices **fs_devices_ret,
+                         u64 *total_devs)
+{
+       char buf[BTRFS_SUPER_INFO_SIZE];
+       struct btrfs_super_block *disk_super = (struct btrfs_super_block *)buf;
+       u64 devid;
+
+       if (btrfs_read_dev_super(desc, part, disk_super) < 0)
+               return -EIO;
+
+       devid = btrfs_stack_device_id(&disk_super->dev_item);
+       *total_devs = (btrfs_super_flags(disk_super) &
+                      BTRFS_SUPER_FLAG_METADUMP) ?
+                       1 : btrfs_super_num_devices(disk_super);
+
+       return device_list_add(disk_super, devid, desc, part, fs_devices_ret);
+}
+
+/*
+ * Find a device by @devid (and @uuid, as interpreted by find_device()) in the
+ * fs_devices of @fs_info or in any entry chained through ->seed.
+ *
+ * When @fsid is non-NULL, only entries whose metadata_uuid equals @fsid are
+ * searched.
+ *
+ * Return: the device, or NULL when none is found.
+ */
+struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
+                                      u8 *uuid, u8 *fsid)
+{
+       struct btrfs_fs_devices *cur;
+       struct btrfs_device *found;
+
+       for (cur = fs_info->fs_devices; cur; cur = cur->seed) {
+               /* Skip entries that belong to a different filesystem id */
+               if (fsid && memcmp(cur->metadata_uuid, fsid, BTRFS_FSID_SIZE))
+                       continue;
+
+               found = find_device(cur, devid, uuid);
+               if (found)
+                       return found;
+       }
+       return NULL;
+}
+
+/*
+ * Allocate a zeroed placeholder for a device that is referenced on disk but
+ * was not found among the scanned devices.
+ *
+ * The placeholder records @devid so that later messages about the device
+ * name the right id; its desc and part stay NULL.
+ *
+ * Return: the new device, or NULL when the allocation fails.
+ */
+static struct btrfs_device *fill_missing_device(u64 devid)
+{
+       struct btrfs_device *device;
+
+       device = kzalloc(sizeof(*device), GFP_NOFS);
+       if (device)
+               device->devid = devid;
+       return device;
+}
+
+/*
+ * Sanity-check one chunk item before it is used.
+ *
+ * @fs_info: filesystem info; its sectorsize is what the chunk is checked
+ *           against
+ * @leaf:    extent buffer the chunk fields are read from
+ * @chunk:   location of the chunk item inside @leaf
+ * @slot:    slot of the item in @leaf, or -1 for a SYSTEM chunk (the item
+ *           size checks against the leaf are skipped in that case)
+ * @logical: logical start address of the chunk
+ *
+ * slot == -1: SYSTEM chunk
+ * return 0 when every check passes, -EUCLEAN when the item size or the stripe
+ * count is inconsistent, and -EIO for every other failed check
+ */
+int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
+                           struct extent_buffer *leaf,
+                           struct btrfs_chunk *chunk,
+                           int slot, u64 logical)
+{
+       u64 length;
+       u64 stripe_len;
+       u16 num_stripes;
+       u16 sub_stripes;
+       u64 type;
+       u32 chunk_ondisk_size;
+       u32 sectorsize = fs_info->sectorsize;
+
+       /*
+        * Basic chunk item size check.  Note that btrfs_chunk already contains
+        * one stripe, so no "==" check.  This has to come before any chunk
+        * field is read below.
+        */
+       if (slot >= 0 &&
+           btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk)) {
+               error("invalid chunk item size, have %u expect [%zu, %zu)",
+                       btrfs_item_size_nr(leaf, slot),
+                       sizeof(struct btrfs_chunk),
+                       BTRFS_LEAF_DATA_SIZE(fs_info));
+               return -EUCLEAN;
+       }
+       length = btrfs_chunk_length(leaf, chunk);
+       stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+       sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+       type = btrfs_chunk_type(leaf, chunk);
+
+       if (num_stripes == 0) {
+               error("invalid num_stripes, have %u expect non-zero",
+                       num_stripes);
+               return -EUCLEAN;
+       }
+       if (slot >= 0 && btrfs_chunk_item_size(num_stripes) !=
+           btrfs_item_size_nr(leaf, slot)) {
+               error("invalid chunk item size, have %u expect %lu",
+                       btrfs_item_size_nr(leaf, slot),
+                       btrfs_chunk_item_size(num_stripes));
+               return -EUCLEAN;
+       }
+
+       /*
+        * Alignment, sector size, length and stripe length checks.  These
+        * checks may be insufficient to cover every corner case.
+        */
+       if (!IS_ALIGNED(logical, sectorsize)) {
+               error("invalid chunk logical %llu",  logical);
+               return -EIO;
+       }
+       if (btrfs_chunk_sector_size(leaf, chunk) != sectorsize) {
+               error("invalid chunk sectorsize %llu",
+                     (unsigned long long)btrfs_chunk_sector_size(leaf, chunk));
+               return -EIO;
+       }
+       if (!length || !IS_ALIGNED(length, sectorsize)) {
+               error("invalid chunk length %llu",  length);
+               return -EIO;
+       }
+       if (stripe_len != BTRFS_STRIPE_LEN) {
+               error("invalid chunk stripe length: %llu", stripe_len);
+               return -EIO;
+       }
+       /*
+        * Check on chunk item type: a sys chunk must carry the SYSTEM flag,
+        * no bit outside the type and profile masks may be set, at least one
+        * type bit must be set, and at most one profile bit may be set.
+        */
+       if (slot == -1 && (type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+               error("invalid chunk type %llu", type);
+               return -EIO;
+       }
+       if (type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+                    BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
+               error("unrecognized chunk type: %llu",
+                     ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+                       BTRFS_BLOCK_GROUP_PROFILE_MASK) & type);
+               return -EIO;
+       }
+       if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+               error("missing chunk type flag: %llu", type);
+               return -EIO;
+       }
+       if (!(is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) ||
+             (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)) {
+               error("conflicting chunk type detected: %llu", type);
+               return -EIO;
+       }
+       if ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+           !is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
+               error("conflicting chunk profile detected: %llu", type);
+               return -EIO;
+       }
+
+       chunk_ondisk_size = btrfs_chunk_item_size(num_stripes);
+       /*
+        * Btrfs_chunk contains at least one stripe, and for sys_chunk
+        * it can't exceed the system chunk array size
+        * For normal chunk, it should match its chunk item size.
+        */
+       if (num_stripes < 1 ||
+           (slot == -1 && chunk_ondisk_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) ||
+           (slot >= 0 && chunk_ondisk_size > btrfs_item_size_nr(leaf, slot))) {
+               error("invalid num_stripes: %u", num_stripes);
+               return -EIO;
+       }
+       /*
+        * Device number check against profile: each profile bit implies a
+        * bound on num_stripes (and, for RAID10, on sub_stripes); a chunk
+        * with no profile bit set must have exactly one stripe.
+        */
+       if ((type & BTRFS_BLOCK_GROUP_RAID10 && (sub_stripes != 2 ||
+                 !IS_ALIGNED(num_stripes, sub_stripes))) ||
+           (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+           (type & BTRFS_BLOCK_GROUP_RAID1C3 && num_stripes < 3) ||
+           (type & BTRFS_BLOCK_GROUP_RAID1C4 && num_stripes < 4) ||
+           (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+           (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+           (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+           ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+            num_stripes != 1)) {
+               error("Invalid num_stripes:sub_stripes %u:%u for profile %llu",
+                     num_stripes, sub_stripes,
+                     type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+/*
+ * Turn one chunk item into a map_lookup and insert it into the mapping tree
+ * of @fs_info.
+ *
+ * @key:   key of the chunk item; key->offset is the chunk's logical start
+ * @leaf:  extent buffer holding the chunk item
+ * @chunk: location of the chunk item inside @leaf
+ * @slot:  slot of the item in @leaf, used to verify the chunk item is valid.
+ *         For sys chunk in superblock, pass -1 to indicate sys chunk.
+ *
+ * A stripe whose device cannot be found gets a placeholder device, which is
+ * added to the device list of fs_info->fs_devices.
+ *
+ * Return: 0 on success or when @logical is already covered by a mapping, the
+ * error from btrfs_check_chunk_valid() for an invalid chunk, -ENOMEM on
+ * allocation failure, or the error from insert_cache_extent().
+ */
+static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
+                         struct extent_buffer *leaf,
+                         struct btrfs_chunk *chunk, int slot)
+{
+       struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+       struct map_lookup *map;
+       struct cache_extent *ce;
+       u64 logical;
+       u64 length;
+       u64 devid;
+       u8 uuid[BTRFS_UUID_SIZE];
+       int num_stripes;
+       int ret;
+       int i;
+
+       logical = key->offset;
+       length = btrfs_chunk_length(leaf, chunk);
+       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+       /* Validation check */
+       ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, slot, logical);
+       if (ret) {
+               error("%s checksums match, but it has an invalid chunk, %s",
+                     (slot == -1) ? "Superblock" : "Metadata",
+                     (slot == -1) ? "try btrfsck --repair -s <superblock> ie, 0,1,2" : "");
+               return ret;
+       }
+
+       ce = search_cache_extent(&map_tree->cache_tree, logical);
+
+       /* already mapped? */
+       if (ce && ce->start <= logical && ce->start + ce->size > logical) {
+               return 0;
+       }
+
+       map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS);
+       if (!map)
+               return -ENOMEM;
+
+       map->ce.start = logical;
+       map->ce.size = length;
+       map->num_stripes = num_stripes;
+       map->io_width = btrfs_chunk_io_width(leaf, chunk);
+       map->io_align = btrfs_chunk_io_align(leaf, chunk);
+       map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
+       map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+       map->type = btrfs_chunk_type(leaf, chunk);
+       map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+
+       for (i = 0; i < num_stripes; i++) {
+               map->stripes[i].physical =
+                       btrfs_stripe_offset_nr(leaf, chunk, i);
+               devid = btrfs_stripe_devid_nr(leaf, chunk, i);
+               read_extent_buffer(leaf, uuid, (unsigned long)
+                                  btrfs_stripe_dev_uuid_nr(chunk, i),
+                                  BTRFS_UUID_SIZE);
+               map->stripes[i].dev = btrfs_find_device(fs_info, devid, uuid,
+                                                       NULL);
+               if (!map->stripes[i].dev) {
+                       map->stripes[i].dev = fill_missing_device(devid);
+                       /*
+                        * The placeholder allocation can fail; bail out before
+                        * list_add() dereferences a NULL device.  Placeholders
+                        * added for earlier stripes stay on the device list.
+                        */
+                       if (!map->stripes[i].dev) {
+                               kfree(map);
+                               return -ENOMEM;
+                       }
+                       printf("warning, device %llu is missing\n",
+                              (unsigned long long)devid);
+                       list_add(&map->stripes[i].dev->dev_list,
+                                &fs_info->fs_devices->devices);
+               }
+
+       }
+       ret = insert_cache_extent(&map_tree->cache_tree, &map->ce);
+       if (ret < 0) {
+               errno = -ret;
+               error("failed to add chunk map start=%llu len=%llu: %d (%m)",
+                     map->ce.start, map->ce.size, ret);
+               /* The map never made it into the tree, so free it here */
+               kfree(map);
+       }
+
+       return ret;
+}
+
+/*
+ * Copy the fields of the on-disk device item @dev_item (located in @leaf)
+ * into the in-memory @device: id, sizes, type, I/O geometry and uuid.
+ *
+ * Return: always 0.
+ */
+static int fill_device_from_item(struct extent_buffer *leaf,
+                                struct btrfs_dev_item *dev_item,
+                                struct btrfs_device *device)
+{
+       /* Identity */
+       device->devid = btrfs_device_id(leaf, dev_item);
+       device->type = btrfs_device_type(leaf, dev_item);
+       read_extent_buffer(leaf, device->uuid,
+                          (unsigned long)btrfs_device_uuid(dev_item),
+                          BTRFS_UUID_SIZE);
+
+       /* Space accounting */
+       device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+       device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
+
+       /* I/O geometry */
+       device->io_align = btrfs_device_io_align(leaf, dev_item);
+       device->io_width = btrfs_device_io_width(leaf, dev_item);
+       device->sector_size = btrfs_device_sector_size(leaf, dev_item);
+
+       return 0;
+}
+
+/*
+ * Process one device item from the chunk tree.
+ *
+ * The device is looked up by devid, device uuid and filesystem id; when it is
+ * not known yet a zeroed device is allocated and added to the device list of
+ * fs_info->fs_devices.  Either way the device is then filled from @dev_item
+ * and its total_bytes is added to fs_devices->total_rw_bytes.
+ *
+ * The filesystem id stored in the item is compared against
+ * fs_devices->metadata_uuid, the same field btrfs_find_device() matches on.
+ * metadata_uuid equals fsid unless the METADATA_UUID incompat flag is set, so
+ * this only changes the outcome for filesystems using that feature.
+ *
+ * Return: 0 on success, -ENOTSUPP when the item belongs to a different
+ * filesystem id (seed device), -ENOMEM on allocation failure.
+ */
+static int read_one_dev(struct btrfs_fs_info *fs_info,
+                       struct extent_buffer *leaf,
+                       struct btrfs_dev_item *dev_item)
+{
+       struct btrfs_device *device;
+       u64 devid;
+       int ret = 0;
+       u8 fs_uuid[BTRFS_FSID_SIZE];
+       u8 dev_uuid[BTRFS_UUID_SIZE];
+
+       devid = btrfs_device_id(leaf, dev_item);
+       read_extent_buffer(leaf, dev_uuid,
+                          (unsigned long)btrfs_device_uuid(dev_item),
+                          BTRFS_UUID_SIZE);
+       read_extent_buffer(leaf, fs_uuid,
+                          (unsigned long)btrfs_device_fsid(dev_item),
+                          BTRFS_FSID_SIZE);
+
+       if (memcmp(fs_uuid, fs_info->fs_devices->metadata_uuid,
+                  BTRFS_FSID_SIZE)) {
+               error("Seed device is not yet supported\n");
+               return -ENOTSUPP;
+       }
+
+       device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
+       if (!device) {
+               device = kzalloc(sizeof(*device), GFP_NOFS);
+               if (!device)
+                       return -ENOMEM;
+               list_add(&device->dev_list,
+                        &fs_info->fs_devices->devices);
+       }
+
+       fill_device_from_item(leaf, dev_item, device);
+       fs_info->fs_devices->total_rw_bytes +=
+               btrfs_device_total_bytes(leaf, dev_item);
+       return ret;
+}
+
+/*
+ * Read the chunk items stored in the sys_chunk_array of the superblock copy
+ * of @fs_info and hand each one to read_one_chunk() with slot -1.
+ *
+ * The superblock copy is written into a dummy extent buffer so the chunk
+ * fields can be read through the extent buffer accessors.  The array is a
+ * sequence of (disk key, chunk item) pairs; only CHUNK_ITEM keys are
+ * accepted.
+ *
+ * Return: 0 on success, -EINVAL when nodesize is smaller than the superblock,
+ * -ENOMEM when the dummy buffer cannot be allocated, -EIO on a malformed or
+ * truncated array, or the error returned by read_one_chunk().
+ */
+int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_super_block *super_copy = fs_info->super_copy;
+       struct extent_buffer *sb;
+       struct btrfs_chunk *chunk;
+       struct btrfs_key key;
+       unsigned long sb_array_offset;
+       u8 *array_ptr;
+       u32 array_size;
+       u32 cur_offset = 0;
+       u32 num_stripes;
+       u32 len = 0;
+       int ret = 0;
+
+       if (fs_info->nodesize < BTRFS_SUPER_INFO_SIZE) {
+               printf("ERROR: nodesize %u too small to read superblock\n",
+                               fs_info->nodesize);
+               return -EINVAL;
+       }
+
+       sb = alloc_dummy_extent_buffer(fs_info, BTRFS_SUPER_INFO_OFFSET,
+                                      BTRFS_SUPER_INFO_SIZE);
+       if (!sb)
+               return -ENOMEM;
+       btrfs_set_buffer_uptodate(sb);
+       write_extent_buffer(sb, super_copy, 0, sizeof(*super_copy));
+
+       array_size = btrfs_super_sys_array_size(super_copy);
+       array_ptr = super_copy->sys_chunk_array;
+       sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
+
+       while (cur_offset < array_size) {
+               /* Each entry starts with a disk key */
+               len = sizeof(struct btrfs_disk_key);
+               if (cur_offset + len > array_size)
+                       goto out_short_read;
+
+               btrfs_disk_key_to_cpu(&key, (struct btrfs_disk_key *)array_ptr);
+
+               array_ptr += len;
+               sb_array_offset += len;
+               cur_offset += len;
+
+               if (key.type != BTRFS_CHUNK_ITEM_KEY) {
+                       printk(
+               "ERROR: unexpected item type %u in sys_array at offset %u\n",
+                               (u32)key.type, cur_offset);
+                       ret = -EIO;
+                       break;
+               }
+
+               /* The chunk is addressed by its offset inside the buffer */
+               chunk = (struct btrfs_chunk *)sb_array_offset;
+
+               /*
+                * A chunk with a single stripe is the minimum that must fit;
+                * the exact size is checked once num_stripes is known.
+                */
+               len = btrfs_chunk_item_size(1);
+               if (cur_offset + len > array_size)
+                       goto out_short_read;
+
+               num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+               if (!num_stripes) {
+                       printk(
+           "ERROR: invalid number of stripes %u in sys_array at offset %u\n",
+                               num_stripes, cur_offset);
+                       ret = -EIO;
+                       break;
+               }
+
+               len = btrfs_chunk_item_size(num_stripes);
+               if (cur_offset + len > array_size)
+                       goto out_short_read;
+
+               ret = read_one_chunk(fs_info, &key, sb, chunk, -1);
+               if (ret)
+                       break;
+
+               /* Step over the chunk item to the next key */
+               array_ptr += len;
+               sb_array_offset += len;
+               cur_offset += len;
+       }
+       free_extent_buffer(sb);
+       return ret;
+
+out_short_read:
+       printk("ERROR: sys_array too short to read %u bytes at offset %u\n",
+                       len, cur_offset);
+       free_extent_buffer(sb);
+       return -EIO;
+}
+
+/*
+ * Walk the chunk tree of @fs_info from the first device item onwards and feed
+ * every DEV_ITEM to read_one_dev() and every CHUNK_ITEM to read_one_chunk().
+ * Items of any other type are skipped.
+ *
+ * Return: 0 on success, -ENOMEM when no path can be allocated, or the first
+ * negative error from the tree search or from one of the item readers.
+ */
+int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_root *root = fs_info->chunk_root;
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       struct btrfs_key key;
+       struct btrfs_key found_key;
+       int slot;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       /*
+        * Device items sort before chunk items because their object id is
+        * smaller than BTRFS_FIRST_CHUNK_TREE_OBJECTID, the lowest object id
+        * a chunk item can have.  Starting at the device items therefore
+        * visits all devices first and all chunks afterwards.
+        */
+       key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+       key.type = 0;
+       key.offset = 0;
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+
+       for (;;) {
+               leaf = path->nodes[0];
+               slot = path->slots[0];
+
+               /* Past the last item of this leaf: move on to the next one */
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto out;
+                       if (ret > 0)
+                               break;
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &found_key, slot);
+               switch (found_key.type) {
+               case BTRFS_DEV_ITEM_KEY:
+                       ret = read_one_dev(fs_info, leaf,
+                                          btrfs_item_ptr(leaf, slot,
+                                                         struct btrfs_dev_item));
+                       break;
+               case BTRFS_CHUNK_ITEM_KEY:
+                       ret = read_one_chunk(fs_info, &found_key, leaf,
+                                            btrfs_item_ptr(leaf, slot,
+                                                           struct btrfs_chunk),
+                                            slot);
+                       break;
+               default:
+                       break;
+               }
+               if (ret < 0)
+                       goto out;
+
+               path->slots[0]++;
+       }
+
+       ret = 0;
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
+ * Compute the per-device stripe length of a chunk from its length, profile
+ * and stripe counts.
+ *
+ * The chunk item must already have been validated with
+ * btrfs_check_chunk_valid(); the divisions below rely on sane stripe counts.
+ * An unknown profile triggers BUG_ON().
+ */
+u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
+                       struct extent_buffer *leaf,
+                       struct btrfs_chunk *chunk)
+{
+       u64 chunk_len = btrfs_chunk_length(leaf, chunk);
+       u32 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+       u64 profile = btrfs_chunk_type(leaf, chunk) &
+                     BTRFS_BLOCK_GROUP_PROFILE_MASK;
+       u64 stripe_len;
+
+       switch (profile) {
+       case BTRFS_BLOCK_GROUP_RAID0:
+               /* Data is spread over all stripes */
+               stripe_len = chunk_len / num_stripes;
+               break;
+       case BTRFS_BLOCK_GROUP_RAID10:
+               /* Data is spread over groups of sub_stripes stripes */
+               stripe_len = chunk_len / (num_stripes /
+                               btrfs_chunk_sub_stripes(leaf, chunk));
+               break;
+       case BTRFS_BLOCK_GROUP_RAID5:
+               /* One stripe does not count towards the chunk length */
+               stripe_len = chunk_len / (num_stripes - 1);
+               break;
+       case BTRFS_BLOCK_GROUP_RAID6:
+               /* Two stripes do not count towards the chunk length */
+               stripe_len = chunk_len / (num_stripes - 2);
+               break;
+       case 0: /* Single profile */
+       case BTRFS_BLOCK_GROUP_DUP:
+       case BTRFS_BLOCK_GROUP_RAID1:
+       case BTRFS_BLOCK_GROUP_RAID1C3:
+       case BTRFS_BLOCK_GROUP_RAID1C4:
+               /* Every stripe is as long as the chunk */
+               stripe_len = chunk_len;
+               break;
+       default:
+               /* Invalid chunk profile found */
+               BUG_ON(1);
+       }
+       return stripe_len;
+}
+
+/*
+ * Report how many copies exist for the data at @logical, based on the profile
+ * of the chunk mapping that covers it.
+ *
+ * @len is only used in the diagnostic messages.
+ *
+ * Return: num_stripes for DUP/RAID1/RAID1C3/RAID1C4, sub_stripes for RAID10,
+ * 2 for RAID5, 3 for RAID6 and 1 otherwise.  1 is also returned, after
+ * printing a message, when no usable mapping is found.
+ */
+int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
+{
+       struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+       struct cache_extent *ce;
+       struct map_lookup *map;
+
+       ce = search_cache_extent(&map_tree->cache_tree, logical);
+       if (!ce) {
+               fprintf(stderr, "No mapping for %llu-%llu\n",
+                       (unsigned long long)logical,
+                       (unsigned long long)logical+len);
+               return 1;
+       }
+       if (ce->start > logical || ce->start + ce->size < logical) {
+               fprintf(stderr,
+                       "Invalid mapping for %llu-%llu, got %llu-%llu\n",
+                       (unsigned long long)logical,
+                       (unsigned long long)logical+len,
+                       (unsigned long long)ce->start,
+                       (unsigned long long)ce->start + ce->size);
+               return 1;
+       }
+
+       map = container_of(ce, struct map_lookup, ce);
+
+       /* Mirroring profiles: every stripe is a copy */
+       if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+                        BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4))
+               return map->num_stripes;
+       if (map->type & BTRFS_BLOCK_GROUP_RAID10)
+               return map->sub_stripes;
+       if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+               return 2;
+       if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+               return 3;
+
+       return 1;
+}
+
+/*
+ * Find a chunk mapping whose type has at least one bit of @type set, starting
+ * the search in the mapping tree at *@logical.
+ *
+ * On success *@logical and *@size are set to the start and size of the
+ * mapping that was found.
+ *
+ * Return: 0 when a mapping is found, -ENOENT otherwise.
+ */
+int btrfs_next_bg(struct btrfs_fs_info *fs_info, u64 *logical,
+                 u64 *size, u64 type)
+{
+       struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+       struct cache_extent *ext;
+       struct map_lookup *map;
+       u64 pos = *logical;
+
+       for (ext = search_cache_extent(&map_tree->cache_tree, pos); ext; ) {
+               /*
+                * Step to the following extent only for a non-zero position.
+                * Callers start with a logical address of 0, and stepping
+                * right away would skip a valid first block group.
+                */
+               if (pos) {
+                       ext = next_cache_extent(ext);
+                       if (!ext)
+                               break;
+               }
+
+               pos = ext->start;
+               map = container_of(ext, struct map_lookup, ce);
+               if (map->type & type) {
+                       *logical = ext->start;
+                       *size = ext->size;
+                       return 0;
+               }
+
+               /* An extent at 0 is not stepped over above, so do it here */
+               if (!pos)
+                       ext = next_cache_extent(ext);
+       }
+
+       return -ENOENT;
+}
+
+/* Return non-zero when @a is greater than @b, i.e. the pair is out of order */
+static inline int parity_smaller(u64 a, u64 b)
+{
+       return b < a;
+}
+
+/*
+ * Bubble-sort the stripes of @bbio by ascending @raid_map value, keeping both
+ * arrays in step, so that the parity/syndrome stripes end up last.
+ */
+static void sort_parity_stripes(struct btrfs_multi_bio *bbio, u64 *raid_map)
+{
+       bool swapped;
+       int idx;
+
+       do {
+               swapped = false;
+               for (idx = 0; idx < bbio->num_stripes - 1; idx++) {
+                       struct btrfs_bio_stripe tmp_stripe;
+                       u64 tmp_map;
+
+                       if (!parity_smaller(raid_map[idx], raid_map[idx + 1]))
+                               continue;
+
+                       /* Swap the neighbours in both arrays */
+                       tmp_stripe = bbio->stripes[idx];
+                       tmp_map = raid_map[idx];
+                       bbio->stripes[idx] = bbio->stripes[idx + 1];
+                       raid_map[idx] = raid_map[idx + 1];
+                       bbio->stripes[idx + 1] = tmp_stripe;
+                       raid_map[idx + 1] = tmp_map;
+                       swapped = true;
+               }
+       } while (swapped);
+}
+
+int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+                     u64 logical, u64 *length, u64 *type,
+                     struct btrfs_multi_bio **multi_ret, int mirror_num,
+                     u64 **raid_map_ret)
+{
+       struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+       struct cache_extent *ce;
+       struct map_lookup *map;
+       u64 offset;
+       u64 stripe_offset;
+       u64 *raid_map = NULL;
+       int stripe_nr;
+       int stripes_allocated = 8;
+       int stripes_required = 1;
+       int stripe_index;
+       int i;
+       struct btrfs_multi_bio *multi = NULL;
+
+       if (multi_ret && rw == READ) {
+               stripes_allocated = 1;
+       }
+again:
+       ce = search_cache_extent(&map_tree->cache_tree, logical);
+       if (!ce) {
+               kfree(multi);
+               *length = (u64)-1;
+               return -ENOENT;
+       }
+       if (ce->start > logical) {
+               kfree(multi);
+               *length = ce->start - logical;
+               return -ENOENT;
+       }
+
+       if (multi_ret) {
+               multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
+                               GFP_NOFS);
+               if (!multi)
+                       return -ENOMEM;
+       }
+       map = container_of(ce, struct map_lookup, ce);
+       offset = logical - ce->start;
+
+       if (rw == WRITE) {
+               if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+                                BTRFS_BLOCK_GROUP_RAID1C3 |
+                                BTRFS_BLOCK_GROUP_RAID1C4 |
+                                BTRFS_BLOCK_GROUP_DUP)) {
+                       stripes_required = map->num_stripes;
+               } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+                       stripes_required = map->sub_stripes;
+               }
+       }
+       if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)
+           && multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) {
+                   /* RAID[56] write or recovery. Return all stripes */
+                   stripes_required = map->num_stripes;
+
+                   /* Only allocate the map if we've already got a large enough multi_ret */
+                   if (stripes_allocated >= stripes_required) {
+                           raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
+                           if (!raid_map) {
+                                   kfree(multi);
+                                   return -ENOMEM;
+                           }
+                   }
+       }
+
+       /* if our multi bio struct is too small, back off and try again */
+       if (multi_ret && stripes_allocated < stripes_required) {
+               stripes_allocated = stripes_required;
+               kfree(multi);
+               multi = NULL;
+               goto again;
+       }
+       stripe_nr = offset;
+       /*
+        * stripe_nr counts the total number of stripes we have to stride
+        * to get to this block
+        */
+       stripe_nr = stripe_nr / map->stripe_len;
+
+       stripe_offset = stripe_nr * map->stripe_len;
+       BUG_ON(offset < stripe_offset);
+
+       /* stripe_offset is the offset of this block in its stripe*/
+       stripe_offset = offset - stripe_offset;
+
+       if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+                        BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4 |
+                        BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
+                        BTRFS_BLOCK_GROUP_RAID10 |
+                        BTRFS_BLOCK_GROUP_DUP)) {
+               /* we limit the length of each bio to what fits in a stripe */
+               *length = min_t(u64, ce->size - offset,
+                             map->stripe_len - stripe_offset);
+       } else {
+               *length = ce->size - offset;
+       }
+
+       if (!multi_ret)
+               goto out;
+
+       multi->num_stripes = 1;
+       stripe_index = 0;
+       if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+                        BTRFS_BLOCK_GROUP_RAID1C3 |
+                        BTRFS_BLOCK_GROUP_RAID1C4)) {
+               if (rw == WRITE)
+                       multi->num_stripes = map->num_stripes;
+               else if (mirror_num)
+                       stripe_index = mirror_num - 1;
+               else
+                       stripe_index = stripe_nr % map->num_stripes;
+       } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+               int factor = map->num_stripes / map->sub_stripes;
+
+               stripe_index = stripe_nr % factor;
+               stripe_index *= map->sub_stripes;
+
+               if (rw == WRITE)
+                       multi->num_stripes = map->sub_stripes;
+               else if (mirror_num)
+                       stripe_index += mirror_num - 1;
+
+               stripe_nr = stripe_nr / factor;
+       } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
+               if (rw == WRITE)
+                       multi->num_stripes = map->num_stripes;
+               else if (mirror_num)
+                       stripe_index = mirror_num - 1;
+       } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+                               BTRFS_BLOCK_GROUP_RAID6)) {
+
+               if (raid_map) {
+                       int rot;
+                       u64 tmp;
+                       u64 raid56_full_stripe_start;
+                       u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len;
+
+                       /*
+                        * align the start of our data stripe in the logical
+                        * address space
+                        */
+                       raid56_full_stripe_start = offset / full_stripe_len;
+                       raid56_full_stripe_start *= full_stripe_len;
+
+                       /* get the data stripe number */
+                       stripe_nr = raid56_full_stripe_start / map->stripe_len;
+                       stripe_nr = stripe_nr / nr_data_stripes(map);
+
+                       /* Work out the disk rotation on this stripe-set */
+                       rot = stripe_nr % map->num_stripes;
+
+                       /* Fill in the logical address of each stripe */
+                       tmp = stripe_nr * nr_data_stripes(map);
+
+                       for (i = 0; i < nr_data_stripes(map); i++)
+                               raid_map[(i+rot) % map->num_stripes] =
+                                       ce->start + (tmp + i) * map->stripe_len;
+
+                       raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE;
+                       if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+                               raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE;
+
+                       *length = map->stripe_len;
+                       stripe_index = 0;
+                       stripe_offset = 0;
+                       multi->num_stripes = map->num_stripes;
+               } else {
+                       stripe_index = stripe_nr % nr_data_stripes(map);
+                       stripe_nr = stripe_nr / nr_data_stripes(map);
+
+                       /*
+                        * Mirror #0 or #1 means the original data block.
+                        * Mirror #2 is RAID5 parity block.
+                        * Mirror #3 is RAID6 Q block.
+                        */
+                       if (mirror_num > 1)
+                               stripe_index = nr_data_stripes(map) + mirror_num - 2;
+
+                       /* We distribute the parity blocks across stripes */
+                       stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
+               }
+       } else {
+               /*
+                * After the modulo/division below, stripe_nr is the number of
+                * stripes on this device we have to walk to find the data, and
+                * stripe_index is the index of our device in the stripe array.
+                */
+               stripe_index = stripe_nr % map->num_stripes;
+               stripe_nr = stripe_nr / map->num_stripes;
+       }
+       BUG_ON(stripe_index >= map->num_stripes);
+
+       for (i = 0; i < multi->num_stripes; i++) {
+               multi->stripes[i].physical =
+                       map->stripes[stripe_index].physical + stripe_offset +
+                       stripe_nr * map->stripe_len;
+               multi->stripes[i].dev = map->stripes[stripe_index].dev;
+               stripe_index++;
+       }
+       *multi_ret = multi;
+
+       if (type)
+               *type = map->type;
+
+       if (raid_map) {
+               sort_parity_stripes(multi, raid_map);
+               *raid_map_ret = raid_map;
+       }
+out:
+       return 0;
+}
+
+/*
+ * Map a logical address to its device stripe(s) without reporting the
+ * block group type.
+ *
+ * Front end for __btrfs_map_block() that passes NULL for the optional type
+ * output; all other arguments and the return value are forwarded unchanged.
+ */
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+                   u64 logical, u64 *length,
+                   struct btrfs_multi_bio **multi_ret, int mirror_num,
+                   u64 **raid_map_ret)
+{
+       u64 *no_type = NULL;    /* the block group type is not wanted here */
+
+       return __btrfs_map_block(fs_info, rw, logical, length, no_type,
+                                multi_ret, mirror_num, raid_map_ret);
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
new file mode 100644 (file)
index 0000000..9d1a07a
--- /dev/null
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef __BTRFS_VOLUMES_H__
+#define __BTRFS_VOLUMES_H__
+
+#include <fs_internal.h>
+#include "ctree.h"
+
+#define BTRFS_STRIPE_LEN       SZ_64K
+
+/*
+ * Per-device state for one device of a btrfs filesystem.
+ */
+struct btrfs_device {
+       /* list linkage; presumably on btrfs_fs_devices::devices - confirm */
+       struct list_head dev_list;
+       struct btrfs_root *dev_root;
+       /* presumably the device set this device is a member of - confirm */
+       struct btrfs_fs_devices *fs_devices;
+
+       /*
+        * Block device descriptor and partition; same types as the first two
+        * arguments of btrfs_scan_one_device(), presumably stored from there.
+        */
+       struct blk_desc *desc;
+       struct disk_partition *part;
+
+       /* NOTE(review): these look like values read from the super block */
+       u64 total_devs;
+       u64 super_bytes_used;
+
+       u64 generation;
+
+       /* the internal btrfs device id */
+       u64 devid;
+
+       /* size of the device */
+       u64 total_bytes;
+
+       /* bytes used */
+       u64 bytes_used;
+
+       /* optimal io alignment for this device */
+       u32 io_align;
+
+       /* optimal io width for this device */
+       u32 io_width;
+
+       /* minimal io size for this device */
+       u32 sector_size;
+
+       /* type and info about this device */
+       u64 type;
+
+       /* physical drive uuid (or lvm uuid) */
+       u8 uuid[BTRFS_UUID_SIZE];
+};
+
+/*
+ * A set of btrfs devices identified by a common filesystem uuid.
+ */
+struct btrfs_fs_devices {
+       u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+       /*
+        * NOTE(review): presumably the uuid recorded in metadata, which may
+        * differ from fsid - confirm against the super block handling.
+        */
+       u8 metadata_uuid[BTRFS_FSID_SIZE];
+
+       u64 latest_devid;
+       u64 lowest_devid;
+       u64 latest_trans;
+
+       u64 total_rw_bytes;
+
+       /* list head; presumably of btrfs_device::dev_list entries - confirm */
+       struct list_head devices;
+       /* list linkage; presumably on the btrfs_scanned_uuids() list - confirm */
+       struct list_head list;
+
+       int seeding;
+       struct btrfs_fs_devices *seed;
+};
+
+/*
+ * One stripe of a mapping: a device plus a physical offset on that device.
+ * Used both in the chunk map (struct map_lookup) and in mapping results
+ * (struct btrfs_multi_bio).
+ */
+struct btrfs_bio_stripe {
+       struct btrfs_device *dev;
+       u64 physical;
+};
+
+/*
+ * Result of __btrfs_map_block(): the stripes a logical range maps to.
+ * Allocate with btrfs_multi_bio_size(n) to get room for n stripes.
+ */
+struct btrfs_multi_bio {
+       int error;
+       int num_stripes;        /* number of valid entries in stripes[] */
+       struct btrfs_bio_stripe stripes[];
+};
+
+/*
+ * Chunk mapping entry describing how one logical range is laid out across
+ * devices. Allocate with btrfs_map_lookup_size(n) for n stripes.
+ */
+struct map_lookup {
+       /* presumably the logical start/size of the chunk - confirm */
+       struct cache_extent ce;
+       u64 type;               /* BTRFS_BLOCK_GROUP_* flags */
+       int io_align;
+       int io_width;
+       int stripe_len;         /* length of one stripe on one device */
+       int sector_size;
+       int num_stripes;        /* number of entries in stripes[] */
+       int sub_stripes;        /* RAID10: num_stripes / sub_stripes groups */
+       struct btrfs_bio_stripe stripes[];
+};
+
+/*
+ * Static properties of one RAID profile; btrfs_raid_array[] holds one entry
+ * per profile.
+ */
+struct btrfs_raid_attr {
+       int sub_stripes;        /* sub_stripes info for map */
+       int dev_stripes;        /* stripes per dev */
+       int devs_max;           /* max devs to use */
+       int devs_min;           /* min devs needed */
+       int tolerated_failures; /* max tolerated fail devs */
+       int devs_increment;     /* ndevs has to be a multiple of this */
+       int ncopies;            /* how many copies the data has */
+       int nparity;            /* number of stripes worth of bytes to store
+                                * parity information */
+       const char raid_name[8]; /* name of the raid */
+       u64 bg_flag;            /* block group flag of the raid */
+};
+
+extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
+
+/*
+ * Translate block group flags into the matching enum btrfs_raid_types value.
+ *
+ * The profile bits are tested in a fixed order (RAID10, RAID1, RAID1C3,
+ * RAID1C4, DUP, RAID0, RAID5, RAID6); the first bit found set wins. Flags
+ * with none of these bits set map to BTRFS_RAID_SINGLE.
+ */
+static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
+{
+       if (flags & BTRFS_BLOCK_GROUP_RAID10)
+               return BTRFS_RAID_RAID10;
+       if (flags & BTRFS_BLOCK_GROUP_RAID1)
+               return BTRFS_RAID_RAID1;
+       if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
+               return BTRFS_RAID_RAID1C3;
+       if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
+               return BTRFS_RAID_RAID1C4;
+       if (flags & BTRFS_BLOCK_GROUP_DUP)
+               return BTRFS_RAID_DUP;
+       if (flags & BTRFS_BLOCK_GROUP_RAID0)
+               return BTRFS_RAID_RAID0;
+       if (flags & BTRFS_BLOCK_GROUP_RAID5)
+               return BTRFS_RAID_RAID5;
+       if (flags & BTRFS_BLOCK_GROUP_RAID6)
+               return BTRFS_RAID_RAID6;
+
+       /* no profile bit set: BTRFS_BLOCK_GROUP_SINGLE */
+       return BTRFS_RAID_SINGLE;
+}
+
+/*
+ * Allocation size in bytes of a struct btrfs_multi_bio / struct map_lookup
+ * followed by n entries in its trailing stripes[] array.
+ */
+#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
+                           (sizeof(struct btrfs_bio_stripe) * (n)))
+#define btrfs_map_lookup_size(n) (sizeof(struct map_lookup) + \
+                                (sizeof(struct btrfs_bio_stripe) * (n)))
+
+/*
+ * Markers stored in a raid_map slot, in place of a logical address, for the
+ * RAID5 P stripe and the RAID6 Q stripe.
+ */
+#define BTRFS_RAID5_P_STRIPE ((u64)-2)
+#define BTRFS_RAID6_Q_STRIPE ((u64)-1)
+
+/*
+ * Compute the per-device stripe size of a chunk.
+ *
+ * @type:        block group flags selecting the RAID profile
+ * @length:      logical length of the chunk
+ * @num_stripes: number of stripes in the chunk
+ *
+ * RAID0 divides @length over all stripes, RAID10 divides twice @length over
+ * all stripes, RAID5 and RAID6 divide @length over the stripes left after
+ * taking away one resp. two of them. Every other profile returns @length
+ * unchanged.
+ *
+ * NOTE(review): the divisor is not checked; callers presumably validate
+ * @num_stripes first (e.g. more than 1 for RAID5, more than 2 for RAID6).
+ */
+static inline u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
+{
+       if (type & BTRFS_BLOCK_GROUP_RAID0)
+               return length / num_stripes;
+
+       if (type & BTRFS_BLOCK_GROUP_RAID10)
+               return (length * 2) / num_stripes;
+
+       if (type & BTRFS_BLOCK_GROUP_RAID5)
+               return length / (num_stripes - 1);
+
+       if (type & BTRFS_BLOCK_GROUP_RAID6)
+               return length / (num_stripes - 2);
+
+       return length;
+}
+
+/*
+ * Values for the rw argument of btrfs_map_block(). Only defined here when
+ * READ has not already been provided by another header.
+ */
+#ifndef READ
+#define READ 0
+#define WRITE 1
+#define READA 2 /* presumably read-ahead - confirm */
+#endif
+
+/*
+ * Map a logical address to the device stripe(s) backing it.
+ *
+ * @rw:           READ or WRITE; a write to a mirrored profile returns all
+ *                copies
+ * @logical:      logical address to map
+ * @length:       on return, the length that can be handled in one go; for
+ *                the striped and mirrored profiles it never crosses a
+ *                stripe boundary
+ * @type:         optional; receives the block group flags of the chunk.
+ *                Only written when @multi_ret is non-NULL.
+ * @multi_ret:    optional; receives a newly allocated struct btrfs_multi_bio
+ *                (presumably to be released by the caller with kfree() -
+ *                confirm). When NULL only *@length is computed.
+ * @mirror_num:   0 lets the function choose; N > 0 selects copy N on
+ *                mirrored profiles. On RAID5/6, values above 1 address the
+ *                parity stripes.
+ * @raid_map_ret: optional; for RAID5/6 writes or recovery (@mirror_num > 1)
+ *                receives an array holding the logical address of every
+ *                stripe, with BTRFS_RAID5_P_STRIPE / BTRFS_RAID6_Q_STRIPE
+ *                marking parity
+ *
+ * Return: 0 on success, negative errno (e.g. -ENOMEM) on failure.
+ */
+int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+                     u64 logical, u64 *length, u64 *type,
+                     struct btrfs_multi_bio **multi_ret, int mirror_num,
+                     u64 **raid_map_ret);
+
+/* Same as __btrfs_map_block() with @type passed as NULL. */
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+                   u64 logical, u64 *length,
+                   struct btrfs_multi_bio **multi_ret, int mirror_num,
+                   u64 **raid_map_ret);
+/*
+ * NOTE(review): defined outside this header; presumably advances *logical and
+ * *size to the next block group whose flags match @type - confirm against the
+ * definition.
+ */
+int btrfs_next_bg(struct btrfs_fs_info *fs_info, u64 *logical,
+                 u64 *size, u64 type);
+
+/* btrfs_next_bg() restricted to BTRFS_BLOCK_GROUP_METADATA. */
+static inline int btrfs_next_bg_metadata(struct btrfs_fs_info *fs_info,
+                                        u64 *logical, u64 *size)
+{
+       return btrfs_next_bg(fs_info, logical, size,
+                       BTRFS_BLOCK_GROUP_METADATA);
+}
+
+/* btrfs_next_bg() restricted to BTRFS_BLOCK_GROUP_SYSTEM. */
+static inline int btrfs_next_bg_system(struct btrfs_fs_info *fs_info,
+                                      u64 *logical, u64 *size)
+{
+       return btrfs_next_bg(fs_info, logical, size,
+                       BTRFS_BLOCK_GROUP_SYSTEM);
+}
+int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
+int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices);
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
+void btrfs_close_all_devices(void);
+int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
+int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part,
+                         struct btrfs_fs_devices **fs_devices_ret,
+                         u64 *total_devs);
+struct list_head *btrfs_scanned_uuids(void);
+struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
+                                      u8 *uuid, u8 *fsid);
+int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
+                           struct extent_buffer *leaf,
+                           struct btrfs_chunk *chunk,
+                           int slot, u64 logical);
+u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
+                       struct extent_buffer *leaf,
+                       struct btrfs_chunk *chunk);
+#endif