2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 return rb_entry(node, struct extent_backref, node);
100 struct data_backref {
101 struct extent_backref node;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
/*
 * Ordering callback for two data backrefs stored as rb-tree nodes.
 *
 * The visible comparisons run in this order: parent (which shares storage
 * with root), then owner, offset and bytes; disk_bytenr is compared only
 * when both entries have found_ref set, and found_ref itself comes last.
 * Both inputs are expected to be data backrefs (WARN_ON otherwise).
 *
 * NOTE(review): the return statements are not visible in this listing, so
 * the sign convention and the result of the full_backref shortcut are
 * unconfirmed.
 */
120 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
122 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
123 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
124 struct data_backref *back1 = to_data_backref(ext1);
125 struct data_backref *back2 = to_data_backref(ext2);
127 WARN_ON(!ext1->is_data);
128 WARN_ON(!ext2->is_data);
130 /* parent and root are a union, so this covers both */
131 if (back1->parent > back2->parent)
133 if (back1->parent < back2->parent)
136 /* This is a full backref and the parents match. */
137 if (back1->node.full_backref)
140 if (back1->owner > back2->owner)
142 if (back1->owner < back2->owner)
145 if (back1->offset > back2->offset)
147 if (back1->offset < back2->offset)
150 if (back1->bytes > back2->bytes)
152 if (back1->bytes < back2->bytes)
/* disk_bytenr only takes part when both sides have found_ref set */
155 if (back1->found_ref && back2->found_ref) {
156 if (back1->disk_bytenr > back2->disk_bytenr)
158 if (back1->disk_bytenr < back2->disk_bytenr)
161 if (back1->found_ref > back2->found_ref)
163 if (back1->found_ref < back2->found_ref)
171 * Much like data_backref, but with the undetermined members removed
172 * and linked through a list_head instead.
173 * During extent scan, it is stored in root->orphan_data_extent.
174 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
176 struct orphan_data_extent {
177 struct list_head list;
185 struct tree_backref {
186 struct extent_backref node;
193 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
195 return container_of(back, struct tree_backref, node);
/*
 * Ordering callback for two tree backrefs stored as rb-tree nodes.
 *
 * Only the parent field (which shares storage with root) is compared.
 * Both inputs are expected to be tree backrefs (WARN_ON if is_data is set).
 *
 * NOTE(review): the return statements are not visible in this listing, so
 * the sign convention is unconfirmed.
 */
198 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
200 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
201 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
202 struct tree_backref *back1 = to_tree_backref(ext1);
203 struct tree_backref *back2 = to_tree_backref(ext2);
205 WARN_ON(ext1->is_data);
206 WARN_ON(ext2->is_data);
208 /* parent and root are a union, so this covers both */
209 if (back1->parent > back2->parent)
211 if (back1->parent < back2->parent)
/*
 * Ordering callback for extent backrefs of either kind.
 *
 * Entries are ordered by is_data first and by full_backref second; the
 * final decision is delegated to compare_data_backref() or
 * compare_tree_backref().
 *
 * NOTE(review): the condition choosing between the two delegates and the
 * early return values are not visible in this listing - presumably the
 * choice is made on is_data; confirm against the full file.
 */
217 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
219 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
220 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
222 if (ext1->is_data > ext2->is_data)
225 if (ext1->is_data < ext2->is_data)
228 if (ext1->full_backref > ext2->full_backref)
230 if (ext1->full_backref < ext2->full_backref)
234 return compare_data_backref(node1, node2);
236 return compare_tree_backref(node1, node2);
239 /* Explicit initialization for extent_record::flag_block_full_backref */
240 enum { FLAG_UNSET = 2 };
242 struct extent_record {
243 struct list_head backrefs;
244 struct list_head dups;
245 struct rb_root backref_tree;
246 struct list_head list;
247 struct cache_extent cache;
248 struct btrfs_disk_key parent_key;
253 u64 extent_item_refs;
255 u64 parent_generation;
259 unsigned int flag_block_full_backref:2;
260 unsigned int found_rec:1;
261 unsigned int content_checked:1;
262 unsigned int owner_ref_checked:1;
263 unsigned int is_root:1;
264 unsigned int metadata:1;
265 unsigned int bad_full_backref:1;
266 unsigned int crossing_stripes:1;
267 unsigned int wrong_chunk_type:1;
270 static inline struct extent_record* to_extent_record(struct list_head *entry)
272 return container_of(entry, struct extent_record, list);
275 struct inode_backref {
276 struct list_head list;
277 unsigned int found_dir_item:1;
278 unsigned int found_dir_index:1;
279 unsigned int found_inode_ref:1;
280 unsigned int filetype:8;
282 unsigned int ref_type;
289 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
291 return list_entry(entry, struct inode_backref, list);
294 struct root_item_record {
295 struct list_head list;
302 struct btrfs_key drop_key;
/*
 * Error bits OR-ed into a backref's "errors" mask.  print_ref_error()
 * emits one message for every bit that is set.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
319 struct file_extent_hole {
325 struct inode_record {
326 struct list_head backrefs;
327 unsigned int checked:1;
328 unsigned int merging:1;
329 unsigned int found_inode_item:1;
330 unsigned int found_dir_item:1;
331 unsigned int found_file_extent:1;
332 unsigned int found_csum_item:1;
333 unsigned int some_csum_missing:1;
334 unsigned int nodatasum:1;
347 struct rb_root holes;
348 struct list_head orphan_extents;
/*
 * Error bits OR-ed into an inode record's "errors" mask.
 * print_inode_error() emits one message for every bit that is set.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
369 struct root_backref {
370 struct list_head list;
371 unsigned int found_dir_item:1;
372 unsigned int found_dir_index:1;
373 unsigned int found_back_ref:1;
374 unsigned int found_forward_ref:1;
375 unsigned int reachable:1;
384 static inline struct root_backref* to_root_backref(struct list_head *entry)
386 return list_entry(entry, struct root_backref, list);
390 struct list_head backrefs;
391 struct cache_extent cache;
392 unsigned int found_root_item:1;
398 struct cache_extent cache;
403 struct cache_extent cache;
404 struct cache_tree root_cache;
405 struct cache_tree inode_cache;
406 struct inode_record *current;
415 struct walk_control {
416 struct cache_tree shared;
417 struct shared_node *nodes[BTRFS_MAX_LEVEL];
423 struct btrfs_key key;
425 struct list_head list;
428 struct extent_entry {
433 struct list_head list;
436 struct root_item_info {
437 /* level of the root */
439 /* number of nodes at this level, must be 1 for a root */
443 struct cache_extent cache_extent;
447 * Error bit for low memory mode check.
449 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every flag below owns a distinct bit, so flags OR-ed into one error mask
 * remain individually testable.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type mismatch */
/*
 * Previously (1 << 4), which aliased REFERENCER_MISMATCH and made the two
 * conditions indistinguishable in a mask; moved to the first free bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
463 static void *print_status_check(void *p)
465 struct task_ctx *priv = p;
466 const char work_indicator[] = { '.', 'o', 'O', 'o' };
468 static char *task_position_string[] = {
470 "checking free space cache",
474 task_period_start(priv->info, 1000 /* 1s */);
476 if (priv->tp == TASK_NOTHING)
480 printf("%s [%c]\r", task_position_string[priv->tp],
481 work_indicator[count % 4]);
484 task_period_wait(priv->info);
489 static int print_status_return(void *p)
497 static enum btrfs_check_mode parse_check_mode(const char *str)
499 if (strcmp(str, "lowmem") == 0)
500 return CHECK_MODE_LOWMEM;
501 if (strcmp(str, "orig") == 0)
502 return CHECK_MODE_ORIGINAL;
503 if (strcmp(str, "original") == 0)
504 return CHECK_MODE_ORIGINAL;
506 return CHECK_MODE_UNKNOWN;
509 /* Compatibility helper so that older code can keep being reused */
/*
 * Inspects @holes: checks whether the tree is empty and otherwise fetches
 * its first file_extent_hole via rb_first().
 *
 * NOTE(review): the return statements are not visible in this listing.
 * Callers compare the result against an inode size, so it presumably
 * reports the start of the first hole - confirm against the full file.
 */
510 static u64 first_extent_gap(struct rb_root *holes)
512 struct file_extent_hole *hole;
514 if (RB_EMPTY_ROOT(holes))
517 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
521 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
523 struct file_extent_hole *hole1;
524 struct file_extent_hole *hole2;
526 hole1 = rb_entry(node1, struct file_extent_hole, node);
527 hole2 = rb_entry(node2, struct file_extent_hole, node);
529 if (hole1->start > hole2->start)
531 if (hole1->start < hole2->start)
533 /* Now hole1->start == hole2->start */
534 if (hole1->len >= hole2->len)
536 * Hole 1 will be merge center
537 * Same hole will be merged later
540 /* Hole 2 will be merge center */
545 * Add a hole to the record
547 * This also merges holes on behalf of copy_file_extent_holes(),
548 * which ensures that no adjacent or overlapping holes are left behind.
550 static int add_file_extent_hole(struct rb_root *holes,
553 struct file_extent_hole *hole;
554 struct file_extent_hole *prev = NULL;
555 struct file_extent_hole *next = NULL;
557 hole = malloc(sizeof(*hole));
562 /* Since compare will not return 0, no -EEXIST will happen */
563 rb_insert(holes, &hole->node, compare_hole);
565 /* simple merge with previous hole */
566 if (rb_prev(&hole->node))
567 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
569 if (prev && prev->start + prev->len >= hole->start) {
570 hole->len = hole->start + hole->len - prev->start;
571 hole->start = prev->start;
572 rb_erase(&prev->node, holes);
577 /* iterate merge with next holes */
579 if (!rb_next(&hole->node))
581 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
583 if (hole->start + hole->len >= next->start) {
584 if (hole->start + hole->len <= next->start + next->len)
585 hole->len = next->start + next->len -
587 rb_erase(&next->node, holes);
596 static int compare_hole_range(struct rb_node *node, void *data)
598 struct file_extent_hole *hole;
601 hole = (struct file_extent_hole *)data;
604 hole = rb_entry(node, struct file_extent_hole, node);
605 if (start < hole->start)
607 if (start >= hole->start && start < hole->start + hole->len)
613 * Delete a hole in the record
615 * This does the hole split and is much stricter than add.
617 static int del_file_extent_hole(struct rb_root *holes,
620 struct file_extent_hole *hole;
621 struct file_extent_hole tmp;
626 struct rb_node *node;
633 node = rb_search(holes, &tmp, compare_hole_range, NULL);
636 hole = rb_entry(node, struct file_extent_hole, node);
637 if (start + len > hole->start + hole->len)
641 * Now there will be no overlap, delete the hole and re-add the
642 * split(s) if they exist.
644 if (start > hole->start) {
645 prev_start = hole->start;
646 prev_len = start - hole->start;
649 if (hole->start + hole->len > start + len) {
650 next_start = start + len;
651 next_len = hole->start + hole->len - start - len;
654 rb_erase(node, holes);
657 ret = add_file_extent_hole(holes, prev_start, prev_len);
662 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Copy the holes recorded in a source tree into @dst.
 *
 * Walks the source with rb_first()/rb_next() and feeds each hole's start
 * and len to add_file_extent_hole(dst, ...).
 *
 * NOTE(review): the loop header, the handling of a failing
 * add_file_extent_hole() and the final return are not visible in this
 * listing.
 */
669 static int copy_file_extent_holes(struct rb_root *dst,
672 struct file_extent_hole *hole;
673 struct rb_node *node;
676 node = rb_first(src);
678 hole = rb_entry(node, struct file_extent_hole, node);
679 ret = add_file_extent_hole(dst, hole->start, hole->len);
682 node = rb_next(node);
/*
 * Empty the hole tree @holes: repeatedly take the first node and erase it
 * from the tree, re-reading rb_first() after every removal.
 *
 * NOTE(review): the loop header and the statement releasing each erased
 * hole are not visible in this listing.
 */
687 static void free_file_extent_holes(struct rb_root *holes)
689 struct rb_node *node;
690 struct file_extent_hole *hole;
692 node = rb_first(holes);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 rb_erase(node, holes);
697 node = rb_first(holes);
701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
703 static void record_root_in_trans(struct btrfs_trans_handle *trans,
704 struct btrfs_root *root)
706 if (root->last_trans != trans->transid) {
707 root->track_dirty = 1;
708 root->last_trans = trans->transid;
709 root->commit_root = root->node;
710 extent_buffer_get(root->node);
714 static u8 imode_to_type(u32 imode)
717 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
718 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
719 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
720 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
721 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
722 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
723 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
724 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
727 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * Ordering callback for two device_record entries stored as rb-tree nodes;
 * the records are compared by their devid field.
 *
 * NOTE(review): the return statements are not visible in this listing, so
 * the sign convention is unconfirmed.
 */
731 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
733 struct device_record *rec1;
734 struct device_record *rec2;
736 rec1 = rb_entry(node1, struct device_record, node);
737 rec2 = rb_entry(node2, struct device_record, node);
738 if (rec1->devid > rec2->devid)
740 else if (rec1->devid < rec2->devid)
746 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
748 struct inode_record *rec;
749 struct inode_backref *backref;
750 struct inode_backref *orig;
751 struct inode_backref *tmp;
752 struct orphan_data_extent *src_orphan;
753 struct orphan_data_extent *dst_orphan;
757 rec = malloc(sizeof(*rec));
759 return ERR_PTR(-ENOMEM);
760 memcpy(rec, orig_rec, sizeof(*rec));
762 INIT_LIST_HEAD(&rec->backrefs);
763 INIT_LIST_HEAD(&rec->orphan_extents);
764 rec->holes = RB_ROOT;
766 list_for_each_entry(orig, &orig_rec->backrefs, list) {
767 size = sizeof(*orig) + orig->namelen + 1;
768 backref = malloc(size);
773 memcpy(backref, orig, size);
774 list_add_tail(&backref->list, &rec->backrefs);
776 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
777 dst_orphan = malloc(sizeof(*dst_orphan));
782 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
783 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
785 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
791 if (!list_empty(&rec->backrefs))
792 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
793 list_del(&orig->list);
797 if (!list_empty(&rec->orphan_extents))
798 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
799 list_del(&orig->list);
808 static void print_orphan_data_extents(struct list_head *orphan_extents,
811 struct orphan_data_extent *orphan;
813 if (list_empty(orphan_extents))
815 printf("The following data extent is lost in tree %llu:\n",
817 list_for_each_entry(orphan, orphan_extents, list) {
818 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
819 orphan->objectid, orphan->offset, orphan->disk_bytenr,
824 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
826 u64 root_objectid = root->root_key.objectid;
827 int errors = rec->errors;
831 /* For reloc root errors, print the objectid of the corresponding fs root */
832 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
833 root_objectid = root->root_key.offset;
834 fprintf(stderr, "reloc");
836 fprintf(stderr, "root %llu inode %llu errors %x",
837 (unsigned long long) root_objectid,
838 (unsigned long long) rec->ino, rec->errors);
840 if (errors & I_ERR_NO_INODE_ITEM)
841 fprintf(stderr, ", no inode item");
842 if (errors & I_ERR_NO_ORPHAN_ITEM)
843 fprintf(stderr, ", no orphan item");
844 if (errors & I_ERR_DUP_INODE_ITEM)
845 fprintf(stderr, ", dup inode item");
846 if (errors & I_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & I_ERR_ODD_DIR_ITEM)
849 fprintf(stderr, ", odd dir item");
850 if (errors & I_ERR_ODD_FILE_EXTENT)
851 fprintf(stderr, ", odd file extent");
852 if (errors & I_ERR_BAD_FILE_EXTENT)
853 fprintf(stderr, ", bad file extent");
854 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
855 fprintf(stderr, ", file extent overlap");
856 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
857 fprintf(stderr, ", file extent discount");
858 if (errors & I_ERR_DIR_ISIZE_WRONG)
859 fprintf(stderr, ", dir isize wrong");
860 if (errors & I_ERR_FILE_NBYTES_WRONG)
861 fprintf(stderr, ", nbytes wrong");
862 if (errors & I_ERR_ODD_CSUM_ITEM)
863 fprintf(stderr, ", odd csum item");
864 if (errors & I_ERR_SOME_CSUM_MISSING)
865 fprintf(stderr, ", some csum missing");
866 if (errors & I_ERR_LINK_COUNT_WRONG)
867 fprintf(stderr, ", link count wrong");
868 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
869 fprintf(stderr, ", orphan file extent");
870 fprintf(stderr, "\n");
871 /* Print the orphan extents if needed */
872 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
873 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
875 /* Print the holes if needed */
876 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
877 struct file_extent_hole *hole;
878 struct rb_node *node;
881 node = rb_first(&rec->holes);
882 fprintf(stderr, "Found file extent holes:\n");
885 hole = rb_entry(node, struct file_extent_hole, node);
886 fprintf(stderr, "\tstart: %llu, len: %llu\n",
887 hole->start, hole->len);
888 node = rb_next(node);
891 fprintf(stderr, "\tstart: 0, len: %llu\n",
892 round_up(rec->isize, root->sectorsize));
896 static void print_ref_error(int errors)
898 if (errors & REF_ERR_NO_DIR_ITEM)
899 fprintf(stderr, ", no dir item");
900 if (errors & REF_ERR_NO_DIR_INDEX)
901 fprintf(stderr, ", no dir index");
902 if (errors & REF_ERR_NO_INODE_REF)
903 fprintf(stderr, ", no inode ref");
904 if (errors & REF_ERR_DUP_DIR_ITEM)
905 fprintf(stderr, ", dup dir item");
906 if (errors & REF_ERR_DUP_DIR_INDEX)
907 fprintf(stderr, ", dup dir index");
908 if (errors & REF_ERR_DUP_INODE_REF)
909 fprintf(stderr, ", dup inode ref");
910 if (errors & REF_ERR_INDEX_UNMATCH)
911 fprintf(stderr, ", index mismatch");
912 if (errors & REF_ERR_FILETYPE_UNMATCH)
913 fprintf(stderr, ", filetype mismatch");
914 if (errors & REF_ERR_NAME_TOO_LONG)
915 fprintf(stderr, ", name too long");
916 if (errors & REF_ERR_NO_ROOT_REF)
917 fprintf(stderr, ", no root ref");
918 if (errors & REF_ERR_NO_ROOT_BACKREF)
919 fprintf(stderr, ", no root backref");
920 if (errors & REF_ERR_DUP_ROOT_REF)
921 fprintf(stderr, ", dup root ref");
922 if (errors & REF_ERR_DUP_ROOT_BACKREF)
923 fprintf(stderr, ", dup root backref");
924 fprintf(stderr, "\n");
927 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
930 struct ptr_node *node;
931 struct cache_extent *cache;
932 struct inode_record *rec = NULL;
935 cache = lookup_cache_extent(inode_cache, ino, 1);
937 node = container_of(cache, struct ptr_node, cache);
939 if (mod && rec->refs > 1) {
940 node->data = clone_inode_rec(rec);
941 if (IS_ERR(node->data))
947 rec = calloc(1, sizeof(*rec));
949 return ERR_PTR(-ENOMEM);
951 rec->extent_start = (u64)-1;
953 INIT_LIST_HEAD(&rec->backrefs);
954 INIT_LIST_HEAD(&rec->orphan_extents);
955 rec->holes = RB_ROOT;
957 node = malloc(sizeof(*node));
960 return ERR_PTR(-ENOMEM);
962 node->cache.start = ino;
963 node->cache.size = 1;
966 if (ino == BTRFS_FREE_INO_OBJECTID)
969 ret = insert_cache_extent(inode_cache, &node->cache);
971 return ERR_PTR(-EEXIST);
/*
 * Drain @orphan_extents: while the list is non-empty, take its first
 * orphan_data_extent and unlink it with list_del().
 *
 * NOTE(review): the statement releasing each unlinked entry is not visible
 * in this listing - confirm against the full file.
 */
976 static void free_orphan_data_extents(struct list_head *orphan_extents)
978 struct orphan_data_extent *orphan;
980 while (!list_empty(orphan_extents)) {
981 orphan = list_entry(orphan_extents->next,
982 struct orphan_data_extent, list);
983 list_del(&orphan->list);
988 static void free_inode_rec(struct inode_record *rec)
990 struct inode_backref *backref;
995 while (!list_empty(&rec->backrefs)) {
996 backref = to_inode_backref(rec->backrefs.next);
997 list_del(&backref->list);
1000 free_orphan_data_extents(&rec->orphan_extents);
1001 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec holds nothing worth keeping: no error bits, already
 * checked, inode item found, nlink equal to found_link, and an empty
 * backref list.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * the caller treats a true result as permission to free the record.
 */
1005 static int can_free_inode_rec(struct inode_record *rec)
1007 if (!rec->errors && rec->checked && rec->found_inode_item &&
1008 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1013 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1014 struct inode_record *rec)
1016 struct cache_extent *cache;
1017 struct inode_backref *tmp, *backref;
1018 struct ptr_node *node;
1019 unsigned char filetype;
1021 if (!rec->found_inode_item)
1024 filetype = imode_to_type(rec->imode);
1025 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1026 if (backref->found_dir_item && backref->found_dir_index) {
1027 if (backref->filetype != filetype)
1028 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1029 if (!backref->errors && backref->found_inode_ref &&
1030 rec->nlink == rec->found_link) {
1031 list_del(&backref->list);
1037 if (!rec->checked || rec->merging)
1040 if (S_ISDIR(rec->imode)) {
1041 if (rec->found_size != rec->isize)
1042 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1043 if (rec->found_file_extent)
1044 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1045 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1046 if (rec->found_dir_item)
1047 rec->errors |= I_ERR_ODD_DIR_ITEM;
1048 if (rec->found_size != rec->nbytes)
1049 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1050 if (rec->nlink > 0 && !no_holes &&
1051 (rec->extent_end < rec->isize ||
1052 first_extent_gap(&rec->holes) < rec->isize))
1053 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1056 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1057 if (rec->found_csum_item && rec->nodatasum)
1058 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1059 if (rec->some_csum_missing && !rec->nodatasum)
1060 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1063 BUG_ON(rec->refs != 1);
1064 if (can_free_inode_rec(rec)) {
1065 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1066 node = container_of(cache, struct ptr_node, cache);
1067 BUG_ON(node->data != rec);
1068 remove_cache_extent(inode_cache, &node->cache);
1070 free_inode_rec(rec);
/*
 * Search @root for an orphan item: the key uses objectid
 * BTRFS_ORPHAN_OBJECTID and type BTRFS_ORPHAN_ITEM_KEY, and the lookup is
 * done with btrfs_search_slot() using a NULL transaction handle.  The path
 * is released right after the search.
 *
 * NOTE(review): the assignment of key.offset (presumably @ino) and the
 * translation of the search result into the return value are not visible
 * in this listing.
 */
1074 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1076 struct btrfs_path path;
1077 struct btrfs_key key;
1080 key.objectid = BTRFS_ORPHAN_OBJECTID;
1081 key.type = BTRFS_ORPHAN_ITEM_KEY;
1084 btrfs_init_path(&path);
1085 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1086 btrfs_release_path(&path);
1092 static int process_inode_item(struct extent_buffer *eb,
1093 int slot, struct btrfs_key *key,
1094 struct shared_node *active_node)
1096 struct inode_record *rec;
1097 struct btrfs_inode_item *item;
1099 rec = active_node->current;
1100 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1101 if (rec->found_inode_item) {
1102 rec->errors |= I_ERR_DUP_INODE_ITEM;
1105 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1106 rec->nlink = btrfs_inode_nlink(eb, item);
1107 rec->isize = btrfs_inode_size(eb, item);
1108 rec->nbytes = btrfs_inode_nbytes(eb, item);
1109 rec->imode = btrfs_inode_mode(eb, item);
1110 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1112 rec->found_inode_item = 1;
1113 if (rec->nlink == 0)
1114 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1115 maybe_free_inode_rec(&active_node->inode_cache, rec);
1119 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1121 int namelen, u64 dir)
1123 struct inode_backref *backref;
1125 list_for_each_entry(backref, &rec->backrefs, list) {
1126 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1128 if (backref->dir != dir || backref->namelen != namelen)
1130 if (memcmp(name, backref->name, namelen))
1135 backref = malloc(sizeof(*backref) + namelen + 1);
1138 memset(backref, 0, sizeof(*backref));
1140 backref->namelen = namelen;
1141 memcpy(backref->name, name, namelen);
1142 backref->name[namelen] = '\0';
1143 list_add_tail(&backref->list, &rec->backrefs);
1147 static int add_inode_backref(struct cache_tree *inode_cache,
1148 u64 ino, u64 dir, u64 index,
1149 const char *name, int namelen,
1150 int filetype, int itemtype, int errors)
1152 struct inode_record *rec;
1153 struct inode_backref *backref;
1155 rec = get_inode_rec(inode_cache, ino, 1);
1156 BUG_ON(IS_ERR(rec));
1157 backref = get_inode_backref(rec, name, namelen, dir);
1160 backref->errors |= errors;
1161 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1162 if (backref->found_dir_index)
1163 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1164 if (backref->found_inode_ref && backref->index != index)
1165 backref->errors |= REF_ERR_INDEX_UNMATCH;
1166 if (backref->found_dir_item && backref->filetype != filetype)
1167 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1169 backref->index = index;
1170 backref->filetype = filetype;
1171 backref->found_dir_index = 1;
1172 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1174 if (backref->found_dir_item)
1175 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1176 if (backref->found_dir_index && backref->filetype != filetype)
1177 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1179 backref->filetype = filetype;
1180 backref->found_dir_item = 1;
1181 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1182 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1183 if (backref->found_inode_ref)
1184 backref->errors |= REF_ERR_DUP_INODE_REF;
1185 if (backref->found_dir_index && backref->index != index)
1186 backref->errors |= REF_ERR_INDEX_UNMATCH;
1188 backref->index = index;
1190 backref->ref_type = itemtype;
1191 backref->found_inode_ref = 1;
1196 maybe_free_inode_rec(inode_cache, rec);
1200 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1201 struct cache_tree *dst_cache)
1203 struct inode_backref *backref;
1208 list_for_each_entry(backref, &src->backrefs, list) {
1209 if (backref->found_dir_index) {
1210 add_inode_backref(dst_cache, dst->ino, backref->dir,
1211 backref->index, backref->name,
1212 backref->namelen, backref->filetype,
1213 BTRFS_DIR_INDEX_KEY, backref->errors);
1215 if (backref->found_dir_item) {
1217 add_inode_backref(dst_cache, dst->ino,
1218 backref->dir, 0, backref->name,
1219 backref->namelen, backref->filetype,
1220 BTRFS_DIR_ITEM_KEY, backref->errors);
1222 if (backref->found_inode_ref) {
1223 add_inode_backref(dst_cache, dst->ino,
1224 backref->dir, backref->index,
1225 backref->name, backref->namelen, 0,
1226 backref->ref_type, backref->errors);
1230 if (src->found_dir_item)
1231 dst->found_dir_item = 1;
1232 if (src->found_file_extent)
1233 dst->found_file_extent = 1;
1234 if (src->found_csum_item)
1235 dst->found_csum_item = 1;
1236 if (src->some_csum_missing)
1237 dst->some_csum_missing = 1;
1238 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1239 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1244 BUG_ON(src->found_link < dir_count);
1245 dst->found_link += src->found_link - dir_count;
1246 dst->found_size += src->found_size;
1247 if (src->extent_start != (u64)-1) {
1248 if (dst->extent_start == (u64)-1) {
1249 dst->extent_start = src->extent_start;
1250 dst->extent_end = src->extent_end;
1252 if (dst->extent_end > src->extent_start)
1253 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1254 else if (dst->extent_end < src->extent_start) {
1255 ret = add_file_extent_hole(&dst->holes,
1257 src->extent_start - dst->extent_end);
1259 if (dst->extent_end < src->extent_end)
1260 dst->extent_end = src->extent_end;
1264 dst->errors |= src->errors;
1265 if (src->found_inode_item) {
1266 if (!dst->found_inode_item) {
1267 dst->nlink = src->nlink;
1268 dst->isize = src->isize;
1269 dst->nbytes = src->nbytes;
1270 dst->imode = src->imode;
1271 dst->nodatasum = src->nodatasum;
1272 dst->found_inode_item = 1;
1274 dst->errors |= I_ERR_DUP_INODE_ITEM;
1282 static int splice_shared_node(struct shared_node *src_node,
1283 struct shared_node *dst_node)
1285 struct cache_extent *cache;
1286 struct ptr_node *node, *ins;
1287 struct cache_tree *src, *dst;
1288 struct inode_record *rec, *conflict;
1289 u64 current_ino = 0;
1293 if (--src_node->refs == 0)
1295 if (src_node->current)
1296 current_ino = src_node->current->ino;
1298 src = &src_node->root_cache;
1299 dst = &dst_node->root_cache;
1301 cache = search_cache_extent(src, 0);
1303 node = container_of(cache, struct ptr_node, cache);
1305 cache = next_cache_extent(cache);
1308 remove_cache_extent(src, &node->cache);
1311 ins = malloc(sizeof(*ins));
1313 ins->cache.start = node->cache.start;
1314 ins->cache.size = node->cache.size;
1318 ret = insert_cache_extent(dst, &ins->cache);
1319 if (ret == -EEXIST) {
1320 conflict = get_inode_rec(dst, rec->ino, 1);
1321 BUG_ON(IS_ERR(conflict));
1322 merge_inode_recs(rec, conflict, dst);
1324 conflict->checked = 1;
1325 if (dst_node->current == conflict)
1326 dst_node->current = NULL;
1328 maybe_free_inode_rec(dst, conflict);
1329 free_inode_rec(rec);
1336 if (src == &src_node->root_cache) {
1337 src = &src_node->inode_cache;
1338 dst = &dst_node->inode_cache;
1342 if (current_ino > 0 && (!dst_node->current ||
1343 current_ino > dst_node->current->ino)) {
1344 if (dst_node->current) {
1345 dst_node->current->checked = 1;
1346 maybe_free_inode_rec(dst, dst_node->current);
1348 dst_node->current = get_inode_rec(dst, current_ino, 1);
1349 BUG_ON(IS_ERR(dst_node->current));
1354 static void free_inode_ptr(struct cache_extent *cache)
1356 struct ptr_node *node;
1357 struct inode_record *rec;
1359 node = container_of(cache, struct ptr_node, cache);
1361 free_inode_rec(rec);
1365 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1367 static struct shared_node *find_shared_node(struct cache_tree *shared,
1370 struct cache_extent *cache;
1371 struct shared_node *node;
1373 cache = lookup_cache_extent(shared, bytenr, 1);
1375 node = container_of(cache, struct shared_node, cache);
1381 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1384 struct shared_node *node;
1386 node = calloc(1, sizeof(*node));
1389 node->cache.start = bytenr;
1390 node->cache.size = 1;
1391 cache_tree_init(&node->root_cache);
1392 cache_tree_init(&node->inode_cache);
1395 ret = insert_cache_extent(shared, &node->cache);
/*
 * Bookkeeping performed when the tree walk enters the shared tree block at
 * bytenr on the given level.  Statements visible in this excerpt:
 *  - level is first compared with wc->active_node, and the active node is
 *    asserted to be above level;
 *  - the shared_node for bytenr is looked up; add_shared_node() creates it
 *    and it is then recorded in wc->nodes[level] with level made active
 *    (presumably only when the first lookup failed - TODO confirm);
 *  - when the active node is the root level and the root item has zero
 *    refs, the node reference count is decremented and, on reaching zero,
 *    both record trees are freed and the node leaves wc->shared;
 *  - otherwise the records of node are spliced into the active node, and
 *    node is removed from wc->shared once its refs are zero.
 * NOTE(review): return values and the exact branch structure are not
 * visible here.
 */
1400 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1401 struct walk_control *wc, int level)
1403 struct shared_node *node;
1404 struct shared_node *dest;
1407 if (level == wc->active_node)
1410 BUG_ON(wc->active_node <= level);
1411 node = find_shared_node(&wc->shared, bytenr);
1413 ret = add_shared_node(&wc->shared, bytenr, refs);
1415 node = find_shared_node(&wc->shared, bytenr);
1416 wc->nodes[level] = node;
1417 wc->active_node = level;
1421 if (wc->root_level == wc->active_node &&
1422 btrfs_root_refs(&root->root_item) == 0) {
1423 if (--node->refs == 0) {
1424 free_inode_recs_tree(&node->root_cache);
1425 free_inode_recs_tree(&node->inode_cache);
1426 remove_cache_extent(&wc->shared, &node->cache);
1432 dest = wc->nodes[wc->active_node];
1433 splice_shared_node(node, dest);
1434 if (node->refs == 0) {
1435 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Bookkeeping performed when the tree walk leaves the active shared node at
 * the given level.  Statements visible in this excerpt:
 *  - level is compared with wc->root_level first;
 *  - levels above level are scanned up to BTRFS_MAX_LEVEL (the loop body is
 *    not visible; presumably it stops at the next populated wc->nodes slot);
 *  - the current active node is detached from wc->nodes and level i becomes
 *    the active one;
 *  - if the new active node is below the root level, or the root item still
 *    has refs, the detached node must hold more than one ref and its records
 *    are spliced into the new active node.
 * NOTE(review): the alternative branch and the return values are not fully
 * visible here.
 */
1441 static int leave_shared_node(struct btrfs_root *root,
1442 struct walk_control *wc, int level)
1444 struct shared_node *node;
1445 struct shared_node *dest;
1448 if (level == wc->root_level)
1451 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1455 BUG_ON(i >= BTRFS_MAX_LEVEL);
1457 node = wc->nodes[wc->active_node];
1458 wc->nodes[wc->active_node] = NULL;
1459 wc->active_node = i;
1461 dest = wc->nodes[wc->active_node];
1462 if (wc->active_node < wc->root_level ||
1463 btrfs_root_refs(&root->root_item) > 0) {
1464 BUG_ON(node->refs <= 1);
1465 splice_shared_node(node, dest);
1467 BUG_ON(node->refs < 2);
1476 * 1 - if the root with id child_root_id is a child of root parent_root_id
1477 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1478 * has other root(s) as parent(s)
1479 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Classify the relation between child_root_id and parent_root_id using the
 * tree root of the filesystem.
 * Step 1 searches for the key (parent_root_id, ROOT_REF, child_root_id).
 * Step 2 searches from (child_root_id, ROOT_BACKREF, ...) and walks the
 * following items, moving to the next leaf when the slot runs past the end
 * of the current one, until the objectid or key type no longer matches.
 * A backref whose offset equals parent_root_id ends the scan early.
 * The final return is 0 when some parent was seen and 2 otherwise.
 * NOTE(review): the early returns and the place where has_parent is set are
 * not visible in this excerpt.
 */
1481 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1484 struct btrfs_path path;
1485 struct btrfs_key key;
1486 struct extent_buffer *leaf;
1490 btrfs_init_path(&path);
/* Direct lookup of the forward reference parent -> child. */
1492 key.objectid = parent_root_id;
1493 key.type = BTRFS_ROOT_REF_KEY;
1494 key.offset = child_root_id;
1495 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1499 btrfs_release_path(&path);
/* Fall back to scanning the back references stored under the child. */
1503 key.objectid = child_root_id;
1504 key.type = BTRFS_ROOT_BACKREF_KEY;
1506 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1512 leaf = path.nodes[0];
1513 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1514 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1517 leaf = path.nodes[0];
1520 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1521 if (key.objectid != child_root_id ||
1522 key.type != BTRFS_ROOT_BACKREF_KEY)
1527 if (key.offset == parent_root_id) {
1528 btrfs_release_path(&path);
1535 btrfs_release_path(&path);
1538 return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into the DIR_ITEM / DIR_INDEX item at
 * slot of eb as a backref in the caches of active_node.
 *
 * For each entry the location key, name length, data length and file type
 * are read from the leaf.  The name length is added to the found_size of
 * the current inode record, and the entry is registered with
 * add_inode_backref():
 *  - location type INODE_ITEM -> inode_cache, keyed by location.objectid
 *  - location type ROOT_ITEM  -> root_cache, keyed by location.objectid
 *  - anything else            -> a message on stderr and a backref under
 *                                BTRFS_MULTIPLE_OBJECTIDS in inode_cache
 * Names that do not fit BTRFS_NAME_LEN are copied truncated to that length
 * and tagged with REF_ERR_NAME_TOO_LONG.
 * A DIR_INDEX item holding more than one entry sets I_ERR_DUP_DIR_INDEX.
 *
 * NOTE(review): active_node->current is dereferenced without a visible NULL
 * check; the caller appears to set it before dispatching - confirm.
 * NOTE(review): the loop trusts name_len and data_len from disk when
 * advancing di; no visible check keeps the pointer inside the item.
 */
1541 static int process_dir_item(struct btrfs_root *root,
1542 struct extent_buffer *eb,
1543 int slot, struct btrfs_key *key,
1544 struct shared_node *active_node)
1554 struct btrfs_dir_item *di;
1555 struct inode_record *rec;
1556 struct cache_tree *root_cache;
1557 struct cache_tree *inode_cache;
1558 struct btrfs_key location;
1559 char namebuf[BTRFS_NAME_LEN];
1561 root_cache = &active_node->root_cache;
1562 inode_cache = &active_node->inode_cache;
1563 rec = active_node->current;
1564 rec->found_dir_item = 1;
1566 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1567 total = btrfs_item_size_nr(eb, slot);
1568 while (cur < total) {
1570 btrfs_dir_item_key_to_cpu(eb, di, &location);
1571 name_len = btrfs_dir_name_len(eb, di);
1572 data_len = btrfs_dir_data_len(eb, di);
1573 filetype = btrfs_dir_type(eb, di);
1575 rec->found_size += name_len;
1576 if (name_len <= BTRFS_NAME_LEN) {
1580 len = BTRFS_NAME_LEN;
1581 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size dir item header directly. */
1583 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1585 if (location.type == BTRFS_INODE_ITEM_KEY) {
1586 add_inode_backref(inode_cache, location.objectid,
1587 key->objectid, key->offset, namebuf,
1588 len, filetype, key->type, error);
1589 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1590 add_inode_backref(root_cache, location.objectid,
1591 key->objectid, key->offset,
1592 namebuf, len, filetype,
1595 fprintf(stderr, "invalid location in dir item %u\n",
1597 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1598 key->objectid, key->offset, namebuf,
1599 len, filetype, key->type, error);
/* Step over header, name and payload to reach the next packed entry. */
1602 len = sizeof(*di) + name_len + data_len;
1603 di = (struct btrfs_dir_item *)((char *)di + len);
1606 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1607 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name packed into the INODE_REF item at slot of eb as a
 * backref in the inode cache of active_node.
 *
 * key->objectid is the inode and key->offset the parent directory; the
 * directory index and the name are read from each packed reference.
 * Names longer than BTRFS_NAME_LEN are copied truncated to that length and
 * tagged with REF_ERR_NAME_TOO_LONG.  The file type passed on is 0.
 *
 * NOTE(review): the loop advances by the on-disk name_len with no visible
 * check that the pointer stays inside the item.
 */
1612 static int process_inode_ref(struct extent_buffer *eb,
1613 int slot, struct btrfs_key *key,
1614 struct shared_node *active_node)
1622 struct cache_tree *inode_cache;
1623 struct btrfs_inode_ref *ref;
1624 char namebuf[BTRFS_NAME_LEN];
1626 inode_cache = &active_node->inode_cache;
1628 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1629 total = btrfs_item_size_nr(eb, slot);
1630 while (cur < total) {
1631 name_len = btrfs_inode_ref_name_len(eb, ref);
1632 index = btrfs_inode_ref_index(eb, ref);
1633 if (name_len <= BTRFS_NAME_LEN) {
1637 len = BTRFS_NAME_LEN;
1638 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size reference header directly. */
1640 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1641 add_inode_backref(inode_cache, key->objectid, key->offset,
1642 index, namebuf, len, 0, key->type, error);
/* Step over header and name to reach the next packed reference. */
1644 len = sizeof(*ref) + name_len;
1645 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every name packed into the INODE_EXTREF item at slot of eb as a
 * backref in the inode cache of active_node.
 *
 * Unlike process_inode_ref, the parent directory is read from each packed
 * reference instead of from key->offset.  Names longer than BTRFS_NAME_LEN
 * are copied truncated to that length and tagged with
 * REF_ERR_NAME_TOO_LONG.  The file type passed on is 0.
 *
 * NOTE(review): the loop advances by the on-disk name_len with no visible
 * check that the pointer stays inside the item.
 */
1651 static int process_inode_extref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1662 struct cache_tree *inode_cache;
1663 struct btrfs_inode_extref *extref;
1664 char namebuf[BTRFS_NAME_LEN];
1666 inode_cache = &active_node->inode_cache;
1668 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1669 total = btrfs_item_size_nr(eb, slot);
1670 while (cur < total) {
1671 name_len = btrfs_inode_extref_name_len(eb, extref);
1672 index = btrfs_inode_extref_index(eb, extref);
1673 parent = btrfs_inode_extref_parent(eb, extref);
1674 if (name_len <= BTRFS_NAME_LEN) {
1678 len = BTRFS_NAME_LEN;
1679 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size extref header directly. */
1681 read_extent_buffer(eb, namebuf,
1682 (unsigned long)(extref + 1), len);
1683 add_inode_backref(inode_cache, key->objectid, parent,
1684 index, namebuf, len, 0, key->type, error);
/* Step over header and name to reach the next packed reference. */
1686 len = sizeof(*extref) + name_len;
1687 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the checksum tree for EXTENT_CSUM items overlapping the byte range
 * [start, start + len) and report the amount found through *found.
 *
 * The search starts at an EXTENT_CSUM key; on an inexact match with a
 * preceding slot, the previous item is inspected as well since it may
 * cover the beginning of the range.  The walk then proceeds item by item,
 * moving to the next leaf when needed, and stops at the first item that is
 * not an EXTENT_CSUM item or whose offset is at or beyond start + len.
 * Each item covers (item size / csum size) sectors starting at key.offset.
 *
 * NOTE(review): the key offset used for the search, the updates of start,
 * len and *found, and the return value are not visible in this excerpt.
 */
1694 static int count_csum_range(struct btrfs_root *root, u64 start,
1695 u64 len, u64 *found)
1697 struct btrfs_key key;
1698 struct btrfs_path path;
1699 struct extent_buffer *leaf;
1704 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1706 btrfs_init_path(&path);
1708 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1710 key.type = BTRFS_EXTENT_CSUM_KEY;
1712 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* Inexact hit: the item just before the slot may still cover start. */
1716 if (ret > 0 && path.slots[0] > 0) {
1717 leaf = path.nodes[0];
1718 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1719 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1720 key.type == BTRFS_EXTENT_CSUM_KEY)
1725 leaf = path.nodes[0];
1726 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1727 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1732 leaf = path.nodes[0];
1735 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1736 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1737 key.type != BTRFS_EXTENT_CSUM_KEY)
/* NOTE(review): this second key read repeats the one a few lines above. */
1740 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1741 if (key.offset >= start + len)
1744 if (key.offset > start)
/* End of the byte range covered by the checksums stored in this item. */
1747 size = btrfs_item_size_nr(leaf, path.slots[0]);
1748 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1749 if (csum_end > start) {
1750 size = min(csum_end - start, len);
1759 btrfs_release_path(&path);
/*
 * Validate the EXTENT_DATA item at slot of eb and fold it into the current
 * inode record of active_node.
 *
 * Range tracking: the first extent seen initialises extent_start and
 * extent_end from key->offset.  Later extents that begin before the running
 * extent_end set I_ERR_FILE_EXTENT_OVERLAP; a gap between extent_end and
 * key->offset is recorded with add_file_extent_hole().
 *
 * Per-type checks, each failure setting I_ERR_BAD_FILE_EXTENT:
 *  - inline: the inline length is added to found_size and rounded up to the
 *    sector size for range tracking;
 *  - regular / prealloc: num_bytes must be non-zero and sector aligned,
 *    num_bytes + offset must not exceed ram_bytes, and a prealloc extent
 *    must carry no compression, encryption or other encoding; found_size
 *    grows by num_bytes only when disk_bytenr is non-zero;
 *  - any other type is flagged as bad.
 *
 * Checksum accounting runs for extents with a disk_bytenr outside the data
 * reloc tree: compressed extents are measured by their on-disk size, and
 * the result of count_csum_range() marks a regular extent as having some
 * or missing checksums and a prealloc extent as having odd checksum items.
 *
 * NOTE(review): the record must match key->objectid and hold at most one
 * ref; this is enforced with BUG_ON, so a violation aborts the program.
 */
1765 static int process_file_extent(struct btrfs_root *root,
1766 struct extent_buffer *eb,
1767 int slot, struct btrfs_key *key,
1768 struct shared_node *active_node)
1770 struct inode_record *rec;
1771 struct btrfs_file_extent_item *fi;
1773 u64 disk_bytenr = 0;
1774 u64 extent_offset = 0;
/* Low bits that must be clear for a sector-aligned length. */
1775 u64 mask = root->sectorsize - 1;
1779 rec = active_node->current;
1780 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1781 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any file extent yet. */
1783 if (rec->extent_start == (u64)-1) {
1784 rec->extent_start = key->offset;
1785 rec->extent_end = key->offset;
1788 if (rec->extent_end > key->offset)
1789 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1790 else if (rec->extent_end < key->offset) {
1791 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1792 key->offset - rec->extent_end);
1797 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1798 extent_type = btrfs_file_extent_type(eb, fi);
1800 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1801 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1803 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804 rec->found_size += num_bytes;
1805 num_bytes = (num_bytes + mask) & ~mask;
1806 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1807 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1809 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1810 extent_offset = btrfs_file_extent_offset(eb, fi);
1811 if (num_bytes == 0 || (num_bytes & mask))
1812 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1813 if (num_bytes + extent_offset >
1814 btrfs_file_extent_ram_bytes(eb, fi))
1815 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1816 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1817 (btrfs_file_extent_compression(eb, fi) ||
1818 btrfs_file_extent_encryption(eb, fi) ||
1819 btrfs_file_extent_other_encoding(eb, fi)))
1820 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1821 if (disk_bytenr > 0)
1822 rec->found_size += num_bytes;
1824 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1826 rec->extent_end = key->offset + num_bytes;
1829 * The data reloc tree will copy full extents into its inode and then
1830 * copy the corresponding csums. Because the extent it copied could be
1831 * a preallocated extent that hasn't been written to yet there may be no
1832 * csums to copy, ergo we won't have csums for our file extent. This is
1833 * ok so just don't bother checking csums if the inode belongs to the
1836 if (disk_bytenr > 0 &&
1837 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1839 if (btrfs_file_extent_compression(eb, fi))
1840 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1842 disk_bytenr += extent_offset;
1844 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1847 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1849 rec->found_csum_item = 1;
1850 if (found < num_bytes)
1851 rec->some_csum_missing = 1;
1852 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1854 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Walk every item of the leaf eb and feed it to the matching per-item
 * handler, maintaining the current inode record of the active shared node.
 *
 * The function first tests for a zero-ref root while the active node is the
 * root level (the action taken is not visible in this excerpt).  Items with
 * the FREE_SPACE objectid and ORPHAN_ITEM keys get special treatment that
 * is likewise not visible.  Whenever the key objectid moves past the
 * current record, that record is marked checked, offered to
 * maybe_free_inode_rec(), and replaced through get_inode_rec().
 *
 * Dispatch by key type:
 *   DIR_ITEM / DIR_INDEX -> process_dir_item
 *   INODE_REF            -> process_inode_ref
 *   INODE_EXTREF         -> process_inode_extref
 *   INODE_ITEM           -> process_inode_item
 *   EXTENT_DATA          -> process_file_extent
 *
 * NOTE(review): a failing get_inode_rec() aborts through BUG_ON.
 */
1860 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1861 struct walk_control *wc)
1863 struct btrfs_key key;
1867 struct cache_tree *inode_cache;
1868 struct shared_node *active_node;
1870 if (wc->root_level == wc->active_node &&
1871 btrfs_root_refs(&root->root_item) == 0)
1874 active_node = wc->nodes[wc->active_node];
1875 inode_cache = &active_node->inode_cache;
1876 nritems = btrfs_header_nritems(eb);
1877 for (i = 0; i < nritems; i++) {
1878 btrfs_item_key_to_cpu(eb, &key, i);
1880 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1882 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Keys arrive in ascending objectid order, so a larger id means a new inode. */
1885 if (active_node->current == NULL ||
1886 active_node->current->ino < key.objectid) {
1887 if (active_node->current) {
1888 active_node->current->checked = 1;
1889 maybe_free_inode_rec(inode_cache,
1890 active_node->current);
1892 active_node->current = get_inode_rec(inode_cache,
1894 BUG_ON(IS_ERR(active_node->current));
1897 case BTRFS_DIR_ITEM_KEY:
1898 case BTRFS_DIR_INDEX_KEY:
1899 ret = process_dir_item(root, eb, i, &key, active_node);
1901 case BTRFS_INODE_REF_KEY:
1902 ret = process_inode_ref(eb, i, &key, active_node);
1904 case BTRFS_INODE_EXTREF_KEY:
1905 ret = process_inode_extref(eb, i, &key, active_node);
1907 case BTRFS_INODE_ITEM_KEY:
1908 ret = process_inode_item(eb, i, &key, active_node);
1910 case BTRFS_EXTENT_DATA_KEY:
1911 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the children of node, starting at slot and running
 * to the last pointer in the node.  Each child is requested with its block
 * pointer, the node size of the root and the generation stored next to the
 * pointer.
 * NOTE(review): the header level is read first; the check made on it is not
 * visible in this excerpt (presumably only one level is prefetched).
 */
1921 static void reada_walk_down(struct btrfs_root *root,
1922 struct extent_buffer *node, int slot)
1931 level = btrfs_header_level(node);
1935 nritems = btrfs_header_nritems(node);
1936 blocksize = root->nodesize;
1937 for (i = slot; i < nritems; i++) {
1938 bytenr = btrfs_node_blockptr(node, i);
1939 ptr_gen = btrfs_node_ptr_generation(node, i);
1940 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1945 * Check the child node/leaf by the following condition:
1946 * 1. the first item key of the node/leaf should be the same as the one
1948 * 2. block in parent node should match the child node/leaf.
1949 * 3. generation of parent node and child's header should be consistent.
1951 * Otherwise the child node/leaf pointed to by the key in the parent is not valid.
1953 * We hope to check leaf owner too, but since subvol may share leaves,
1954 * which makes leaf owner check not so strong, key check should be
1955 * sufficient for that case.
/*
 * Cross-check a freshly read child block against the pointer that led to
 * it in parent at slot.  Three properties are compared and each mismatch is
 * reported on stderr:
 *  1. the key stored in the parent slot equals the first key of the child
 *     (item key for a leaf, node key otherwise);
 *  2. the block pointer in the parent equals the bytenr in the child header;
 *  3. the pointer generation in the parent equals the generation in the
 *     child header.
 * NOTE(review): the return value assignments are not visible in this
 * excerpt.
 */
1957 static int check_child_node(struct btrfs_root *root,
1958 struct extent_buffer *parent, int slot,
1959 struct extent_buffer *child)
1961 struct btrfs_key parent_key;
1962 struct btrfs_key child_key;
1965 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1966 if (btrfs_header_level(child) == 0)
1967 btrfs_item_key_to_cpu(child, &child_key, 0);
1969 btrfs_node_key_to_cpu(child, &child_key, 0);
1971 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1974 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1975 parent_key.objectid, parent_key.type, parent_key.offset,
1976 child_key.objectid, child_key.type, child_key.offset);
1978 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1980 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1981 btrfs_node_blockptr(parent, slot),
1982 btrfs_header_bytenr(child));
/*
 * NOTE(review): the arguments of the generation message below look swapped.
 * The key and block messages print the parent value as wanted and the child
 * value as have, but here the child header generation is printed as wanted
 * and the parent pointer generation as have.
 */
1984 if (btrfs_node_ptr_generation(parent, slot) !=
1985 btrfs_header_generation(child)) {
1987 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1988 btrfs_header_generation(child),
1989 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache of the most recent extent reference lookup, indexed by
 * tree level.  walk_down_tree compares a block start against bytenr[level]
 * and reuses refs[level] on a match instead of looking the extent up again.
 * NOTE(review): the enclosing struct header is not visible in this excerpt;
 * presumably these are the members of struct node_refs.
 */
1995 u64 bytenr[BTRFS_MAX_LEVEL];
1996 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from path->nodes[*level] towards the leaves, processing each leaf
 * with process_one_leaf() and validating every child block on the way.
 *
 * Reference counts are taken from nrefs when the block start matches the
 * cached bytenr for that level; otherwise btrfs_lookup_extent_info() is
 * queried and the cache refreshed.  Blocks are announced through
 * enter_shared_node(), whose result can make the walk skip a child by
 * advancing the slot (the exact conditions are not visible in this excerpt).
 *
 * For each child pointer the block is taken from the cache or read from
 * disk after a readahead of its siblings.  A child that cannot be read is
 * reported with btrfs_add_corrupt_extent_record().  A child that is read is
 * checked with check_child_node() and then btrfs_check_leaf() or
 * btrfs_check_node(); a block that is not clean is released.  On success
 * the child replaces path->nodes one level down with the slot reset to 0.
 *
 * On leaving the loop the slot of the current level is set to the item
 * count so that walk_up_tree moves on.
 *
 * NOTE(review): error propagation and the branch structure are only partly
 * visible here - confirm against the full file.
 */
1999 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2000 struct walk_control *wc, int *level,
2001 struct node_refs *nrefs)
2003 enum btrfs_tree_block_status status;
2006 struct extent_buffer *next;
2007 struct extent_buffer *cur;
2012 WARN_ON(*level < 0);
2013 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count for the starting block when it matches. */
2015 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2016 refs = nrefs->refs[*level];
2019 ret = btrfs_lookup_extent_info(NULL, root,
2020 path->nodes[*level]->start,
2021 *level, 1, &refs, NULL);
2026 nrefs->bytenr[*level] = path->nodes[*level]->start;
2027 nrefs->refs[*level] = refs;
2031 ret = enter_shared_node(root, path->nodes[*level]->start,
2039 while (*level >= 0) {
2040 WARN_ON(*level < 0);
2041 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2042 cur = path->nodes[*level];
2044 if (btrfs_header_level(cur) != *level)
2047 if (path->slots[*level] >= btrfs_header_nritems(cur))
2050 ret = process_one_leaf(root, cur, wc);
2055 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2056 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2057 blocksize = root->nodesize;
/* Same caching scheme as above, applied to the child one level down. */
2059 if (bytenr == nrefs->bytenr[*level - 1]) {
2060 refs = nrefs->refs[*level - 1];
2062 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2063 *level - 1, 1, &refs, NULL);
2067 nrefs->bytenr[*level - 1] = bytenr;
2068 nrefs->refs[*level - 1] = refs;
2073 ret = enter_shared_node(root, bytenr, refs,
2076 path->slots[*level]++;
2081 next = btrfs_find_tree_block(root, bytenr, blocksize);
2082 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2083 free_extent_buffer(next);
2084 reada_walk_down(root, cur, path->slots[*level]);
2085 next = read_tree_block(root, bytenr, blocksize,
2087 if (!extent_buffer_uptodate(next)) {
2088 struct btrfs_key node_key;
2090 btrfs_node_key_to_cpu(path->nodes[*level],
2092 path->slots[*level]);
2093 btrfs_add_corrupt_extent_record(root->fs_info,
2095 path->nodes[*level]->start,
2096 root->nodesize, *level);
2102 ret = check_child_node(root, cur, path->slots[*level], next);
2108 if (btrfs_is_leaf(next))
2109 status = btrfs_check_leaf(root, NULL, next);
2111 status = btrfs_check_node(root, NULL, next);
2112 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2113 free_extent_buffer(next);
/* Step down: the child takes over the path entry of the lower level. */
2118 *level = *level - 1;
2119 free_extent_buffer(path->nodes[*level]);
2120 path->nodes[*level] = next;
2121 path->slots[*level] = 0;
2124 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb from *level towards the root until a node with an unvisited slot is
 * found.  Levels whose slots are exhausted have their extent buffer
 * released and their path entry cleared; when the level being left is the
 * active shared node, leave_shared_node() is called for it.
 * NOTE(review): the slot increment, the update of *level and the return
 * values are not visible in this excerpt.
 */
2128 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2129 struct walk_control *wc, int *level)
2132 struct extent_buffer *leaf;
2134 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2135 leaf = path->nodes[i];
2136 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2141 free_extent_buffer(path->nodes[*level]);
2142 path->nodes[*level] = NULL;
2143 BUG_ON(*level > wc->active_node);
2144 if (*level == wc->active_node)
2145 leave_shared_node(root, wc, *level);
/*
 * Sanity-check the inode record of a subvolume root directory.  The visible
 * tests look at:
 *  - presence of the inode item and absence of recorded errors;
 *  - nlink equal to 1 with no directory links found;
 *  - a non-empty backref list whose first entry was found as an inode ref,
 *    has index 0 and the two-byte name "..";
 *  - that first entry having neither a dir index nor a dir item.
 * NOTE(review): the action taken when a test fires and the return values
 * are not visible in this excerpt.
 */
2152 static int check_root_dir(struct inode_record *rec)
2154 struct inode_backref *backref;
2157 if (!rec->found_inode_item || rec->errors)
2159 if (rec->nlink != 1 || rec->found_link != 0)
2161 if (list_empty(&rec->backrefs))
2163 backref = to_inode_backref(rec->backrefs.next);
2164 if (!backref->found_inode_ref)
2166 if (backref->index != 0 || backref->namelen != 2 ||
2167 memcmp(backref->name, "..", 2))
2169 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search key uses offset (u64)-1, so the search is expected to land
 * just past the wanted item; slot 0 and a key whose objectid differs from
 * rec->ino are both tested before the item is touched (the handling of
 * those cases is not visible in this excerpt).  The path is released before
 * returning.
 */
2176 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2177 struct btrfs_root *root, struct btrfs_path *path,
2178 struct inode_record *rec)
2180 struct btrfs_inode_item *ei;
2181 struct btrfs_key key;
2184 key.objectid = rec->ino;
2185 key.type = BTRFS_INODE_ITEM_KEY;
2186 key.offset = (u64)-1;
2188 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2192 if (!path->slots[0]) {
2199 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2200 if (key.objectid != rec->ino) {
2205 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2206 struct btrfs_inode_item);
2207 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2208 btrfs_mark_buffer_dirty(path->nodes[0]);
2209 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2210 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2211 root->root_key.objectid);
2213 btrfs_release_path(path);
/*
 * Add the missing orphan item for rec->ino with btrfs_add_orphan_item(),
 * release the path, and clear I_ERR_NO_ORPHAN_ITEM on the record.
 * NOTE(review): whether the flag is cleared only on success is not visible
 * in this excerpt.
 */
2217 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2218 struct btrfs_root *root,
2219 struct btrfs_path *path,
2220 struct inode_record *rec)
2224 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2225 btrfs_release_path(path);
2227 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 * The inode item is located with a writable search and the path is released
 * before returning.
 * NOTE(review): the key offset and the handling of a failed or inexact
 * search are not visible in this excerpt.
 */
2231 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root,
2233 struct btrfs_path *path,
2234 struct inode_record *rec)
2236 struct btrfs_inode_item *ei;
2237 struct btrfs_key key;
2240 key.objectid = rec->ino;
2241 key.type = BTRFS_INODE_ITEM_KEY;
2244 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2251 /* Since ret == 0, no need to check anything */
2252 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2253 struct btrfs_inode_item);
2254 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2255 btrfs_mark_buffer_dirty(path->nodes[0]);
2256 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2257 printf("reset nbytes for ino %llu root %llu\n",
2258 rec->ino, root->root_key.objectid);
2260 btrfs_release_path(path);
/*
 * Recreate the DIR_INDEX item that backref says should exist for rec.
 *
 * Inside its own transaction an empty item is inserted at
 * (backref->dir, DIR_INDEX, backref->index), sized for the dir item header
 * plus the name.  It is filled with a location key pointing at the inode
 * item of rec->ino, the file type derived from rec->imode, a zero data
 * length and the name from the backref.
 *
 * After the commit the backref is marked as having a dir index, and the
 * record of the parent directory is updated: its found_size grows by the
 * name length and I_ERR_DIR_ISIZE_WRONG is cleared or set depending on
 * whether found_size now equals isize.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not used.
 * NOTE(review): a failing lookup of the directory record aborts through
 * BUG_ON.
 */
2264 static int add_missing_dir_index(struct btrfs_root *root,
2265 struct cache_tree *inode_cache,
2266 struct inode_record *rec,
2267 struct inode_backref *backref)
2269 struct btrfs_path *path;
2270 struct btrfs_trans_handle *trans;
2271 struct btrfs_dir_item *dir_item;
2272 struct extent_buffer *leaf;
2273 struct btrfs_key key;
2274 struct btrfs_disk_key disk_key;
2275 struct inode_record *dir_rec;
2276 unsigned long name_ptr;
2277 u32 data_size = sizeof(*dir_item) + backref->namelen;
2280 path = btrfs_alloc_path();
2284 trans = btrfs_start_transaction(root, 1);
2285 if (IS_ERR(trans)) {
2286 btrfs_free_path(path);
2287 return PTR_ERR(trans);
2290 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2291 (unsigned long long)rec->ino);
2292 key.objectid = backref->dir;
2293 key.type = BTRFS_DIR_INDEX_KEY;
2294 key.offset = backref->index;
2296 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2299 leaf = path->nodes[0];
2300 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* On-disk key of the inode item this directory entry points at. */
2302 disk_key.objectid = cpu_to_le64(rec->ino);
2303 disk_key.type = BTRFS_INODE_ITEM_KEY;
2304 disk_key.offset = 0;
2306 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2307 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2308 btrfs_set_dir_data_len(leaf, dir_item, 0);
2309 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right after the fixed-size dir item header. */
2310 name_ptr = (unsigned long)(dir_item + 1);
2311 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2312 btrfs_mark_buffer_dirty(leaf);
2313 btrfs_free_path(path);
2314 btrfs_commit_transaction(trans, root);
2316 backref->found_dir_index = 1;
2317 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2318 BUG_ON(IS_ERR(dir_rec));
2321 dir_rec->found_size += backref->namelen;
2322 if (dir_rec->found_size == dir_rec->isize &&
2323 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2324 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2325 if (dir_rec->found_size != dir_rec->isize)
2326 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by backref inside its own
 * transaction.  The entry is located with btrfs_lookup_dir_index() using
 * the directory, name and index from the backref.  Two removal calls are
 * visible: btrfs_del_item() for the whole item and
 * btrfs_delete_one_dir_name() for a single name (the condition choosing
 * between them is not visible in this excerpt).  Both the early exit and
 * the normal exit free the path and commit the transaction.
 * NOTE(review): the result of btrfs_commit_transaction() is not used.
 */
2331 static int delete_dir_index(struct btrfs_root *root,
2332 struct cache_tree *inode_cache,
2333 struct inode_record *rec,
2334 struct inode_backref *backref)
2336 struct btrfs_trans_handle *trans;
2337 struct btrfs_dir_item *di;
2338 struct btrfs_path *path;
2341 path = btrfs_alloc_path();
2345 trans = btrfs_start_transaction(root, 1);
2346 if (IS_ERR(trans)) {
2347 btrfs_free_path(path);
2348 return PTR_ERR(trans);
2352 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2353 (unsigned long long)backref->dir,
2354 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2355 (unsigned long long)root->objectid);
2357 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2358 backref->name, backref->namelen,
2359 backref->index, -1);
2362 btrfs_free_path(path);
2363 btrfs_commit_transaction(trans, root);
2370 ret = btrfs_del_item(trans, root, path);
2372 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2374 btrfs_free_path(path);
2375 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec inside its own transaction.
 *
 * The new item starts zeroed and receives the current transaction id as
 * generation, a link count (1 or rec->found_link; the condition choosing
 * between them is not visible in this excerpt) and rec->found_size as
 * nbytes.
 * If a dir item was seen for the inode it becomes a directory with mode
 * 0755 and size found_size; a message is printed when file extents were
 * seen too.  Otherwise it becomes a regular file with mode 0755 and size
 * extent_end.  atime, ctime and mtime are set to the current time and
 * otime to zero.
 *
 * NOTE(review): the else-if test on !rec->found_dir_item is always true
 * when reached, since the preceding if tested rec->found_dir_item.
 * NOTE(review): the result of btrfs_commit_transaction() is not used.
 */
2379 static int create_inode_item(struct btrfs_root *root,
2380 struct inode_record *rec,
2381 struct inode_backref *backref, int root_dir)
2383 struct btrfs_trans_handle *trans;
2384 struct btrfs_inode_item inode_item;
2385 time_t now = time(NULL);
2388 trans = btrfs_start_transaction(root, 1);
2389 if (IS_ERR(trans)) {
2390 ret = PTR_ERR(trans);
2394 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2395 "be incomplete, please check permissions and content after "
2396 "the fsck completes.\n", (unsigned long long)root->objectid,
2397 (unsigned long long)rec->ino);
2399 memset(&inode_item, 0, sizeof(inode_item));
2400 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2402 btrfs_set_stack_inode_nlink(&inode_item, 1);
2404 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2405 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2406 if (rec->found_dir_item) {
2407 if (rec->found_file_extent)
2408 fprintf(stderr, "root %llu inode %llu has both a dir "
2409 "item and extents, unsure if it is a dir or a "
2410 "regular file so setting it as a directory\n",
2411 (unsigned long long)root->objectid,
2412 (unsigned long long)rec->ino);
2413 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2414 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2415 } else if (!rec->found_dir_item) {
2416 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2417 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2419 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2420 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2421 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2422 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2423 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2424 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2425 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2426 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2428 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2430 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of rec and repair the inconsistencies that can be
 * fixed per backref.  The delete flag selects the pass: removal of bad dir
 * indexes when set, recreation of missing pieces when clear.
 *
 * Visible cases, in order:
 *  - root directory without inode item (non-delete pass): recreate the
 *    inode item with create_inode_item();
 *  - index 0 of the root directory is never touched;
 *  - dir index present while the inode ref is missing, or both present
 *    with REF_ERR_INDEX_UNMATCH: delete the dir index and drop the backref
 *    from the list (further conditions of this test are not visible);
 *  - dir item and inode ref present but dir index missing (non-delete
 *    pass): add_missing_dir_index();
 *  - dir item, dir index and inode ref all present without errors: the
 *    backref is consistent and is dropped from the list;
 *  - only the inode ref present (non-delete pass): after
 *    check_dir_conflict() the dir item/index pair is inserted with
 *    btrfs_insert_dir_item() in its own transaction;
 *  - all three present with matching index but no inode item (non-delete
 *    pass): recreate the inode item.
 *
 * Returns ret when it is non-zero, otherwise the repaired counter.
 *
 * NOTE(review): the consistency test checks backref->found_dir_index twice
 * in a row; the second test is redundant.
 */
2434 static int repair_inode_backrefs(struct btrfs_root *root,
2435 struct inode_record *rec,
2436 struct cache_tree *inode_cache,
2439 struct inode_backref *tmp, *backref;
2440 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The safe iterator is needed because entries are unlinked while walking. */
2444 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2445 if (!delete && rec->ino == root_dirid) {
2446 if (!rec->found_inode_item) {
2447 ret = create_inode_item(root, rec, backref, 1);
2454 /* Index 0 for root dir's are special, don't mess with it */
2455 if (rec->ino == root_dirid && backref->index == 0)
2459 ((backref->found_dir_index && !backref->found_inode_ref) ||
2460 (backref->found_dir_index && backref->found_inode_ref &&
2461 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2462 ret = delete_dir_index(root, inode_cache, rec, backref);
2466 list_del(&backref->list);
2470 if (!delete && !backref->found_dir_index &&
2471 backref->found_dir_item && backref->found_inode_ref) {
2472 ret = add_missing_dir_index(root, inode_cache, rec,
2477 if (backref->found_dir_item &&
2478 backref->found_dir_index &&
2479 backref->found_dir_index) {
2480 if (!backref->errors &&
2481 backref->found_inode_ref) {
2482 list_del(&backref->list);
2488 if (!delete && (!backref->found_dir_index &&
2489 !backref->found_dir_item &&
2490 backref->found_inode_ref)) {
2491 struct btrfs_trans_handle *trans;
2492 struct btrfs_key location;
2494 ret = check_dir_conflict(root, backref->name,
2500 * let nlink fixing routine to handle it,
2501 * which can do it better.
2506 location.objectid = rec->ino;
2507 location.type = BTRFS_INODE_ITEM_KEY;
2508 location.offset = 0;
2510 trans = btrfs_start_transaction(root, 1);
2511 if (IS_ERR(trans)) {
2512 ret = PTR_ERR(trans);
2515 fprintf(stderr, "adding missing dir index/item pair "
2517 (unsigned long long)rec->ino);
2518 ret = btrfs_insert_dir_item(trans, root, backref->name,
2520 backref->dir, &location,
2521 imode_to_type(rec->imode),
2524 btrfs_commit_transaction(trans, root);
2528 if (!delete && (backref->found_inode_ref &&
2529 backref->found_dir_index &&
2530 backref->found_dir_item &&
2531 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2532 !rec->found_inode_item)) {
2533 ret = create_inode_item(root, rec, backref, 0);
2540 return ret ? ret : repaired;
2544 * To determine the file type for nlink/inode_item repair
2546 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2547 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* type of the inode described by rec and store it
 * in *type.  An existing inode item wins: its mode is converted with
 * imode_to_type().  Otherwise the first backref that was seen as a dir
 * index or dir item supplies its recorded file type.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
2549 static int find_file_type(struct inode_record *rec, u8 *type)
2551 struct inode_backref *backref;
2553 /* For inode item recovered case */
2554 if (rec->found_inode_item) {
2555 *type = imode_to_type(rec->imode);
2559 list_for_each_entry(backref, &rec->backrefs, list) {
2560 if (backref->found_dir_index || backref->found_dir_item) {
2561 *type = backref->filetype;
2569 * To determine the file name for nlink repair
2571 * Return 0 if file name is found, set name and namelen.
2572 * Return -ENOENT if file name is not found.
/*
 * Copy a name for the inode described by rec into name and store its
 * length in *namelen.  The first backref that was seen as a dir index, dir
 * item or inode ref supplies the name.  The copy is not NUL-terminated by
 * the visible code.
 * NOTE(review): memcpy copies backref->namelen bytes with no visible bound;
 * the caller must supply a buffer large enough for the longest stored name
 * (the visible caller passes a BTRFS_NAME_LEN buffer) - confirm that
 * namelen cannot exceed it.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
2574 static int find_file_name(struct inode_record *rec,
2575 char *name, int *namelen)
2577 struct inode_backref *backref;
2579 list_for_each_entry(backref, &rec->backrefs, list) {
2580 if (backref->found_dir_index || backref->found_dir_item ||
2581 backref->found_inode_ref) {
2582 memcpy(name, backref->name, backref->namelen);
2583 *namelen = backref->namelen;
2590 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from its backrefs.
 *
 * Phase 1 unlinks every backref with btrfs_unlink() and drops from the list
 * those that were not seen as dir index, dir item and inode ref all at
 * once.  Phase 2 sets nlink in the inode item to 0.  Phase 3 re-adds each
 * remaining backref with btrfs_add_link(), which is relied on to raise
 * nlink again.  rec->found_link is reset to 0 at the start; where it is
 * incremented again is not visible in this excerpt.
 *
 * NOTE(review): error handling between the phases and the key offset used
 * for the inode item search are not visible here.
 */
2591 static int reset_nlink(struct btrfs_trans_handle *trans,
2592 struct btrfs_root *root,
2593 struct btrfs_path *path,
2594 struct inode_record *rec)
2596 struct inode_backref *backref;
2597 struct inode_backref *tmp;
2598 struct btrfs_key key;
2599 struct btrfs_inode_item *inode_item;
2602 /* We don't believe this either, reset it and iterate backref */
2603 rec->found_link = 0;
2605 /* Remove all backref including the valid ones */
2606 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2607 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2608 backref->index, backref->name,
2609 backref->namelen, 0);
2613 /* remove invalid backref, so it won't be added back */
2614 if (!(backref->found_dir_index &&
2615 backref->found_dir_item &&
2616 backref->found_inode_ref)) {
2617 list_del(&backref->list);
2624 /* Set nlink to 0 */
2625 key.objectid = rec->ino;
2626 key.type = BTRFS_INODE_ITEM_KEY;
2628 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2635 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2636 struct btrfs_inode_item);
2637 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2638 btrfs_mark_buffer_dirty(path->nodes[0]);
2639 btrfs_release_path(path);
2642 * Add back valid inode_ref/dir_item/dir_index,
2643 * add_link() will handle the nlink inc, so new nlink must be correct
2645 list_for_each_entry(backref, &rec->backrefs, list) {
2646 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2647 backref->name, backref->namelen,
2648 backref->filetype, &backref->index, 1);
2653 btrfs_release_path(path);
/*
 * Repair a wrong link count on rec->ino.
 *
 * A name and file type are salvaged from the backrefs first; the fallbacks
 * are the decimal inode number as name and BTRFS_FT_REG_FILE as type.
 * reset_nlink() then rebuilds the links from the valid backrefs.  If no
 * link is left afterwards, a lost+found directory is created under
 * BTRFS_FIRST_FREE_OBJECTID and the inode is linked into it.  On a name
 * collision a suffix built from the inode number is appended and the link
 * retried while the name still fits BTRFS_NAME_LEN.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared in every case and the path is released
 * before returning.
 *
 * NOTE(review): the fallback name is written with sprintf into the
 * BTRFS_NAME_LEN buffer; this relies on that buffer holding a full decimal
 * u64 plus terminator - confirm the constant.
 * NOTE(review): a comment below names remove_all_invalid_backref(), while
 * the visible code calls reset_nlink() - the comment may be stale.
 */
2657 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2658 struct btrfs_root *root,
2659 struct btrfs_path *path,
2660 struct inode_record *rec)
2662 char *dir_name = "lost+found";
2663 char namebuf[BTRFS_NAME_LEN] = {0};
2668 int name_recovered = 0;
2669 int type_recovered = 0;
2673 * Get file name and type first before these invalid inode ref
2674 * are deleted by remove_all_invalid_backref()
2676 name_recovered = !find_file_name(rec, namebuf, &namelen);
2677 type_recovered = !find_file_type(rec, &type);
2679 if (!name_recovered) {
2680 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2681 rec->ino, rec->ino);
2682 namelen = count_digits(rec->ino);
2683 sprintf(namebuf, "%llu", rec->ino);
2686 if (!type_recovered) {
2687 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2689 type = BTRFS_FT_REG_FILE;
2693 ret = reset_nlink(trans, root, path, rec);
2696 "Failed to reset nlink for inode %llu: %s\n",
2697 rec->ino, strerror(-ret));
2701 if (rec->found_link == 0) {
2702 lost_found_ino = root->highest_inode;
2703 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2708 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2709 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2712 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2713 dir_name, strerror(-ret));
2716 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2717 namebuf, namelen, type, NULL, 1);
2719 * Add ".INO" suffix several times to handle case where
2720 * "FILENAME.INO" is already taken by another file.
2722 while (ret == -EEXIST) {
2724 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2726 if (namelen + count_digits(rec->ino) + 1 >
2731 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2733 namelen += count_digits(rec->ino) + 1;
2734 ret = btrfs_add_link(trans, root, rec->ino,
2735 lost_found_ino, namebuf,
2736 namelen, type, NULL, 1);
2740 "Failed to link the inode %llu to %s dir: %s\n",
2741 rec->ino, dir_name, strerror(-ret));
2745 * Just increase the found_link, don't actually add the
2746 * backref. This will make things easier and this inode
2747 * record will be freed after the repair is done.
2748 * So fsck will not report problem about this inode.
2751 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2752 namelen, namebuf, dir_name);
2754 printf("Fixed the nlink of inode %llu\n", rec->ino);
2757 * Clear the flag anyway, or we will loop forever for the same inode
2758 * as it will not be removed from the bad inode list and the dead loop
2761 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2762 btrfs_release_path(path);
2767 * Check if there is any normal(reg or prealloc) file extent for given
2769 * This is used to determine the file type when neither its dir_index/item or
2770 * inode_item exists.
2772 * This will *NOT* report error, if any error happens, just consider it does
2773 * not have any normal file extent.
/*
 * Probe @root for an EXTENT_DATA item belonging to inode @ino and look at
 * the type of the file extent it describes.
 *
 * The search is done without a transaction (first argument NULL) and with
 * ins_len and cow both 0.  An item whose objectid or type does not match is
 * rejected; of the matching ones only a non-inline extent is singled out.
 *
 * NOTE(review): key.objectid/key.offset and the value finally returned are
 * set on lines that are not visible in this listing.  The caller
 * repair_inode_no_item() treats a non-zero result as "regular file".
 */
2775 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2777 struct btrfs_path *path;
2778 struct btrfs_key key;
2779 struct btrfs_key found_key;
2780 struct btrfs_file_extent_item *fi;
2784 path = btrfs_alloc_path();
2788 key.type = BTRFS_EXTENT_DATA_KEY;
2791 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* No exact match and the slot is past the last item: step to the next leaf. */
2796 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2797 ret = btrfs_next_leaf(root, path);
2804 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* The item found must still be an EXTENT_DATA item of the requested inode. */
2806 if (found_key.objectid != ino ||
2807 found_key.type != BTRFS_EXTENT_DATA_KEY)
2809 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2810 struct btrfs_file_extent_item);
2811 type = btrfs_file_extent_type(path->nodes[0], fi);
/* Inline extents are not counted as a normal file extent. */
2812 if (type != BTRFS_FILE_EXTENT_INLINE) {
2818 btrfs_free_path(path);
2822 static u32 btrfs_type_to_imode(u8 type)
2824 static u32 imode_by_btrfs_type[] = {
2825 [BTRFS_FT_REG_FILE] = S_IFREG,
2826 [BTRFS_FT_DIR] = S_IFDIR,
2827 [BTRFS_FT_CHRDEV] = S_IFCHR,
2828 [BTRFS_FT_BLKDEV] = S_IFBLK,
2829 [BTRFS_FT_FIFO] = S_IFIFO,
2830 [BTRFS_FT_SOCK] = S_IFSOCK,
2831 [BTRFS_FT_SYMLINK] = S_IFLNK,
2834 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the inode item of @rec, which was flagged I_ERR_NO_INODE_ITEM.
 *
 * The file type comes from find_file_type().  When that fails it is derived
 * from what was recorded for the inode: a normal file extent (see
 * find_normal_file_extent()) or a non-empty orphan extent list selects a
 * regular file, a found dir item selects a directory, and anything else
 * falls back to a regular file after printing a message.
 *
 * The item itself is created with btrfs_new_inode().  Afterwards @rec is
 * updated in memory: found_dir_item and imode are set, I_ERR_NO_INODE_ITEM
 * is cleared and I_ERR_LINK_COUNT_WRONG is set so the nlink repair runs.
 *
 * NOTE(review): the initial value of "mode", the handling of a failing
 * btrfs_new_inode() and the return statement are on lines not visible here.
 */
2837 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root,
2839 struct btrfs_path *path,
2840 struct inode_record *rec)
2844 int type_recovered = 0;
2847 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returning 0 counts as "type recovered". */
2849 type_recovered = !find_file_type(rec, &filetype);
2852 * Try to determine inode type if type not found.
2854 * For found regular file extent, it must be FILE.
2855 * For found dir_item/index, it must be DIR.
2857 * For undetermined one, use FILE as fallback.
2860 * 1. If found backref(inode_index/item is already handled) to it,
2862 * Need new inode-inode ref structure to allow search for that.
2864 if (!type_recovered) {
2865 if (rec->found_file_extent &&
2866 find_normal_file_extent(root, rec->ino)) {
2868 filetype = BTRFS_FT_REG_FILE;
2869 } else if (rec->found_dir_item) {
2871 filetype = BTRFS_FT_DIR;
2872 } else if (!list_empty(&rec->orphan_extents)) {
2874 filetype = BTRFS_FT_REG_FILE;
2876 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2879 filetype = BTRFS_FT_REG_FILE;
2883 ret = btrfs_new_inode(trans, root, rec->ino,
2884 mode | btrfs_type_to_imode(filetype));
2889 * Here inode rebuild is done, we only rebuild the inode item,
2890 * don't repair the nlink(like move to lost+found).
2891 * That is the job of nlink repair.
2893 * We just fill the record and return
2895 rec->found_dir_item = 1;
2896 rec->imode = mode | btrfs_type_to_imode(filetype);
2898 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2899 /* Ensure the inode_nlinks repair function will be called */
2900 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Process every entry on rec->orphan_extents.
 *
 * For each orphan data extent the range (offset, disk_len) is first looked
 * up with btrfs_get_extent().  The visible code then contains two outcomes:
 * a conflicting orphan is reported and its disk extent released with
 * btrfs_free_extent(), while otherwise a file extent item is inserted with
 * btrfs_insert_file_extent() using disk_len for both length arguments.
 *
 * After an insertion the bookkeeping in @rec is adjusted: found_size grows
 * by disk_len, I_ERR_FILE_NBYTES_WRONG is cleared once found_size equals
 * nbytes, the range is removed from rec->holes, and
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared when no holes remain.  Each entry
 * is unlinked from the list, and I_ERR_FILE_EXTENT_ORPHAN is cleared at
 * the end.
 *
 * NOTE(review): the conditions selecting between the two outcomes, the
 * error exits and the return statement are on lines not visible here.
 */
2905 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct orphan_data_extent *orphan;
2911 struct orphan_data_extent *tmp;
2914 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2916 * Check for conflicting file extents
2918 * Here we don't know whether the extents is compressed or not,
2919 * so we can only assume it not compressed nor data offset,
2920 * and use its disk_len as extent length.
2922 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2923 orphan->offset, orphan->disk_len, 0);
2924 btrfs_release_path(path);
2929 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2930 orphan->disk_bytenr, orphan->disk_len);
2931 ret = btrfs_free_extent(trans,
2932 root->fs_info->extent_root,
2933 orphan->disk_bytenr, orphan->disk_len,
2934 0, root->objectid, orphan->objectid,
2939 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2940 orphan->offset, orphan->disk_bytenr,
2941 orphan->disk_len, orphan->disk_len);
2945 /* Update file size info */
2946 rec->found_size += orphan->disk_len;
2947 if (rec->found_size == rec->nbytes)
2948 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2950 /* Update the file extent hole info too */
2951 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2955 if (RB_EMPTY_ROOT(&rec->holes))
2956 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2958 list_del(&orphan->list);
2961 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Repair the gaps recorded in rec->holes.
 *
 * Starting from rb_first(), btrfs_punch_hole() is called for each recorded
 * hole (start, len) and the hole is then removed from the tree with
 * del_file_extent_hole().  The first node is fetched again after every
 * removal.  I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.
 *
 * A separate call covers the range from 0 to isize rounded up to the sector
 * size, for a file that lost all of its file extents.  A message is printed
 * when the inode has been fixed.
 *
 * NOTE(review): the loop header, the condition guarding the special case,
 * the error exits and the return statement are on lines not visible here.
 */
2966 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2967 struct btrfs_root *root,
2968 struct btrfs_path *path,
2969 struct inode_record *rec)
2971 struct rb_node *node;
2972 struct file_extent_hole *hole;
2976 node = rb_first(&rec->holes);
2980 hole = rb_entry(node, struct file_extent_hole, node);
2981 ret = btrfs_punch_hole(trans, root, rec->ino,
2982 hole->start, hole->len);
2985 ret = del_file_extent_hole(&rec->holes, hole->start,
2989 if (RB_EMPTY_ROOT(&rec->holes))
2990 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree changed, so restart from its first node. */
2991 node = rb_first(&rec->holes);
2993 /* special case for a file losing all its file extent */
2995 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2996 round_up(rec->isize, root->sectorsize));
3000 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3001 rec->ino, root->objectid);
/*
 * Run the repair helpers that match the error bits set in rec->errors.
 *
 * Only the seven error bits listed in the first test are handled.  A path
 * is allocated and a transaction reserving 7 items is started, then the
 * helpers are called in a fixed order: missing inode item, orphan extents,
 * discount extents, dir isize, orphan item, link count, nbytes.  Apart from
 * the first, a helper only runs while "ret" is still 0, so the first
 * failure stops the chain.  The transaction is committed and the path freed
 * in every case.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * checked here.  The value returned when no handled bit is set, the result
 * of a failed path allocation and the final return are on lines not
 * visible in this listing.
 */
3006 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3008 struct btrfs_trans_handle *trans;
3009 struct btrfs_path *path;
3012 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3013 I_ERR_NO_ORPHAN_ITEM |
3014 I_ERR_LINK_COUNT_WRONG |
3015 I_ERR_NO_INODE_ITEM |
3016 I_ERR_FILE_EXTENT_ORPHAN |
3017 I_ERR_FILE_EXTENT_DISCOUNT|
3018 I_ERR_FILE_NBYTES_WRONG)))
3021 path = btrfs_alloc_path();
3026 * For nlink repair, it may create a dir and add link, so
3027 * 2 for parent(256)'s dir_index and dir_item
3028 * 2 for lost+found dir's inode_item and inode_ref
3029 * 1 for the new inode_ref of the file
3030 * 2 for lost+found dir's dir_index and dir_item for the file
3032 trans = btrfs_start_transaction(root, 7);
3033 if (IS_ERR(trans)) {
3034 btrfs_free_path(path);
3035 return PTR_ERR(trans);
3038 if (rec->errors & I_ERR_NO_INODE_ITEM)
3039 ret = repair_inode_no_item(trans, root, path, rec);
3040 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3041 ret = repair_inode_orphan_extent(trans, root, path, rec);
3042 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3043 ret = repair_inode_discount_extent(trans, root, path, rec);
3044 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3045 ret = repair_inode_isize(trans, root, path, rec);
3046 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3047 ret = repair_inode_orphan_item(trans, root, path, rec);
3048 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3049 ret = repair_inode_nlinks(trans, root, path, rec);
3050 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3051 ret = repair_inode_nbytes(trans, root, path, rec);
3052 btrfs_commit_transaction(trans, root);
3053 btrfs_free_path(path);
/*
 * Final pass over the inode records collected in @inode_cache for @root.
 *
 * Steps that can be read from the visible code:
 *  - a root with zero refs only produces a warning when its cache is not
 *    empty;
 *  - the highest inode number in the cache is stored in
 *    root->highest_inode;
 *  - in repair mode the records are walked in stages and
 *    repair_inode_backrefs() is called for records that have backrefs;
 *  - the record of the root directory is fetched and checked with
 *    check_root_dir(), and a missing root directory can be recreated with
 *    btrfs_make_root_dir() inside its own one-item transaction;
 *  - every remaining record is removed from the cache.  The root dir and
 *    the orphan objectid are simply freed.  For the others a missing orphan
 *    item is re-checked, I_ERR_NO_INODE_ITEM and I_ERR_LINK_COUNT_WRONG are
 *    set where they apply, try_repair_inode() is called, and what is still
 *    wrong is printed together with its unresolved backrefs.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 *
 * NOTE(review): most branch conditions, the stage loop and the places where
 * the error counter is incremented are on lines not visible here.
 */
3057 static int check_inode_recs(struct btrfs_root *root,
3058 struct cache_tree *inode_cache)
3060 struct cache_extent *cache;
3061 struct ptr_node *node;
3062 struct inode_record *rec;
3063 struct inode_backref *backref;
3068 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3070 if (btrfs_root_refs(&root->root_item) == 0) {
3071 if (!cache_tree_empty(inode_cache))
3072 fprintf(stderr, "warning line %d\n", __LINE__);
3077 * We need to record the highest inode number for later 'lost+found'
3079 * We must select an ino not used/referred by any existing inode, or
3080 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3081 * this may cause 'lost+found' dir has wrong nlinks.
3083 cache = last_cache_extent(inode_cache);
3085 node = container_of(cache, struct ptr_node, cache);
3087 if (rec->ino > root->highest_inode)
3088 root->highest_inode = rec->ino;
3092 * We need to repair backrefs first because we could change some of the
3093 * errors in the inode recs.
3095 * We also need to go through and delete invalid backrefs first and then
3096 * add the correct ones second. We do this because we may get EEXIST
3097 * when adding back the correct index because we hadn't yet deleted the
3100 * For example, if we were missing a dir index then the directories
3101 * isize would be wrong, so if we fixed the isize to what we thought it
3102 * would be and then fixed the backref we'd still have a invalid fs, so
3103 * we need to add back the dir index and then check to see if the isize
3108 if (stage == 3 && !err)
3111 cache = search_cache_extent(inode_cache, 0);
3112 while (repair && cache) {
3113 node = container_of(cache, struct ptr_node, cache);
3115 cache = next_cache_extent(cache);
3117 /* Need to free everything up and rescan */
3119 remove_cache_extent(inode_cache, &node->cache);
3121 free_inode_rec(rec);
3125 if (list_empty(&rec->backrefs))
3128 ret = repair_inode_backrefs(root, rec, inode_cache,
3142 rec = get_inode_rec(inode_cache, root_dirid, 0);
3143 BUG_ON(IS_ERR(rec));
3145 ret = check_root_dir(rec);
3147 fprintf(stderr, "root %llu root dir %llu error\n",
3148 (unsigned long long)root->root_key.objectid,
3149 (unsigned long long)root_dirid);
3150 print_inode_error(root, rec);
3155 struct btrfs_trans_handle *trans;
3157 trans = btrfs_start_transaction(root, 1);
3158 if (IS_ERR(trans)) {
3159 err = PTR_ERR(trans);
3164 "root %llu missing its root dir, recreating\n",
3165 (unsigned long long)root->objectid);
3167 ret = btrfs_make_root_dir(trans, root, root_dirid);
3170 btrfs_commit_transaction(trans, root);
3174 fprintf(stderr, "root %llu root dir %llu not found\n",
3175 (unsigned long long)root->root_key.objectid,
3176 (unsigned long long)root_dirid);
3180 cache = search_cache_extent(inode_cache, 0);
3183 node = container_of(cache, struct ptr_node, cache);
3185 remove_cache_extent(inode_cache, &node->cache);
3187 if (rec->ino == root_dirid ||
3188 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3189 free_inode_rec(rec);
3193 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3194 ret = check_orphan_item(root, rec->ino);
3196 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3197 if (can_free_inode_rec(rec)) {
3198 free_inode_rec(rec);
3203 if (!rec->found_inode_item)
3204 rec->errors |= I_ERR_NO_INODE_ITEM;
3205 if (rec->found_link != rec->nlink)
3206 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3208 ret = try_repair_inode(root, rec);
3209 if (ret == 0 && can_free_inode_rec(rec)) {
3210 free_inode_rec(rec);
3216 if (!(repair && ret == 0))
3218 print_inode_error(root, rec);
3219 list_for_each_entry(backref, &rec->backrefs, list) {
3220 if (!backref->found_dir_item)
3221 backref->errors |= REF_ERR_NO_DIR_ITEM;
3222 if (!backref->found_dir_index)
3223 backref->errors |= REF_ERR_NO_DIR_INDEX;
3224 if (!backref->found_inode_ref)
3225 backref->errors |= REF_ERR_NO_INODE_REF;
3226 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3227 " namelen %u name %s filetype %d errors %x",
3228 (unsigned long long)backref->dir,
3229 (unsigned long long)backref->index,
3230 backref->namelen, backref->name,
3231 backref->filetype, backref->errors);
3232 print_ref_error(backref->errors);
3234 free_inode_rec(rec);
3236 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, creating it when
 * needed.
 *
 * A cache hit is converted with container_of().  Otherwise a zeroed record
 * is allocated, given the objectid, an empty backref list and a cache
 * extent of (start = objectid, size = 1), and inserted into @root_cache.
 *
 * Returns the record, ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST).
 *
 * NOTE(review): the tests guarding the two error returns are on lines not
 * visible here.  Presumably they are a failed calloc() and a failed
 * insert_cache_extent().
 */
3239 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3242 struct cache_extent *cache;
3243 struct root_record *rec = NULL;
3246 cache = lookup_cache_extent(root_cache, objectid, 1);
3248 rec = container_of(cache, struct root_record, cache);
3250 rec = calloc(1, sizeof(*rec));
3252 return ERR_PTR(-ENOMEM);
3253 rec->objectid = objectid;
3254 INIT_LIST_HEAD(&rec->backrefs);
3255 rec->cache.start = objectid;
3256 rec->cache.size = 1;
3258 ret = insert_cache_extent(root_cache, &rec->cache);
3260 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), creating
 * it when there is none.
 *
 * The lookup compares ref_root, dir, namelen and the name bytes.  @index is
 * not part of the comparison and is only stored in a newly created backref.
 * A new backref is allocated zeroed with room for the name plus a
 * terminating NUL and is appended to rec->backrefs.
 *
 * NOTE(review): the result of the lookup loop, the allocation failure path
 * and the return statement are on lines not visible here.
 */
3265 static struct root_backref *get_root_backref(struct root_record *rec,
3266 u64 ref_root, u64 dir, u64 index,
3267 const char *name, int namelen)
3269 struct root_backref *backref;
3271 list_for_each_entry(backref, &rec->backrefs, list) {
/* Cheap scalar comparisons first, the name bytes only when they all match. */
3272 if (backref->ref_root != ref_root || backref->dir != dir ||
3273 backref->namelen != namelen)
3275 if (memcmp(name, backref->name, namelen))
/* One extra byte so the stored name can be NUL terminated. */
3280 backref = calloc(1, sizeof(*backref) + namelen + 1);
3283 backref->ref_root = ref_root;
3285 backref->index = index;
3286 backref->namelen = namelen;
3287 memcpy(backref->name, name, namelen);
3288 backref->name[namelen] = '\0';
3289 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache tree: @cache is
 * converted back to its root_record and the entries of rec->backrefs are
 * unlinked one by one from the head of the list.
 *
 * NOTE(review): the statements releasing the memory are on lines not
 * visible in this listing.
 */
3293 static void free_root_record(struct cache_extent *cache)
3295 struct root_record *rec;
3296 struct root_backref *backref;
3298 rec = container_of(cache, struct root_record, cache);
3299 while (!list_empty(&rec->backrefs)) {
3300 backref = to_root_backref(rec->backrefs.next);
3301 list_del(&backref->list);
/*
 * Presumably generates free_root_recs_tree(), which check_fs_roots() calls,
 * with free_root_record() as the per-entry destructor -- TODO confirm
 * against the macro definition.
 */
3308 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referring to root @root_id was
 * seen in @ref_root under directory @dir with the given name.
 *
 * The root record and the matching backref are looked up or created, and
 * @errors is merged into the backref.  For every item type except DIR_ITEM
 * the index is cross-checked: if an index-carrying item was already seen
 * and the index differs, REF_ERR_INDEX_UNMATCH is set.  Then the
 * per-item-type "found" flag is set, with REF_ERR_DUP_ROOT_REF or
 * REF_ERR_DUP_ROOT_BACKREF on a duplicate root ref or root backref.
 *
 * A backref becomes reachable once both a forward ref and a dir item have
 * been found for it.
 *
 * NOTE(review): the statements executed when a dir item and a forward ref
 * meet (original lines 3337 and 3345), the NULL check on the backref and
 * the return statement are not visible in this listing.
 */
3310 static int add_root_backref(struct cache_tree *root_cache,
3311 u64 root_id, u64 ref_root, u64 dir, u64 index,
3312 const char *name, int namelen,
3313 int item_type, int errors)
3315 struct root_record *rec;
3316 struct root_backref *backref;
3318 rec = get_root_rec(root_cache, root_id);
3319 BUG_ON(IS_ERR(rec));
3320 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3323 backref->errors |= errors;
/* DIR_ITEM keys are excluded from the index consistency check. */
3325 if (item_type != BTRFS_DIR_ITEM_KEY) {
3326 if (backref->found_dir_index || backref->found_back_ref ||
3327 backref->found_forward_ref) {
3328 if (backref->index != index)
3329 backref->errors |= REF_ERR_INDEX_UNMATCH;
3331 backref->index = index;
3335 if (item_type == BTRFS_DIR_ITEM_KEY) {
3336 if (backref->found_forward_ref)
3338 backref->found_dir_item = 1;
3339 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3340 backref->found_dir_index = 1;
3341 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3342 if (backref->found_forward_ref)
3343 backref->errors |= REF_ERR_DUP_ROOT_REF;
3344 else if (backref->found_dir_item)
3346 backref->found_forward_ref = 1;
3347 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3348 if (backref->found_back_ref)
3349 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3350 backref->found_back_ref = 1;
/* Reachable means: referenced by a ROOT_REF and named by a dir item. */
3355 if (backref->found_forward_ref && backref->found_dir_item)
3356 backref->reachable = 1;
/*
 * Drain the inode records in @src_cache and turn their directory backrefs
 * into root backrefs in @dst_cache.
 *
 * For the tree-reloc root the source cache is only freed.  Otherwise each
 * record is removed from @src_cache and is_child_root() is consulted for
 * (root->objectid, rec->ino).  For every backref of the record a found dir
 * item and a found dir index are each forwarded to add_root_backref(), with
 * rec->ino as the referenced root id and root->root_key.objectid as the
 * referencing root.  A backref carrying an inode ref is treated as a bug
 * (BUG_ON).  The record is freed afterwards.
 *
 * NOTE(review): how the result of is_child_root() is used, the loop header
 * and the return statement are on lines not visible here.
 */
3360 static int merge_root_recs(struct btrfs_root *root,
3361 struct cache_tree *src_cache,
3362 struct cache_tree *dst_cache)
3364 struct cache_extent *cache;
3365 struct ptr_node *node;
3366 struct inode_record *rec;
3367 struct inode_backref *backref;
3370 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3371 free_inode_recs_tree(src_cache);
3376 cache = search_cache_extent(src_cache, 0);
3379 node = container_of(cache, struct ptr_node, cache);
3381 remove_cache_extent(src_cache, &node->cache);
3384 ret = is_child_root(root, root->objectid, rec->ino);
3390 list_for_each_entry(backref, &rec->backrefs, list) {
3391 BUG_ON(backref->found_inode_ref);
3392 if (backref->found_dir_item)
3393 add_root_backref(dst_cache, rec->ino,
3394 root->root_key.objectid, backref->dir,
3395 backref->index, backref->name,
3396 backref->namelen, BTRFS_DIR_ITEM_KEY,
3398 if (backref->found_dir_index)
3399 add_root_backref(dst_cache, rec->ino,
3400 root->root_key.objectid, backref->dir,
3401 backref->index, backref->name,
3402 backref->namelen, BTRFS_DIR_INDEX_KEY,
3406 free_inode_rec(rec);
/*
 * Verify the references between subvolume roots recorded in @root_cache.
 *
 * The walk starts from the record of BTRFS_FS_TREE_OBJECTID.  A first pass
 * iterates over all records and, for each reachable backref, looks up the
 * record of the referencing root and tests whether that root is itself
 * referenced.  The visible code also contains a statement marking a
 * backref as no longer reachable.
 *
 * A second pass reports problems.  An unreferenced root in the
 * FIRST_FREE..LAST_FREE objectid range is checked for an orphan item in the
 * tree root and, failing that, reported as "not referenced" unless it has
 * no root item at all.  For the remaining roots the missing parts of each
 * backref are turned into REF_ERR_* bits and printed.
 *
 * Returns 1 when the error counter is positive, 0 otherwise.
 *
 * NOTE(review): the loop headers, the conditions around "reachable = 0" and
 * the places where the counters change are on lines not visible here.
 */
3413 static int check_root_refs(struct btrfs_root *root,
3414 struct cache_tree *root_cache)
3416 struct root_record *rec;
3417 struct root_record *ref_root;
3418 struct root_backref *backref;
3419 struct cache_extent *cache;
3425 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3426 BUG_ON(IS_ERR(rec));
3429 /* fixme: this can not detect circular references */
3432 cache = search_cache_extent(root_cache, 0);
3436 rec = container_of(cache, struct root_record, cache);
3437 cache = next_cache_extent(cache);
3439 if (rec->found_ref == 0)
3442 list_for_each_entry(backref, &rec->backrefs, list) {
3443 if (!backref->reachable)
3446 ref_root = get_root_rec(root_cache,
3448 BUG_ON(IS_ERR(ref_root));
3449 if (ref_root->found_ref > 0)
3452 backref->reachable = 0;
3454 if (rec->found_ref == 0)
3460 cache = search_cache_extent(root_cache, 0);
3464 rec = container_of(cache, struct root_record, cache);
3465 cache = next_cache_extent(cache);
3467 if (rec->found_ref == 0 &&
3468 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3469 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3470 ret = check_orphan_item(root->fs_info->tree_root,
3476 * If we don't have a root item then we likely just have
3477 * a dir item in a snapshot for this root but no actual
3478 * ref key or anything so it's meaningless.
3480 if (!rec->found_root_item)
3483 fprintf(stderr, "fs tree %llu not referenced\n",
3484 (unsigned long long)rec->objectid);
3488 if (rec->found_ref > 0 && !rec->found_root_item)
3490 list_for_each_entry(backref, &rec->backrefs, list) {
3491 if (!backref->found_dir_item)
3492 backref->errors |= REF_ERR_NO_DIR_ITEM;
3493 if (!backref->found_dir_index)
3494 backref->errors |= REF_ERR_NO_DIR_INDEX;
3495 if (!backref->found_back_ref)
3496 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3497 if (!backref->found_forward_ref)
3498 backref->errors |= REF_ERR_NO_ROOT_REF;
3499 if (backref->reachable && backref->errors)
3506 fprintf(stderr, "fs tree %llu refs %u %s\n",
3507 (unsigned long long)rec->objectid, rec->found_ref,
3508 rec->found_root_item ? "" : "not found");
3510 list_for_each_entry(backref, &rec->backrefs, list) {
3511 if (!backref->reachable)
3513 if (!backref->errors && rec->found_root_item)
3515 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3516 " index %llu namelen %u name %s errors %x\n",
3517 (unsigned long long)backref->ref_root,
3518 (unsigned long long)backref->dir,
3519 (unsigned long long)backref->index,
3520 backref->namelen, backref->name,
3522 print_ref_error(backref->errors);
3525 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and record it in
 * @root_cache through add_root_backref().
 *
 * The directory id, sequence (used as index) and name length are read from
 * the item.  The name is copied from the bytes directly following the item
 * structure.  A name longer than BTRFS_NAME_LEN is clamped to that length
 * and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * For a ROOT_REF key, key->offset is passed as the referenced root and
 * key->objectid as the referencing root.  For the other key type handled
 * here the two are swapped.
 *
 * NOTE(review): the branch taken for a name that fits and the return
 * statement are on lines not visible here.
 */
3528 static int process_root_ref(struct extent_buffer *eb, int slot,
3529 struct btrfs_key *key,
3530 struct cache_tree *root_cache)
3536 struct btrfs_root_ref *ref;
3537 char namebuf[BTRFS_NAME_LEN];
3540 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3542 dirid = btrfs_root_ref_dirid(eb, ref);
3543 index = btrfs_root_ref_sequence(eb, ref);
3544 name_len = btrfs_root_ref_name_len(eb, ref);
3546 if (name_len <= BTRFS_NAME_LEN) {
3550 len = BTRFS_NAME_LEN;
3551 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored right behind the fixed-size root_ref structure. */
3553 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3555 if (key->type == BTRFS_ROOT_REF_KEY) {
3556 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3557 index, namebuf, len, key->type, error);
3559 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3560 index, namebuf, len, key->type, error);
/*
 * Destructor for one btrfs_corrupt_block stored in a cache tree: @cache is
 * converted back to the enclosing structure.
 *
 * NOTE(review): the statement releasing it is on a line not visible here.
 */
3565 static void free_corrupt_block(struct cache_extent *cache)
3567 struct btrfs_corrupt_block *corrupt;
3569 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Presumably generates free_corrupt_blocks_tree(), which check_fs_root()
 * calls -- TODO confirm against the macro definition.
 */
3573 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3576 * Repair the btree of the given root.
3578 * The fix is to remove the node key in corrupt_blocks cache_tree.
3579 * and rebalance the tree.
3580 * After the fix, the btree should be writeable.
/*
 * Remove the tree blocks recorded in @corrupt_blocks from the btree of
 * @root, inside a single transaction reserving one item.
 *
 * First loop: for every recorded block the tree is searched for the
 * recorded key with path->lowest_level set to the recorded level, ins_len 0
 * and cow 1.  The block pointer at that slot is read, the pointer is
 * deleted with btrfs_del_ptr(), and the extent behind it is released with
 * btrfs_free_extent().
 *
 * Second loop: each recorded key is searched again with ins_len -1 and
 * cow 1, which is what rebalances the tree.
 *
 * Nothing is done when @corrupt_blocks is empty.
 *
 * NOTE(review): the loop headers, the error exits and the return value are
 * on lines not visible in this listing.
 */
3582 static int repair_btree(struct btrfs_root *root,
3583 struct cache_tree *corrupt_blocks)
3585 struct btrfs_trans_handle *trans;
3586 struct btrfs_path *path;
3587 struct btrfs_corrupt_block *corrupt;
3588 struct cache_extent *cache;
3589 struct btrfs_key key;
3594 if (cache_tree_empty(corrupt_blocks))
3597 path = btrfs_alloc_path();
3601 trans = btrfs_start_transaction(root, 1);
3602 if (IS_ERR(trans)) {
3603 ret = PTR_ERR(trans);
3604 fprintf(stderr, "Error starting transaction: %s\n",
3608 cache = first_cache_extent(corrupt_blocks);
3610 corrupt = container_of(cache, struct btrfs_corrupt_block,
3612 level = corrupt->level;
3613 path->lowest_level = level;
3614 key.objectid = corrupt->key.objectid;
3615 key.type = corrupt->key.type;
3616 key.offset = corrupt->key.offset;
3619 * Here we don't want to do any tree balance, since it may
3620 * cause a balance with corrupted brother leaf/node,
3621 * so ins_len set to 0 here.
3622 * Balance will be done after all corrupt node/leaf is deleted.
3624 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3627 offset = btrfs_node_blockptr(path->nodes[level],
3628 path->slots[level]);
3630 /* Remove the ptr */
3631 ret = btrfs_del_ptr(trans, root, path, level,
3632 path->slots[level]);
3636 * Remove the corresponding extent
3637 * return value is not concerned.
3639 btrfs_release_path(path);
3640 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3641 0, root->root_key.objectid,
3643 cache = next_cache_extent(cache);
3646 /* Balance the btree using btrfs_search_slot() */
3647 cache = first_cache_extent(corrupt_blocks);
3649 corrupt = container_of(cache, struct btrfs_corrupt_block,
3651 memcpy(&key, &corrupt->key, sizeof(key));
3652 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3655 /* return will always >0 since it won't find the item */
3657 btrfs_release_path(path);
3658 cache = next_cache_extent(cache);
3661 btrfs_commit_transaction(trans, root);
3663 btrfs_free_path(path);
/*
 * Check one fs/subvolume tree.
 *
 * Steps that can be read from the visible code:
 *  - a local corrupt_blocks cache tree is installed in root->fs_info for
 *    the duration of the check and removed again at the end;
 *  - except for the tree-reloc root, the root record is fetched from
 *    @root_cache and marked found_root_item when the root item has refs;
 *  - orphan data extents queued on the root are moved to the inode record
 *    of their owner, which is flagged I_ERR_FILE_EXTENT_ORPHAN;
 *  - the root block itself is verified with btrfs_check_leaf() or
 *    btrfs_check_node();
 *  - the walk starts at the root node, or, for a root with zero refs and a
 *    non-zero drop_progress objectid, at the position found by searching
 *    for drop_progress at drop_level;
 *  - the tree is walked with walk_down_tree() and walk_up_tree();
 *  - corrupted tree blocks recorded during the walk are listed and
 *    repair_btree() is called on them;
 *  - root records are merged into @root_cache with merge_root_recs() and
 *    the inode records are checked with check_inode_recs().
 *
 * NOTE(review): the walk loop, the conditions around the repair attempt,
 * the way the individual results are combined and the return statement are
 * on lines not visible in this listing.
 */
3667 static int check_fs_root(struct btrfs_root *root,
3668 struct cache_tree *root_cache,
3669 struct walk_control *wc)
3675 struct btrfs_path path;
3676 struct shared_node root_node;
3677 struct root_record *rec;
3678 struct btrfs_root_item *root_item = &root->root_item;
3679 struct cache_tree corrupt_blocks;
3680 struct orphan_data_extent *orphan;
3681 struct orphan_data_extent *tmp;
3682 enum btrfs_tree_block_status status;
3683 struct node_refs nrefs;
3686 * Reuse the corrupt_block cache tree to record corrupted tree block
3688 * Unlike the usage in extent tree check, here we do it in a per
3689 * fs/subvol tree base.
3691 cache_tree_init(&corrupt_blocks);
3692 root->fs_info->corrupt_blocks = &corrupt_blocks;
3694 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3695 rec = get_root_rec(root_cache, root->root_key.objectid);
3696 BUG_ON(IS_ERR(rec));
3697 if (btrfs_root_refs(root_item) > 0)
3698 rec->found_root_item = 1;
3701 btrfs_init_path(&path);
3702 memset(&root_node, 0, sizeof(root_node));
3703 cache_tree_init(&root_node.root_cache);
3704 cache_tree_init(&root_node.inode_cache);
3705 memset(&nrefs, 0, sizeof(nrefs));
3707 /* Move the orphan extent record to corresponding inode_record */
3708 list_for_each_entry_safe(orphan, tmp,
3709 &root->orphan_data_extents, list) {
3710 struct inode_record *inode;
3712 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3714 BUG_ON(IS_ERR(inode));
3715 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3716 list_move(&orphan->list, &inode->orphan_extents);
/* The walk control starts with the root node as the only active level. */
3719 level = btrfs_header_level(root->node);
3720 memset(wc->nodes, 0, sizeof(wc->nodes));
3721 wc->nodes[level] = &root_node;
3722 wc->active_node = level;
3723 wc->root_level = level;
3725 /* We may not have checked the root block, lets do that now */
3726 if (btrfs_is_leaf(root->node))
3727 status = btrfs_check_leaf(root, NULL, root->node);
3729 status = btrfs_check_node(root, NULL, root->node);
3730 if (status != BTRFS_TREE_BLOCK_CLEAN)
3733 if (btrfs_root_refs(root_item) > 0 ||
3734 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3735 path.nodes[level] = root->node;
3736 extent_buffer_get(root->node);
3737 path.slots[level] = 0;
3739 struct btrfs_key key;
3740 struct btrfs_disk_key found_key;
3742 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3743 level = root_item->drop_level;
3744 path.lowest_level = level;
3745 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3748 btrfs_node_key(path.nodes[level], &found_key,
3750 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3751 sizeof(found_key)));
3755 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3761 wret = walk_up_tree(root, &path, wc, &level);
3768 btrfs_release_path(&path);
3770 if (!cache_tree_empty(&corrupt_blocks)) {
3771 struct cache_extent *cache;
3772 struct btrfs_corrupt_block *corrupt;
3774 printf("The following tree block(s) is corrupted in tree %llu:\n",
3775 root->root_key.objectid);
3776 cache = first_cache_extent(&corrupt_blocks);
3778 corrupt = container_of(cache,
3779 struct btrfs_corrupt_block,
3781 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3782 cache->start, corrupt->level,
3783 corrupt->key.objectid, corrupt->key.type,
3784 corrupt->key.offset);
3785 cache = next_cache_extent(cache);
3788 printf("Try to repair the btree for root %llu\n",
3789 root->root_key.objectid);
3790 ret = repair_btree(root, &corrupt_blocks);
3792 fprintf(stderr, "Failed to repair btree: %s\n",
3795 printf("Btree for root %llu is fixed\n",
3796 root->root_key.objectid);
3800 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3804 if (root_node.current) {
3805 root_node.current->checked = 1;
3806 maybe_free_inode_rec(&root_node.inode_cache,
3810 err = check_inode_recs(root, &root_node.inode_cache);
/* Undo the per-root corrupt block tracking installed at the top. */
3814 free_corrupt_blocks_tree(&corrupt_blocks);
3815 root->fs_info->corrupt_blocks = NULL;
3816 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether the root with @objectid is checked as an fs tree.
 *
 * The tree-reloc and data-reloc objectids are tested explicitly.  Every
 * other objectid is decided by is_fstree().
 *
 * NOTE(review): the value returned for the two relocation objectids is on
 * a line not visible here.  check_fs_roots() handles
 * BTRFS_TREE_RELOC_OBJECTID after this test succeeded, so it is presumably
 * non-zero.
 */
3820 static int fs_root_objectid(u64 objectid)
3822 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3823 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3825 return is_fstree(objectid);
/*
 * Check every fs/subvolume tree referenced from the tree root and collect
 * the root references in @root_cache.
 *
 * When progress reporting is enabled the task position is set to
 * TASK_FS_ROOTS and the task is started.  It is stopped again at the end.
 * Cached block groups are reset, then the items of the tree root are
 * visited through a path:
 *  - a ROOT_ITEM whose objectid passes fs_root_objectid() is read and
 *    handed to check_fs_root().  The tree-reloc root is read with
 *    btrfs_read_fs_root_no_cache() and freed again afterwards, every other
 *    root is read with btrfs_read_fs_root() and key.offset set to (u64)-1;
 *  - a ROOT_REF or ROOT_BACKREF item is handed to process_root_ref().
 *
 * The collected root records are dropped and the path released both when
 * the tree root node is found to have changed and when check_fs_root()
 * returns -EAGAIN.
 *
 * NOTE(review): the loop structure, the restart logic, the error
 * accounting and the return statement are on lines not visible here.
 */
3828 static int check_fs_roots(struct btrfs_root *root,
3829 struct cache_tree *root_cache)
3831 struct btrfs_path path;
3832 struct btrfs_key key;
3833 struct walk_control wc;
3834 struct extent_buffer *leaf, *tree_node;
3835 struct btrfs_root *tmp_root;
3836 struct btrfs_root *tree_root = root->fs_info->tree_root;
3840 if (ctx.progress_enabled) {
3841 ctx.tp = TASK_FS_ROOTS;
3842 task_start(ctx.info);
3846 * Just in case we made any changes to the extent tree that weren't
3847 * reflected into the free space cache yet.
3850 reset_cached_block_groups(root->fs_info);
3851 memset(&wc, 0, sizeof(wc));
3852 cache_tree_init(&wc.shared);
3853 btrfs_init_path(&path);
3858 key.type = BTRFS_ROOT_ITEM_KEY;
3859 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3864 tree_node = tree_root->node;
3866 if (tree_node != tree_root->node) {
3867 free_root_recs_tree(root_cache);
3868 btrfs_release_path(&path);
3871 leaf = path.nodes[0];
3872 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3873 ret = btrfs_next_leaf(tree_root, &path);
3879 leaf = path.nodes[0];
3881 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3882 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3883 fs_root_objectid(key.objectid)) {
3884 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3885 tmp_root = btrfs_read_fs_root_no_cache(
3886 root->fs_info, &key);
3888 key.offset = (u64)-1;
3889 tmp_root = btrfs_read_fs_root(
3890 root->fs_info, &key);
3892 if (IS_ERR(tmp_root)) {
3896 ret = check_fs_root(tmp_root, root_cache, &wc);
3897 if (ret == -EAGAIN) {
3898 free_root_recs_tree(root_cache);
3899 btrfs_release_path(&path);
3904 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3905 btrfs_free_fs_root(tmp_root);
3906 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3907 key.type == BTRFS_ROOT_BACKREF_KEY) {
3908 process_root_ref(leaf, path.slots[0], &key,
3915 btrfs_release_path(&path);
3917 free_extent_cache_tree(&wc.shared);
3918 if (!cache_tree_empty(&wc.shared))
3919 fprintf(stderr, "warning line %d\n", __LINE__);
3921 task_stop(ctx.info);
/*
 * Cross-check every backref in rec->backref_tree against the extent record
 * @rec.
 *
 * Conditions tested by the visible code:
 *  - a backref that was not found in the extent tree;
 *  - a tree backref that was never referenced back;
 *  - a data backref whose found_ref differs from its num_refs;
 *  - a data backref whose disk_bytenr differs from rec->start;
 *  - a data backref whose byte count differs from rec->nr;
 *  - a total of found references that differs from rec->refs, where a data
 *    backref contributes its found_ref count.
 * Each condition has a matching message written to stderr.
 *
 * maybe_free_extent_rec() only frees a record for which this function
 * returned 0, so 0 stands for "nothing wrong".
 *
 * NOTE(review): the use of @print_errs, the contribution of a tree backref
 * to the total, the error accounting and the return statement are on lines
 * not visible here.  Presumably the messages are only printed when
 * @print_errs is set.
 */
3926 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3929 struct extent_backref *back;
3930 struct tree_backref *tback;
3931 struct data_backref *dback;
3935 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3936 back = rb_node_to_extent_backref(n);
3937 if (!back->found_extent_tree) {
3941 if (back->is_data) {
3942 dback = to_data_backref(back);
3943 fprintf(stderr, "Backref %llu %s %llu"
3944 " owner %llu offset %llu num_refs %lu"
3945 " not found in extent tree\n",
3946 (unsigned long long)rec->start,
3947 back->full_backref ?
3949 back->full_backref ?
3950 (unsigned long long)dback->parent:
3951 (unsigned long long)dback->root,
3952 (unsigned long long)dback->owner,
3953 (unsigned long long)dback->offset,
3954 (unsigned long)dback->num_refs);
3956 tback = to_tree_backref(back);
3957 fprintf(stderr, "Backref %llu parent %llu"
3958 " root %llu not found in extent tree\n",
3959 (unsigned long long)rec->start,
3960 (unsigned long long)tback->parent,
3961 (unsigned long long)tback->root);
3964 if (!back->is_data && !back->found_ref) {
3968 tback = to_tree_backref(back);
3969 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3970 (unsigned long long)rec->start,
3971 back->full_backref ? "parent" : "root",
3972 back->full_backref ?
3973 (unsigned long long)tback->parent :
3974 (unsigned long long)tback->root, back);
3976 if (back->is_data) {
3977 dback = to_data_backref(back);
3978 if (dback->found_ref != dback->num_refs) {
3982 fprintf(stderr, "Incorrect local backref count"
3983 " on %llu %s %llu owner %llu"
3984 " offset %llu found %u wanted %u back %p\n",
3985 (unsigned long long)rec->start,
3986 back->full_backref ?
3988 back->full_backref ?
3989 (unsigned long long)dback->parent:
3990 (unsigned long long)dback->root,
3991 (unsigned long long)dback->owner,
3992 (unsigned long long)dback->offset,
3993 dback->found_ref, dback->num_refs, back);
3995 if (dback->disk_bytenr != rec->start) {
3999 fprintf(stderr, "Backref disk bytenr does not"
4000 " match extent record, bytenr=%llu, "
4001 "ref bytenr=%llu\n",
4002 (unsigned long long)rec->start,
4003 (unsigned long long)dback->disk_bytenr);
4006 if (dback->bytes != rec->nr) {
4010 fprintf(stderr, "Backref bytes do not match "
4011 "extent backref, bytenr=%llu, ref "
4012 "bytes=%llu, backref bytes=%llu\n",
4013 (unsigned long long)rec->start,
4014 (unsigned long long)rec->nr,
4015 (unsigned long long)dback->bytes);
/* Accumulate the number of references found, for the global check below. */
4018 if (!back->is_data) {
4021 dback = to_data_backref(back);
4022 found += dback->found_ref;
4025 if (found != rec->refs) {
4029 fprintf(stderr, "Incorrect global backref count "
4030 "on %llu found %llu wanted %llu\n",
4031 (unsigned long long)rec->start,
4032 (unsigned long long)found,
4033 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs():
 * converts @node back to its extent_backref.
 *
 * NOTE(review): the statement releasing the backref is on a line not
 * visible in this listing.
 */
4039 static void __free_one_backref(struct rb_node *node)
4041 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every backref stored in rec->backref_tree by handing the tree to
 * rb_free_nodes() with __free_one_backref() as the per-node callback.
 */
4046 static void free_all_extent_backrefs(struct extent_record *rec)
4048 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: the first cache extent is taken, converted to its
 * extent_record, removed from the tree, and its backrefs are released with
 * free_all_extent_backrefs().
 *
 * NOTE(review): the loop around these statements and the release of the
 * record itself are on lines not visible here.  @fs_info is not used by
 * any visible line.
 */
4051 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4052 struct cache_tree *extent_cache)
4054 struct cache_extent *cache;
4055 struct extent_record *rec;
4058 cache = first_cache_extent(extent_cache);
4061 rec = container_of(cache, struct extent_record, cache);
4062 remove_cache_extent(extent_cache, cache);
4063 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once it is fully verified.
 *
 * A record qualifies when its content and owner ref were checked, the ref
 * count of the extent item equals the non-zero number of refs found, it has
 * no duplicates, all_backpointers_checked() returns 0 for it (called with
 * print_errs 0), and none of bad_full_backref, crossing_stripes or
 * wrong_chunk_type is set.  Such a record is removed from the cache tree,
 * its backrefs are freed and it is unlinked from its list.
 *
 * NOTE(review): the release of the record itself and the return statement
 * are on lines not visible in this listing.
 */
4068 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4069 struct extent_record *rec)
4071 if (rec->content_checked && rec->owner_ref_checked &&
4072 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4073 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4074 !rec->bad_full_backref && !rec->crossing_stripes &&
4075 !rec->wrong_chunk_type) {
4076 remove_cache_extent(extent_cache, &rec->cache);
4077 free_all_extent_backrefs(rec);
4078 list_del_init(&rec->list);
/*
 * Verify that the owner stored in the header of tree block @buf is backed
 * by a reference.
 *
 * First the backrefs of @rec are scanned for a found, non-full tree backref
 * whose root equals btrfs_header_owner(buf).  If that does not settle it,
 * the fs tree named by the header owner is read and searched for the first
 * key of @buf with path.lowest_level set to the level above @buf.  The
 * block counts as found when the parent slot reached by that search points
 * at buf->start.
 *
 * Returns 0 when the block was found that way, 1 otherwise.
 *
 * NOTE(review): the results of the backref scan, the handling of a root
 * that cannot be read and of a failed search are on lines not visible here.
 */
4084 static int check_owner_ref(struct btrfs_root *root,
4085 struct extent_record *rec,
4086 struct extent_buffer *buf)
4088 struct extent_backref *node, *tmp;
4089 struct tree_backref *back;
4090 struct btrfs_root *ref_root;
4091 struct btrfs_key key;
4092 struct btrfs_path path;
4093 struct extent_buffer *parent;
4098 rbtree_postorder_for_each_entry_safe(node, tmp,
4099 &rec->backref_tree, node) {
4102 if (!node->found_ref)
4104 if (node->full_backref)
4106 back = to_tree_backref(node);
4107 if (btrfs_header_owner(buf) == back->root)
4110 BUG_ON(rec->is_root);
4112 /* try to find the block by search corresponding fs tree */
4113 key.objectid = btrfs_header_owner(buf);
4114 key.type = BTRFS_ROOT_ITEM_KEY;
4115 key.offset = (u64)-1;
4117 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4118 if (IS_ERR(ref_root))
/* The search key is the first key stored in the block being checked. */
4121 level = btrfs_header_level(buf);
4123 btrfs_item_key_to_cpu(buf, &key, 0);
4125 btrfs_node_key_to_cpu(buf, &key, 0);
4127 btrfs_init_path(&path);
/* Stop one level above buf so the parent pointer can be compared. */
4128 path.lowest_level = level + 1;
4129 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4133 parent = path.nodes[level + 1];
4134 if (parent && buf->start == btrfs_node_blockptr(parent,
4135 path.slots[level + 1]))
4138 btrfs_release_path(&path);
4139 return found ? 0 : 1;
/*
 * Walk the backrefs of @rec, testing full_backref on each entry and
 * comparing the root of the tree backref with BTRFS_EXTENT_TREE_OBJECTID.
 * Used as a predicate by record_bad_block_io().
 */
4142 static int is_extent_tree_record(struct extent_record *rec)
4144 struct extent_backref *ref, *tmp;
4145 struct tree_backref *back;
4148 rbtree_postorder_for_each_entry_safe(ref, tmp,
4149 &rec->backref_tree, node) {
4152 back = to_tree_backref(ref);
4153 if (ref->full_backref)
4155 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up (start, len) in @extent_cache, map the hit to its extent_record
 * and test it with is_extent_tree_record().
 *
 * The closing statements convert the record's parent key to CPU byte order
 * and return the result of btrfs_add_corrupt_extent_record() for the range.
 */
4162 static int record_bad_block_io(struct btrfs_fs_info *info,
4163 struct cache_tree *extent_cache,
4166 struct extent_record *rec;
4167 struct cache_extent *cache;
4168 struct btrfs_key key;
4170 cache = lookup_cache_extent(extent_cache, start, len);
4174 rec = container_of(cache, struct extent_record, cache);
4175 if (!is_extent_tree_record(rec))
4178 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4179 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * copied out and written back crosswise; btrfs_fixup_low_keys() may then be
 * called with the key at index 0 for the level above @buf.  For a leaf the
 * item payloads, item offsets, item sizes and keys of the two slots are
 * exchanged, the keys through btrfs_set_item_key_unsafe() with
 * path->slots[0] pointed at each slot in turn.
 */
4182 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4183 struct extent_buffer *buf, int slot)
4185 if (btrfs_header_level(buf)) {
4186 struct btrfs_key_ptr ptr1, ptr2;
4188 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4189 sizeof(struct btrfs_key_ptr));
4190 read_extent_buffer(buf, &ptr2,
4191 btrfs_node_key_ptr_offset(slot + 1),
4192 sizeof(struct btrfs_key_ptr));
4193 write_extent_buffer(buf, &ptr1,
4194 btrfs_node_key_ptr_offset(slot + 1),
4195 sizeof(struct btrfs_key_ptr));
4196 write_extent_buffer(buf, &ptr2,
4197 btrfs_node_key_ptr_offset(slot),
4198 sizeof(struct btrfs_key_ptr));
4200 struct btrfs_disk_key key;
4201 btrfs_node_key(buf, &key, 0);
4202 btrfs_fixup_low_keys(root, path, &key,
4203 btrfs_header_level(buf) + 1);
4206 struct btrfs_item *item1, *item2;
4207 struct btrfs_key k1, k2;
4208 char *item1_data, *item2_data;
4209 u32 item1_offset, item2_offset, item1_size, item2_size;
4211 item1 = btrfs_item_nr(slot);
4212 item2 = btrfs_item_nr(slot + 1);
4213 btrfs_item_key_to_cpu(buf, &k1, slot);
4214 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4215 item1_offset = btrfs_item_offset(buf, item1);
4216 item2_offset = btrfs_item_offset(buf, item2);
4217 item1_size = btrfs_item_size(buf, item1);
4218 item2_size = btrfs_item_size(buf, item2);
4220 item1_data = malloc(item1_size);
4223 item2_data = malloc(item2_size);
4229 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4230 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write below takes its length from the *other* item,
 * so when the two sizes differ one call copies more bytes than its
 * malloc()ed source buffer holds -- confirm whether this is intended.
 */
4232 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4233 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4237 btrfs_set_item_offset(buf, item1, item2_offset);
4238 btrfs_set_item_offset(buf, item2, item1_offset);
4239 btrfs_set_item_size(buf, item1, item2_size);
4240 btrfs_set_item_size(buf, item2, item1_size);
4242 path->slots[0] = slot;
4243 btrfs_set_item_key_unsafe(root, path, &k2);
4244 path->slots[0] = slot + 1;
4245 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys (node keys or item keys) are read pairwise and compared
 * with btrfs_comp_cpu_keys(); swap_values() is applied to a pair and the
 * buffer is marked dirty.
 * NOTE(review): presumably pairs that compare as negative (already in
 * order) are skipped -- confirm.
 */
4250 static int fix_key_order(struct btrfs_trans_handle *trans,
4251 struct btrfs_root *root,
4252 struct btrfs_path *path)
4254 struct extent_buffer *buf;
4255 struct btrfs_key k1, k2;
4257 int level = path->lowest_level;
4260 buf = path->nodes[level];
4261 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4263 btrfs_node_key_to_cpu(buf, &k1, i);
4264 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4266 btrfs_item_key_to_cpu(buf, &k1, i);
4267 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4269 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4271 ret = swap_values(root, path, buf, i);
4274 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type at @slot is tested against the list below.  A message is
 * printed, the item headers following @slot are moved down by one entry,
 * nritems is decremented, btrfs_fixup_low_keys() may be called with the key
 * of item 0, and the buffer is marked dirty.
 */
4280 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4281 struct btrfs_root *root,
4282 struct btrfs_path *path,
4283 struct extent_buffer *buf, int slot)
4285 struct btrfs_key key;
4286 int nritems = btrfs_header_nritems(buf);
4288 btrfs_item_key_to_cpu(buf, &key, slot);
4290 /* These are all the keys we can deal with missing. */
4291 if (key.type != BTRFS_DIR_INDEX_KEY &&
4292 key.type != BTRFS_EXTENT_ITEM_KEY &&
4293 key.type != BTRFS_METADATA_ITEM_KEY &&
4294 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4295 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4298 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4299 (unsigned long long)key.objectid, key.type,
4300 (unsigned long long)key.offset, slot, buf->start);
4301 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4302 btrfs_item_nr_offset(slot + 1),
4303 sizeof(struct btrfs_item) *
4304 (nritems - slot - 1));
4305 btrfs_set_header_nritems(buf, nritems - 1);
4307 struct btrfs_disk_key disk_key;
4309 btrfs_item_key(buf, &disk_key, 0);
4310 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4312 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is compared against BTRFS_LEAF_DATA_SIZE(root); every later item
 * is compared against the data offset of the item before it.  An item that
 * ends beyond that boundary is handed to delete_bogus_item() or reported as
 * unfixable; an item that ends short of it gets `shift` set to the gap, and
 * its data is moved up by that amount before the buffer is marked dirty.
 */
4316 static int fix_item_offset(struct btrfs_trans_handle *trans,
4317 struct btrfs_root *root,
4318 struct btrfs_path *path)
4320 struct extent_buffer *buf;
4324 /* We should only get this for leaves */
4325 BUG_ON(path->lowest_level);
4326 buf = path->nodes[0];
4328 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4329 unsigned int shift = 0, offset;
4331 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4332 BTRFS_LEAF_DATA_SIZE(root)) {
4333 if (btrfs_item_end_nr(buf, i) >
4334 BTRFS_LEAF_DATA_SIZE(root)) {
4335 ret = delete_bogus_item(trans, root, path,
4339 fprintf(stderr, "item is off the end of the "
4340 "leaf, can't fix\n");
4344 shift = BTRFS_LEAF_DATA_SIZE(root) -
4345 btrfs_item_end_nr(buf, i);
4346 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4347 btrfs_item_offset_nr(buf, i - 1)) {
4348 if (btrfs_item_end_nr(buf, i) >
4349 btrfs_item_offset_nr(buf, i - 1)) {
4350 ret = delete_bogus_item(trans, root, path,
4354 fprintf(stderr, "items overlap, can't fix\n");
4358 shift = btrfs_item_offset_nr(buf, i - 1) -
4359 btrfs_item_end_nr(buf, i);
4364 printf("Shifting item nr %d by %u bytes in block %llu\n",
4365 i, shift, (unsigned long long)buf->start);
4366 offset = btrfs_item_offset_nr(buf, i);
4367 memmove_extent_buffer(buf,
4368 btrfs_leaf_data(buf) + offset + shift,
4369 btrfs_leaf_data(buf) + offset,
4370 btrfs_item_size_nr(buf, i));
4371 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4373 btrfs_mark_buffer_dirty(buf);
4377 * We may have moved things, in which case we want to exit so we don't
4378 * write those changes out. Once we have proper abort functionality in
4379 * progs this can be changed to something nicer.
4386 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4387 * then just return -EIO.
/*
 * Only the statuses BTRFS_TREE_BLOCK_BAD_KEY_ORDER and
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS are tested for here.
 *
 * btrfs_find_all_roots() is queried for buf->start.  For each root in the
 * resulting ulist the root is read, a transaction is started, and the block
 * is reached by a btrfs_search_slot() keyed on its first key with
 * lowest_level set to the block's level and skip_check_block set.  Then
 * fix_key_order() or fix_item_offset() runs according to @status and the
 * transaction is committed.
 */
4389 static int try_to_fix_bad_block(struct btrfs_root *root,
4390 struct extent_buffer *buf,
4391 enum btrfs_tree_block_status status)
4393 struct btrfs_trans_handle *trans;
4394 struct ulist *roots;
4395 struct ulist_node *node;
4396 struct btrfs_root *search_root;
4397 struct btrfs_path *path;
4398 struct ulist_iterator iter;
4399 struct btrfs_key root_key, key;
4402 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4403 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4406 path = btrfs_alloc_path();
4410 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4413 btrfs_free_path(path);
4417 ULIST_ITER_INIT(&iter);
4418 while ((node = ulist_next(roots, &iter))) {
4419 root_key.objectid = node->val;
4420 root_key.type = BTRFS_ROOT_ITEM_KEY;
4421 root_key.offset = (u64)-1;
4423 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4430 trans = btrfs_start_transaction(search_root, 0);
4431 if (IS_ERR(trans)) {
4432 ret = PTR_ERR(trans);
4436 path->lowest_level = btrfs_header_level(buf);
4437 path->skip_check_block = 1;
4438 if (path->lowest_level)
4439 btrfs_node_key_to_cpu(buf, &key, 0);
4441 btrfs_item_key_to_cpu(buf, &key, 0);
4442 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4445 btrfs_commit_transaction(trans, search_root);
4448 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4449 ret = fix_key_order(trans, search_root, path);
4450 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4451 ret = fix_item_offset(trans, search_root, path);
4453 btrfs_commit_transaction(trans, search_root);
4456 btrfs_release_path(path);
4457 btrfs_commit_transaction(trans, search_root);
4460 btrfs_free_path(path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record looked up for (buf->start, buf->len) receives the block's
 * generation, the objectid of its first key and its level.  The block is
 * checked with btrfs_check_leaf() or btrfs_check_node() against the
 * recorded parent key; a status other than BTRFS_TREE_BLOCK_CLEAN is passed
 * to try_to_fix_bad_block().  Afterwards content_checked is set,
 * owner_ref_checked is set directly for BTRFS_BLOCK_FLAG_FULL_BACKREF or
 * depending on check_owner_ref(), and the record is offered to
 * maybe_free_extent_rec().
 */
4464 static int check_block(struct btrfs_root *root,
4465 struct cache_tree *extent_cache,
4466 struct extent_buffer *buf, u64 flags)
4468 struct extent_record *rec;
4469 struct cache_extent *cache;
4470 struct btrfs_key key;
4471 enum btrfs_tree_block_status status;
4475 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4478 rec = container_of(cache, struct extent_record, cache);
4479 rec->generation = btrfs_header_generation(buf);
4481 level = btrfs_header_level(buf);
4482 if (btrfs_header_nritems(buf) > 0) {
4485 btrfs_item_key_to_cpu(buf, &key, 0);
4487 btrfs_node_key_to_cpu(buf, &key, 0);
4489 rec->info_objectid = key.objectid;
4491 rec->info_level = level;
4493 if (btrfs_is_leaf(buf))
4494 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4496 status = btrfs_check_node(root, &rec->parent_key, buf);
4498 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4500 status = try_to_fix_bad_block(root, buf, status);
4501 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4503 fprintf(stderr, "bad block %llu\n",
4504 (unsigned long long)buf->start);
4507 * Signal to callers we need to start the scan over
4508 * again since we'll have cowed blocks.
4513 rec->content_checked = 1;
4514 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4515 rec->owner_ref_checked = 1;
4517 ret = check_owner_ref(root, rec, buf);
4519 rec->owner_ref_checked = 1;
4523 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up the tree backref of @rec that matches @parent / @root.
 *
 * A stack template is filled in (for the parent-based form: the parent and
 * full_backref = 1) and searched for in rec->backref_tree using
 * compare_extent_backref().  `back` starts out as NULL and is derived from
 * the node returned by rb_search().
 */
4528 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4529 u64 parent, u64 root)
4531 struct rb_node *node;
4532 struct tree_backref *back = NULL;
4533 struct tree_backref match = {
4540 match.parent = parent;
4541 match.node.full_backref = 1;
4546 node = rb_search(&rec->backref_tree, &match.node.node,
4547 (rb_compare_keys)compare_extent_backref, NULL);
4549 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a tree backref for @rec, zero its embedded node, fill it in as
 * either the parent-based form (full_backref = 1) or the other form
 * (full_backref = 0), and insert it into rec->backref_tree ordered by
 * compare_extent_backref().
 */
4554 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4555 u64 parent, u64 root)
4557 struct tree_backref *ref = malloc(sizeof(*ref));
4561 memset(&ref->node, 0, sizeof(ref->node));
4563 ref->parent = parent;
4564 ref->node.full_backref = 1;
4567 ref->node.full_backref = 0;
4569 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up the data backref of @rec that matches the given identity.
 *
 * A stack template carrying found_ref and disk_bytenr (plus the parent and
 * full_backref = 1 for the parent-based form) is searched for in
 * rec->backref_tree using compare_extent_backref().  `back` starts out as
 * NULL and is derived from the node returned by rb_search().
 */
4574 static struct data_backref *find_data_backref(struct extent_record *rec,
4575 u64 parent, u64 root,
4576 u64 owner, u64 offset,
4578 u64 disk_bytenr, u64 bytes)
4580 struct rb_node *node;
4581 struct data_backref *back = NULL;
4582 struct data_backref match = {
4589 .found_ref = found_ref,
4590 .disk_bytenr = disk_bytenr,
4594 match.parent = parent;
4595 match.node.full_backref = 1;
4600 node = rb_search(&rec->backref_tree, &match.node.node,
4601 (rb_compare_keys)compare_extent_backref, NULL);
4603 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a data backref for @rec (is_data = 1), fill in either the
 * parent-based form (full_backref = 1) or the form carrying @offset
 * (full_backref = 0), store max_size as its byte count and insert it into
 * rec->backref_tree.  rec->max_size is raised when max_size exceeds it.
 */
4608 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4609 u64 parent, u64 root,
4610 u64 owner, u64 offset,
4613 struct data_backref *ref = malloc(sizeof(*ref));
4617 memset(&ref->node, 0, sizeof(ref->node));
4618 ref->node.is_data = 1;
4621 ref->parent = parent;
4624 ref->node.full_backref = 1;
4628 ref->offset = offset;
4629 ref->node.full_backref = 0;
4631 ref->bytes = max_size;
4634 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4635 if (max_size > rec->max_size)
4636 rec->max_size = max_size;
4640 /*
 * Check whether the type of the extent matches the type of its chunk.
 *
 * The block group for rec->start is looked up and rec->wrong_chunk_type is
 * set when its flags do not fit: a data extent needs
 * BTRFS_BLOCK_GROUP_DATA; a metadata extent needs SYSTEM or METADATA, and
 * the first backref narrows that down -- a tree backref rooted in
 * BTRFS_CHUNK_TREE_OBJECTID requires SYSTEM, any other root requires
 * METADATA, and a data backref on a metadata extent is always wrong.
 */
4641 static void check_extent_type(struct extent_record *rec)
4643 struct btrfs_block_group_cache *bg_cache;
4645 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4649 /* data extent, check chunk directly */
4650 if (!rec->metadata) {
4651 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4652 rec->wrong_chunk_type = 1;
4656 /* metadata extent, check the obvious case first */
4657 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4658 BTRFS_BLOCK_GROUP_METADATA))) {
4659 rec->wrong_chunk_type = 1;
4664 * Check SYSTEM extent, as it's also marked as metadata, we can only
4665 * make sure it's a SYSTEM extent by its backref
4667 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4668 struct extent_backref *node;
4669 struct tree_backref *tback;
4672 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4673 if (node->is_data) {
4674 /* tree block shouldn't have data backref */
4675 rec->wrong_chunk_type = 1;
4678 tback = container_of(node, struct tree_backref, node);
4680 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4681 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4683 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4684 if (!(bg_cache->flags & bg_type))
4685 rec->wrong_chunk_type = 1;
4690 * Allocate a new extent record, fill default values from @tmpl and insert into
4691 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4692 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache.
 *
 * Fields not copied from @tmpl are reset: num_duplicates and the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags to 0,
 * flag_block_full_backref to FLAG_UNSET, the lists and backref tree to
 * empty.  Around the insertion bytes_used grows by rec->nr,
 * crossing_stripes may be recomputed from check_crossing_stripes(), and
 * check_extent_type() is run on the new record.
 */
4694 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4695 struct extent_record *tmpl)
4697 struct extent_record *rec;
4700 rec = malloc(sizeof(*rec));
4703 rec->start = tmpl->start;
4704 rec->max_size = tmpl->max_size;
4705 rec->nr = max(tmpl->nr, tmpl->max_size);
4706 rec->found_rec = tmpl->found_rec;
4707 rec->content_checked = tmpl->content_checked;
4708 rec->owner_ref_checked = tmpl->owner_ref_checked;
4709 rec->num_duplicates = 0;
4710 rec->metadata = tmpl->metadata;
4711 rec->flag_block_full_backref = FLAG_UNSET;
4712 rec->bad_full_backref = 0;
4713 rec->crossing_stripes = 0;
4714 rec->wrong_chunk_type = 0;
4715 rec->is_root = tmpl->is_root;
4716 rec->refs = tmpl->refs;
4717 rec->extent_item_refs = tmpl->extent_item_refs;
4718 rec->parent_generation = tmpl->parent_generation;
4719 INIT_LIST_HEAD(&rec->backrefs);
4720 INIT_LIST_HEAD(&rec->dups);
4721 INIT_LIST_HEAD(&rec->list);
4722 rec->backref_tree = RB_ROOT;
4723 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/*
 * NOTE(review): the cache entry is sized with tmpl->nr, whereas rec->nr
 * above is max(tmpl->nr, tmpl->max_size) -- confirm the difference is
 * intended.
 */
4724 rec->cache.start = tmpl->start;
4725 rec->cache.size = tmpl->nr;
4726 ret = insert_cache_extent(extent_cache, &rec->cache);
4728 bytes_used += rec->nr;
4731 rec->crossing_stripes = check_crossing_stripes(rec->start,
4732 global_info->tree_root->nodesize);
4733 check_extent_type(rec);
4738 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4740 * - refs - if found, increase refs
4741 * - is_root - if found, set
4742 * - content_checked - if found, set
4743 * - owner_ref_checked - if found, set
4745 * If not found, create a new one, initialize and insert.
/*
 * Update the record found for (tmpl->start, tmpl->nr) in @extent_cache, or
 * create one through add_extent_rec_nolookup().
 *
 * For an existing record the steps are: refresh nr; when @tmpl describes a
 * found extent item that conflicts with the record (different start, or
 * the record already has found_rec), queue the record on duplicate_extents
 * and append a copy of the template data to rec->dups; take over
 * extent_item_refs, reporting a record that already had a value; propagate
 * the checked flags, parent key, parent generation and max_size; recompute
 * crossing_stripes, re-run check_extent_type() and offer the record to
 * maybe_free_extent_rec().
 */
4747 static int add_extent_rec(struct cache_tree *extent_cache,
4748 struct extent_record *tmpl)
4750 struct extent_record *rec;
4751 struct cache_extent *cache;
4755 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4757 rec = container_of(cache, struct extent_record, cache);
4761 rec->nr = max(tmpl->nr, tmpl->max_size);
4764 * We need to make sure to reset nr to whatever the extent
4765 * record says was the real size, this way we can compare it to
4768 if (tmpl->found_rec) {
4769 if (tmpl->start != rec->start || rec->found_rec) {
4770 struct extent_record *tmp;
4773 if (list_empty(&rec->list))
4774 list_add_tail(&rec->list,
4775 &duplicate_extents);
4778 * We have to do this song and dance in case we
4779 * find an extent record that falls inside of
4780 * our current extent record but does not have
4781 * the same objectid.
4783 tmp = malloc(sizeof(*tmp));
4786 tmp->start = tmpl->start;
4787 tmp->max_size = tmpl->max_size;
4790 tmp->metadata = tmpl->metadata;
4791 tmp->extent_item_refs = tmpl->extent_item_refs;
4792 INIT_LIST_HEAD(&tmp->list);
4793 list_add_tail(&tmp->list, &rec->dups);
4794 rec->num_duplicates++;
4801 if (tmpl->extent_item_refs && !dup) {
4802 if (rec->extent_item_refs) {
4803 fprintf(stderr, "block %llu rec "
4804 "extent_item_refs %llu, passed %llu\n",
4805 (unsigned long long)tmpl->start,
4806 (unsigned long long)
4807 rec->extent_item_refs,
4808 (unsigned long long)tmpl->extent_item_refs);
4810 rec->extent_item_refs = tmpl->extent_item_refs;
4814 if (tmpl->content_checked)
4815 rec->content_checked = 1;
4816 if (tmpl->owner_ref_checked)
4817 rec->owner_ref_checked = 1;
4818 memcpy(&rec->parent_key, &tmpl->parent_key,
4819 sizeof(tmpl->parent_key));
4820 if (tmpl->parent_generation)
4821 rec->parent_generation = tmpl->parent_generation;
4822 if (rec->max_size < tmpl->max_size)
4823 rec->max_size = tmpl->max_size;
4826 * A metadata extent can't cross stripe_len boundary, otherwise
4827 * kernel scrub won't be able to handle it.
4828 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4832 rec->crossing_stripes = check_crossing_stripes(
4833 rec->start, global_info->tree_root->nodesize);
4834 check_extent_type(rec);
4835 maybe_free_extent_rec(extent_cache, rec);
4839 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the block at @bytenr.
 *
 * When the first lookup needs it, a record starting at @bytenr is created
 * through add_extent_rec_nolookup() and the cache is queried again.  The
 * backref is then found or allocated, and either found_ref or
 * found_extent_tree is set on it; a flag that was already set is reported
 * on stderr.  Finally the extent type is re-checked and the record is
 * offered to maybe_free_extent_rec().
 */
4844 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4845 u64 parent, u64 root, int found_ref)
4847 struct extent_record *rec;
4848 struct tree_backref *back;
4849 struct cache_extent *cache;
4851 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4853 struct extent_record tmpl;
4855 memset(&tmpl, 0, sizeof(tmpl));
4856 tmpl.start = bytenr;
/* The result is not captured here; the cache is looked up again below. */
4860 add_extent_rec_nolookup(extent_cache, &tmpl);
4862 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4867 rec = container_of(cache, struct extent_record, cache);
4868 if (rec->start != bytenr) {
4872 back = find_tree_backref(rec, parent, root);
4874 back = alloc_tree_backref(rec, parent, root);
4879 if (back->node.found_ref) {
4880 fprintf(stderr, "Extent back ref already exists "
4881 "for %llu parent %llu root %llu \n",
4882 (unsigned long long)bytenr,
4883 (unsigned long long)parent,
4884 (unsigned long long)root);
4886 back->node.found_ref = 1;
4888 if (back->node.found_extent_tree) {
4889 fprintf(stderr, "Extent back ref already exists "
4890 "for %llu parent %llu root %llu \n",
4891 (unsigned long long)bytenr,
4892 (unsigned long long)parent,
4893 (unsigned long long)root);
4895 back->node.found_extent_tree = 1;
4897 check_extent_type(rec);
4898 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * When the first lookup needs it, a record is created through
 * add_extent_rec_nolookup() and the cache is queried again;
 * rec->max_size is raised to @max_size if smaller.  The backref is found
 * or allocated.  In the found-ref branch @num_refs must be 1, a backref
 * already marked found must carry the same byte count, and found_ref,
 * bytes and disk_bytenr are updated while the record is marked content-
 * and owner-checked.  In the other branch num_refs and found_extent_tree
 * are stored, with an already-set flag reported on stderr.  The record is
 * finally offered to maybe_free_extent_rec().
 */
4902 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4903 u64 parent, u64 root, u64 owner, u64 offset,
4904 u32 num_refs, int found_ref, u64 max_size)
4906 struct extent_record *rec;
4907 struct data_backref *back;
4908 struct cache_extent *cache;
4910 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4912 struct extent_record tmpl;
4914 memset(&tmpl, 0, sizeof(tmpl));
4915 tmpl.start = bytenr;
4917 tmpl.max_size = max_size;
4919 add_extent_rec_nolookup(extent_cache, &tmpl);
4921 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4926 rec = container_of(cache, struct extent_record, cache);
4927 if (rec->max_size < max_size)
4928 rec->max_size = max_size;
4931 * If found_ref is set then max_size is the real size and must match the
4932 * existing refs. So if we have already found a ref then we need to
4933 * make sure that this ref matches the existing one, otherwise we need
4934 * to add a new backref so we can notice that the backrefs don't match
4935 * and we need to figure out who is telling the truth. This is to
4936 * account for that awful fsync bug I introduced where we'd end up with
4937 * a btrfs_file_extent_item that would have its length include multiple
4938 * prealloc extents or point inside of a prealloc extent.
4940 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4943 back = alloc_data_backref(rec, parent, root, owner, offset,
4949 BUG_ON(num_refs != 1);
4950 if (back->node.found_ref)
4951 BUG_ON(back->bytes != max_size);
4952 back->node.found_ref = 1;
4953 back->found_ref += 1;
4954 back->bytes = max_size;
4955 back->disk_bytenr = bytenr;
4957 rec->content_checked = 1;
4958 rec->owner_ref_checked = 1;
4960 if (back->node.found_extent_tree) {
4961 fprintf(stderr, "Extent back ref already exists "
4962 "for %llu parent %llu root %llu "
4963 "owner %llu offset %llu num_refs %lu\n",
4964 (unsigned long long)bytenr,
4965 (unsigned long long)parent,
4966 (unsigned long long)root,
4967 (unsigned long long)owner,
4968 (unsigned long long)offset,
4969 (unsigned long)num_refs);
4971 back->num_refs = num_refs;
4972 back->node.found_extent_tree = 1;
4974 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range (@bytenr, @size) for processing: it is added to @seen
 * first and then to @pending.
 * NOTE(review): presumably the second insert is skipped when the first one
 * reports the range as already present -- confirm.
 */
4978 static int add_pending(struct cache_tree *pending,
4979 struct cache_tree *seen, u64 bytenr, u32 size)
4982 ret = add_cache_extent(seen, bytenr, size);
4985 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (bounded by @bits_nr) with the next block ranges to process.
 *
 * Sources, in order of appearance: the first extent of @reada; extents of
 * @nodes searched from up to 32768 bytes before @last, falling back to a
 * search from 0; and extents of @pending searched from 0.  When more than
 * 8 slots remain free, @pending is additionally walked from the end of
 * bits[0], comparing the gap to each next extent against 32768.
 */
4989 static int pick_next_pending(struct cache_tree *pending,
4990 struct cache_tree *reada,
4991 struct cache_tree *nodes,
4992 u64 last, struct block_info *bits, int bits_nr,
4995 unsigned long node_start = last;
4996 struct cache_extent *cache;
4999 cache = search_cache_extent(reada, 0);
5001 bits[0].start = cache->start;
5002 bits[0].size = cache->size;
5007 if (node_start > 32768)
5008 node_start -= 32768;
5010 cache = search_cache_extent(nodes, node_start);
5012 cache = search_cache_extent(nodes, 0);
5015 cache = search_cache_extent(pending, 0);
5020 bits[ret].start = cache->start;
5021 bits[ret].size = cache->size;
5022 cache = next_cache_extent(cache);
5024 } while (cache && ret < bits_nr);
5030 bits[ret].start = cache->start;
5031 bits[ret].size = cache->size;
5032 cache = next_cache_extent(cache);
5034 } while (cache && ret < bits_nr);
5036 if (bits_nr - ret > 8) {
5037 u64 lookup = bits[0].start + bits[0].size;
5038 struct cache_extent *next;
5039 next = search_cache_extent(pending, lookup);
5041 if (next->start - lookup > 32768)
5043 bits[ret].start = next->start;
5044 bits[ret].size = next->size;
5045 lookup = next->start + next->size;
5049 next = next_cache_extent(next);
/*
 * Per-entry callback for cache_tree_free_extents(): maps @cache to its
 * chunk_record and unlinks the record from its `list` and `dextents`
 * lists.
 * NOTE(review): presumably the record is then released -- confirm.
 */
5057 static void free_chunk_record(struct cache_extent *cache)
5059 struct chunk_record *rec;
5061 rec = container_of(cache, struct chunk_record, cache);
5062 list_del_init(&rec->list);
5063 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache through cache_tree_free_extents(),
 * with free_chunk_record() as the per-entry callback.
 */
5067 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5069 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback used with FREE_RB_BASED_TREE(): maps @node to the
 * device_record that embeds it.
 * NOTE(review): presumably the record is then released -- confirm.
 */
5072 static void free_device_record(struct rb_node *node)
5074 struct device_record *rec;
5076 rec = container_of(node, struct device_record, node);
/*
 * Macro expansion pairing the `device_cache` name with
 * free_device_record().
 * NOTE(review): presumably this defines the tree-freeing function for the
 * device cache; the generated name depends on the macro definition.
 */
5080 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 */
5082 int insert_block_group_record(struct block_group_tree *tree,
5083 struct block_group_record *bg_rec)
5087 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5091 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback for cache_tree_free_extents(): maps @cache to its
 * block_group_record and unlinks the record from its list.
 * NOTE(review): presumably the record is then released -- confirm.
 */
5095 static void free_block_group_record(struct cache_extent *cache)
5097 struct block_group_record *rec;
5099 rec = container_of(cache, struct block_group_record, cache);
5100 list_del_init(&rec->list);
/*
 * Release every entry of @tree->tree through cache_tree_free_extents(),
 * with free_block_group_record() as the per-entry callback.
 */
5104 void free_block_group_tree(struct block_group_tree *tree)
5106 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the cache tree of @tree with insert_cache_extent2()
 * and append it to both the no_chunk_orphans and no_device_orphans lists
 * of @tree.
 */
5109 int insert_device_extent_record(struct device_extent_tree *tree,
5110 struct device_extent_record *de_rec)
5115 * Device extent is a bit different from the other extents, because
5116 * the extents which belong to the different devices may have the
5117 * same start and size, so we need use the special extent cache
5118 * search/insert functions.
5120 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5124 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5125 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback for cache_tree_free_extents(): maps @cache to its
 * device_extent_record and unlinks the record from chunk_list and
 * device_list when those links are non-empty.
 * NOTE(review): presumably the record is then released -- confirm.
 */
5129 static void free_device_extent_record(struct cache_extent *cache)
5131 struct device_extent_record *rec;
5133 rec = container_of(cache, struct device_extent_record, cache);
5134 if (!list_empty(&rec->chunk_list))
5135 list_del_init(&rec->chunk_list);
5136 if (!list_empty(&rec->device_list))
5137 list_del_init(&rec->device_list);
/*
 * Release every entry of @tree->tree through cache_tree_free_extents(),
 * with free_device_extent_record() as the per-entry callback.
 */
5141 void free_device_extent_tree(struct device_extent_tree *tree)
5143 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5146 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf (only built when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined).
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; the other branch records a data backref carrying the v0
 * ref count.  In both cases key.objectid is the extent start and
 * key.offset is passed as the parent.
 */
5147 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5148 struct extent_buffer *leaf, int slot)
5150 struct btrfs_extent_ref_v0 *ref0;
5151 struct btrfs_key key;
5153 btrfs_item_key_to_cpu(leaf, &key, slot);
5154 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5155 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5156 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5158 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5159 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is calloc()ed with a size derived from the item's stripe
 * count.  Its cache range is (key->offset, chunk length); the key fields,
 * the leaf generation and the chunk attributes (owner, stripe_len, type,
 * io_width, io_align, sector_size, num_stripes, sub_stripes) are copied,
 * followed by devid, offset and device UUID of every stripe.
 */
5165 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5166 struct btrfs_key *key,
5169 struct btrfs_chunk *ptr;
5170 struct chunk_record *rec;
5173 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5174 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5176 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5178 fprintf(stderr, "memory allocation failed\n");
5182 INIT_LIST_HEAD(&rec->list);
5183 INIT_LIST_HEAD(&rec->dextents);
5186 rec->cache.start = key->offset;
5187 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5189 rec->generation = btrfs_header_generation(leaf);
5191 rec->objectid = key->objectid;
5192 rec->type = key->type;
5193 rec->offset = key->offset;
5195 rec->length = rec->cache.size;
5196 rec->owner = btrfs_chunk_owner(leaf, ptr);
5197 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5198 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5199 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5200 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5201 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5202 rec->num_stripes = num_stripes;
5203 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5205 for (i = 0; i < rec->num_stripes; ++i) {
5206 rec->stripes[i].devid =
5207 btrfs_stripe_devid_nr(leaf, ptr, i);
5208 rec->stripes[i].offset =
5209 btrfs_stripe_offset_nr(leaf, ptr, i);
5210 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5211 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Build a chunk_record for the item at @slot of @eb and insert it into
 * @chunk_cache; a "Chunk[...] existed." message is printed on the failure
 * path of the insertion.
 */
5218 static int process_chunk_item(struct cache_tree *chunk_cache,
5219 struct btrfs_key *key, struct extent_buffer *eb,
5222 struct chunk_record *rec;
5225 rec = btrfs_new_chunk_record(eb, key, slot);
5226 ret = insert_cache_extent(chunk_cache, &rec->cache);
5228 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5229 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into @dev_cache ordered by device_record_compare(); a "Device[...]
 * existed." message is printed on the failure path of the insertion.
 */
5236 static int process_device_item(struct rb_root *dev_cache,
5237 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5239 struct btrfs_dev_item *ptr;
5240 struct device_record *rec;
5243 ptr = btrfs_item_ptr(eb,
5244 slot, struct btrfs_dev_item);
5246 rec = malloc(sizeof(*rec));
5248 fprintf(stderr, "memory allocation failed\n");
5252 rec->devid = key->offset;
5253 rec->generation = btrfs_header_generation(eb);
5255 rec->objectid = key->objectid;
5256 rec->type = key->type;
5257 rec->offset = key->offset;
/*
 * NOTE(review): devid was already set from key->offset above; the value
 * read from the item overrides it here.
 */
5259 rec->devid = btrfs_device_id(eb, ptr);
5260 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5261 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5263 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5265 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * @slot of @leaf.  The cache range is (key->objectid, key->offset); the
 * key fields, the leaf generation and the on-disk block group flags are
 * copied and the list head is initialised.
 */
5272 struct block_group_record *
5273 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5276 struct btrfs_block_group_item *ptr;
5277 struct block_group_record *rec;
5279 rec = calloc(1, sizeof(*rec));
5281 fprintf(stderr, "memory allocation failed\n");
5285 rec->cache.start = key->objectid;
5286 rec->cache.size = key->offset;
5288 rec->generation = btrfs_header_generation(leaf);
5290 rec->objectid = key->objectid;
5291 rec->type = key->type;
5292 rec->offset = key->offset;
5294 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5295 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5297 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it
 * into @block_group_cache; a "Block Group[...] existed." message is printed
 * on the failure path of the insertion.
 */
5302 static int process_block_group_item(struct block_group_tree *block_group_cache,
5303 struct btrfs_key *key,
5304 struct extent_buffer *eb, int slot)
5306 struct block_group_record *rec;
5309 rec = btrfs_new_block_group_record(eb, key, slot);
5310 ret = insert_block_group_record(block_group_cache, rec);
5312 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5313 rec->objectid, rec->offset);
/*
 * Build a zero-initialised device_extent_record from the dev extent item
 * at @slot of @leaf.  The cache entry is keyed by key->objectid and
 * key->offset and sized with the extent length; the key fields, the leaf
 * generation and the chunk objectid / chunk offset of the item are copied
 * and both list heads are initialised.
 */
5320 struct device_extent_record *
5321 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5322 struct btrfs_key *key, int slot)
5324 struct device_extent_record *rec;
5325 struct btrfs_dev_extent *ptr;
5327 rec = calloc(1, sizeof(*rec));
5329 fprintf(stderr, "memory allocation failed\n");
5333 rec->cache.objectid = key->objectid;
5334 rec->cache.start = key->offset;
5336 rec->generation = btrfs_header_generation(leaf);
5338 rec->objectid = key->objectid;
5339 rec->type = key->type;
5340 rec->offset = key->offset;
5342 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5343 rec->chunk_objecteid =
5344 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5346 btrfs_dev_extent_chunk_offset(leaf, ptr);
5347 rec->length = btrfs_dev_extent_length(leaf, ptr);
5348 rec->cache.size = rec->length;
5350 INIT_LIST_HEAD(&rec->chunk_list);
5351 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache; a "Device extent[...] existed." message is
 * printed on the failure path of the insertion.
 */
5357 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5358 struct btrfs_key *key, struct extent_buffer *eb,
5361 struct device_extent_record *rec;
5364 rec = btrfs_new_device_extent_record(eb, key, slot);
5365 ret = insert_device_extent_record(dev_extent_cache, rec);
5368 "Device extent[%llu, %llu, %llu] existed.\n",
5369 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item at @slot of @eb into @extent_cache.
 *
 * num_bytes is root->nodesize for a BTRFS_METADATA_ITEM_KEY and key.offset
 * in the other branch.  An item smaller than struct btrfs_extent_item is
 * handled as a v0 item when BTRFS_COMPAT_EXTENT_TREE_V0 is defined: its
 * ref count is read and the extent is registered via add_extent_rec().
 *
 * For a regular item the ref count and flags are read, the extent is
 * registered via add_extent_rec(), and the inline refs that follow the
 * item (after a btrfs_tree_block_info for tree blocks keyed as
 * BTRFS_EXTENT_ITEM_KEY) are walked up to the end of the item.  Each inline
 * ref is recorded through add_tree_backref() or add_data_backref()
 * according to its type; a message starting "corrupt extent record" is
 * printed from within the same switch.
 */
5376 static int process_extent_item(struct btrfs_root *root,
5377 struct cache_tree *extent_cache,
5378 struct extent_buffer *eb, int slot)
5380 struct btrfs_extent_item *ei;
5381 struct btrfs_extent_inline_ref *iref;
5382 struct btrfs_extent_data_ref *dref;
5383 struct btrfs_shared_data_ref *sref;
5384 struct btrfs_key key;
5385 struct extent_record tmpl;
5389 u32 item_size = btrfs_item_size_nr(eb, slot);
5395 btrfs_item_key_to_cpu(eb, &key, slot);
5397 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5399 num_bytes = root->nodesize;
5401 num_bytes = key.offset;
5404 if (item_size < sizeof(*ei)) {
5405 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5406 struct btrfs_extent_item_v0 *ei0;
5407 BUG_ON(item_size != sizeof(*ei0));
5408 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5409 refs = btrfs_extent_refs_v0(eb, ei0);
5413 memset(&tmpl, 0, sizeof(tmpl));
5414 tmpl.start = key.objectid;
5415 tmpl.nr = num_bytes;
5416 tmpl.extent_item_refs = refs;
5417 tmpl.metadata = metadata;
5419 tmpl.max_size = num_bytes;
5421 return add_extent_rec(extent_cache, &tmpl);
5424 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5425 refs = btrfs_extent_refs(eb, ei);
5426 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5431 memset(&tmpl, 0, sizeof(tmpl));
5432 tmpl.start = key.objectid;
5433 tmpl.nr = num_bytes;
5434 tmpl.extent_item_refs = refs;
5435 tmpl.metadata = metadata;
5437 tmpl.max_size = num_bytes;
5438 add_extent_rec(extent_cache, &tmpl);
5440 ptr = (unsigned long)(ei + 1);
5441 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5442 key.type == BTRFS_EXTENT_ITEM_KEY)
5443 ptr += sizeof(struct btrfs_tree_block_info);
5445 end = (unsigned long)ei + item_size;
5447 iref = (struct btrfs_extent_inline_ref *)ptr;
5448 type = btrfs_extent_inline_ref_type(eb, iref);
5449 offset = btrfs_extent_inline_ref_offset(eb, iref);
5451 case BTRFS_TREE_BLOCK_REF_KEY:
5452 add_tree_backref(extent_cache, key.objectid,
5455 case BTRFS_SHARED_BLOCK_REF_KEY:
5456 add_tree_backref(extent_cache, key.objectid,
5459 case BTRFS_EXTENT_DATA_REF_KEY:
5460 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5461 add_data_backref(extent_cache, key.objectid, 0,
5462 btrfs_extent_data_ref_root(eb, dref),
5463 btrfs_extent_data_ref_objectid(eb,
5465 btrfs_extent_data_ref_offset(eb, dref),
5466 btrfs_extent_data_ref_count(eb, dref),
5469 case BTRFS_SHARED_DATA_REF_KEY:
5470 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5471 add_data_backref(extent_cache, key.objectid, offset,
5473 btrfs_shared_data_ref_count(eb, sref),
5477 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5478 key.objectid, key.type, num_bytes);
5481 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by a free space entry in cache->free_space_ctl whose offset and
 * length both match. Before the lookup, the parts of the range that overlap
 * a super block stripe (as reported by btrfs_rmap_block() for each super
 * mirror) are trimmed away. A matching entry is unlinked from the free space
 * ctl.
 *
 * NOTE(review): some lines (declarations, braces, returns, parts of
 * conditions) are not visible in this listing.
 */
5488 static int check_cache_range(struct btrfs_root *root,
5489 struct btrfs_block_group_cache *cache,
5490 u64 offset, u64 bytes)
5492 struct btrfs_free_space *entry;
/* Map each super block mirror back to logical addresses in this block group. */
5498 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5499 bytenr = btrfs_sb_offset(i);
5500 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5501 cache->key.objectid, bytenr, 0,
5502 &logical, &nr, &stripe_len);
/* The first two tests detect a stripe that lies entirely before or after the range. */
5507 if (logical[nr] + stripe_len <= offset)
5509 if (offset + bytes <= logical[nr])
5511 if (logical[nr] == offset) {
5512 if (stripe_len >= bytes) {
5516 bytes -= stripe_len;
5517 offset += stripe_len;
5518 } else if (logical[nr] < offset) {
5519 if (logical[nr] + stripe_len >=
5524 bytes = (offset + bytes) -
5525 (logical[nr] + stripe_len);
5526 offset = logical[nr] + stripe_len;
5529 * Could be tricky, the super may land in the
5530 * middle of the area we're checking. First
5531 * check the easiest case, it's at the end.
5533 if (logical[nr] + stripe_len >=
5535 bytes = logical[nr] - offset;
5539 /* Check the left side */
5540 ret = check_cache_range(root, cache,
5542 logical[nr] - offset);
5548 /* Now we continue with the right side */
5549 bytes = (offset + bytes) -
5550 (logical[nr] + stripe_len);
5551 offset = logical[nr] + stripe_len;
/* Look up the (possibly trimmed) range and require an exact match. */
5558 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5560 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5561 offset, offset+bytes);
5565 if (entry->offset != offset) {
5566 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5571 if (entry->bytes != bytes) {
5572 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5573 bytes, entry->bytes, offset);
/* Remove the verified entry so that leftovers can be detected by the caller. */
5577 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Walk the extent tree items that fall inside block group @cache and call
 * check_cache_range() for every gap between consecutive extents, and for the
 * tail of the block group. Afterwards, report if cache->free_space_ctl still
 * holds entries.
 *
 * NOTE(review): declarations, braces, loop control and error handling lines
 * are not visible in this listing.
 */
5582 static int verify_space_cache(struct btrfs_root *root,
5583 struct btrfs_block_group_cache *cache)
5585 struct btrfs_path *path;
5586 struct extent_buffer *leaf;
5587 struct btrfs_key key;
5591 path = btrfs_alloc_path();
/* All lookups below are done in the extent tree, whatever root was passed in. */
5595 root = root->fs_info->extent_root;
/* Start at the block group start, but never below BTRFS_SUPER_INFO_OFFSET. */
5597 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5599 key.objectid = last;
5601 key.type = BTRFS_EXTENT_ITEM_KEY;
5603 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5608 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5609 ret = btrfs_next_leaf(root, path);
5617 leaf = path->nodes[0];
5618 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5619 if (key.objectid >= cache->key.offset + cache->key.objectid)
5621 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5622 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly at `last`: no gap, just move `last` past this extent. */
5627 if (last == key.objectid) {
5628 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5629 last = key.objectid + key.offset;
5631 last = key.objectid + root->nodesize;
/* Otherwise the range [last, key.objectid) is checked against the free space entries. */
5636 ret = check_cache_range(root, cache, last,
5637 key.objectid - last);
5640 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5641 last = key.objectid + key.offset;
5643 last = key.objectid + root->nodesize;
/* Check the space between the last extent seen and the end of the block group. */
5647 if (last < cache->key.objectid + cache->key.offset)
5648 ret = check_cache_range(root, cache, last,
5649 cache->key.objectid +
5650 cache->key.offset - last);
5653 btrfs_free_path(path);
5656 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5657 fprintf(stderr, "There are still entries left in the space "
/*
 * Iterate over the block groups, starting with the first one found at or
 * after the end of the primary super block, load the free space information
 * of each one and verify it with verify_space_cache().
 *
 * When the FREE_SPACE_TREE compat_ro feature is set, the data is loaded with
 * load_free_space_tree() after excluding the super stripes; the other visible
 * load path is load_free_space_cache() (presumably the else branch; the line
 * is not visible in this listing).
 *
 * Returns -EINVAL when `error` is non-zero at the end of the walk, 0
 * otherwise.
 */
5665 static int check_space_cache(struct btrfs_root *root)
5667 struct btrfs_block_group_cache *cache;
5668 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * A cache generation other than -1 that differs from the super generation is
 * reported as a cache that will be invalidated.
 */
5672 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5673 btrfs_super_generation(root->fs_info->super_copy) !=
5674 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5675 printf("cache and super generation don't match, space cache "
5676 "will be invalidated\n");
5680 if (ctx.progress_enabled) {
5681 ctx.tp = TASK_FREE_SPACE;
5682 task_start(ctx.info);
5686 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup starts right after the current block group. */
5690 start = cache->key.objectid + cache->key.offset;
5691 if (!cache->free_space_ctl) {
5692 if (btrfs_init_free_space_ctl(cache,
5693 root->sectorsize)) {
5698 btrfs_remove_free_space_cache(cache);
5701 if (btrfs_fs_compat_ro(root->fs_info,
5702 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5703 ret = exclude_super_stripes(root, cache);
5705 fprintf(stderr, "could not exclude super stripes: %s\n",
5710 ret = load_free_space_tree(root->fs_info, cache);
5711 free_excluded_extents(root, cache);
5713 fprintf(stderr, "could not load free space tree: %s\n",
5720 ret = load_free_space_cache(root->fs_info, cache);
5725 ret = verify_space_cache(root, cache);
5727 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5728 cache->key.objectid);
5733 task_stop(ctx.info);
5735 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data starting at @bytenr and compare the checksum of
 * every root->sectorsize piece with the expected checksum read from @eb,
 * starting at @leaf_offset. Each stored checksum is csum_size bytes long.
 *
 * On a mismatch, the mirror number, byte offset and both checksums are
 * printed, and the number of copies is queried so that another mirror can be
 * tried.
 *
 * NOTE(review): declarations, error paths and the retry/cleanup lines are not
 * visible in this listing.
 */
5738 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5739 u64 num_bytes, unsigned long leaf_offset,
5740 struct extent_buffer *eb) {
5743 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5745 unsigned long csum_offset;
5749 u64 data_checked = 0;
/* The length is tested for being a multiple of the sector size. */
5755 if (num_bytes % root->sectorsize)
5758 data = malloc(num_bytes);
5762 while (offset < num_bytes) {
5765 read_len = num_bytes - offset;
5766 /* ask for all remaining bytes in a single read call */
5767 ret = read_extent_data(root, data + offset,
5768 bytenr + offset, &read_len, mirror);
5772 /* verify the checksum of every sectorsize piece of the data just read */
5773 while (data_checked < read_len) {
5775 tmp = offset + data_checked;
5777 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5778 csum, root->sectorsize);
5779 btrfs_csum_final(csum, (char *)&csum);
/* Location of the expected checksum for this sector inside the csum item. */
5781 csum_offset = leaf_offset +
5782 tmp / root->sectorsize * csum_size;
5783 read_extent_buffer(eb, (char *)&csum_expected,
5784 csum_offset, csum_size);
5785 /* try another mirror */
5786 if (csum != csum_expected) {
5787 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5788 mirror, bytenr + tmp,
5789 csum, csum_expected);
5790 num_copies = btrfs_num_copies(
5791 &root->fs_info->mapping_tree,
5793 if (mirror < num_copies - 1) {
5798 data_checked += root->sectorsize;
/*
 * Check that EXTENT_ITEMs in the extent tree cover the byte range
 * [@bytenr, @bytenr + @num_bytes). The range is shrunk as covering extents
 * are found; an extent that sits in the middle of the range is handled by
 * recursing on the right-hand part. If bytes are left uncovered and no other
 * error was recorded, "There are no extents for csum range" is printed.
 *
 * NOTE(review): declarations, braces, loop control and return statements are
 * not visible in this listing.
 */
5807 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5810 struct btrfs_path *path;
5811 struct extent_buffer *leaf;
5812 struct btrfs_key key;
5815 path = btrfs_alloc_path();
5817 fprintf(stderr, "Error allocating path\n");
/* Search with the largest possible offset for an EXTENT_ITEM at @bytenr. */
5821 key.objectid = bytenr;
5822 key.type = BTRFS_EXTENT_ITEM_KEY;
5823 key.offset = (u64)-1;
5826 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5829 fprintf(stderr, "Error looking up extent record %d\n", ret);
5830 btrfs_free_path(path);
5833 if (path->slots[0] > 0) {
5836 ret = btrfs_prev_leaf(root, path);
5839 } else if (ret > 0) {
5846 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5849 * Block group items come before extent items if they have the same
5850 * bytenr, so walk back one more just in case. Dear future traveller,
5851 * first congrats on mastering time travel. Now if it's not too much
5852 * trouble could you go back to 2006 and tell Chris to make the
5853 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5854 * EXTENT_ITEM_KEY please?
5856 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5857 if (path->slots[0] > 0) {
5860 ret = btrfs_prev_leaf(root, path);
5863 } else if (ret > 0) {
5868 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5872 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5873 ret = btrfs_next_leaf(root, path);
5875 fprintf(stderr, "Error going to next leaf "
5877 btrfs_free_path(path);
5883 leaf = path->nodes[0];
5884 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5885 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5889 if (key.objectid + key.offset < bytenr) {
5893 if (key.objectid > bytenr + num_bytes)
/* Extent starts at the range start: drop the covered prefix. */
5896 if (key.objectid == bytenr) {
5897 if (key.offset >= num_bytes) {
5901 num_bytes -= key.offset;
5902 bytenr += key.offset;
/* Extent starts before the range: keep only what lies past the extent end. */
5903 } else if (key.objectid < bytenr) {
5904 if (key.objectid + key.offset >= bytenr + num_bytes) {
5908 num_bytes = (bytenr + num_bytes) -
5909 (key.objectid + key.offset);
5910 bytenr = key.objectid + key.offset;
5912 if (key.objectid + key.offset < bytenr + num_bytes) {
5913 u64 new_start = key.objectid + key.offset;
5914 u64 new_bytes = bytenr + num_bytes - new_start;
5917 * Weird case, the extent is in the middle of
5918 * our range, we'll have to search one side
5919 * and then the other. Not sure if this happens
5920 * in real life, but no harm in coding it up
5921 * anyway just in case.
5923 btrfs_release_path(path);
5924 ret = check_extent_exists(root, new_start,
5927 fprintf(stderr, "Right section didn't "
5931 num_bytes = key.objectid - bytenr;
5934 num_bytes = key.objectid - bytenr;
5941 if (num_bytes && !ret) {
5942 fprintf(stderr, "There are no extents for csum range "
5943 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5947 btrfs_free_path(path);
/*
 * Walk the EXTENT_CSUM items of the csum tree.
 *
 * When the global check_data_csum is set, each item is handed to
 * check_extent_csums() to verify the data against the stored checksums.
 * Independently of that, consecutive items are merged into runs of
 * [offset, offset + num_bytes); whenever the next item does not continue the
 * current run, check_extent_exists() is asked whether the run is covered by
 * extent records.
 *
 * NOTE(review): declarations, braces, loop control and return statements are
 * not visible in this listing.
 */
5951 static int check_csums(struct btrfs_root *root)
5953 struct btrfs_path *path;
5954 struct extent_buffer *leaf;
5955 struct btrfs_key key;
5956 u64 offset = 0, num_bytes = 0;
5957 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5961 unsigned long leaf_offset;
/* From here on `root` is the csum tree root. */
5963 root = root->fs_info->csum_root;
5964 if (!extent_buffer_uptodate(root->node)) {
5965 fprintf(stderr, "No valid csum tree found\n");
5969 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5970 key.type = BTRFS_EXTENT_CSUM_KEY;
5973 path = btrfs_alloc_path();
5977 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5979 fprintf(stderr, "Error searching csum tree %d\n", ret);
5980 btrfs_free_path(path);
5984 if (ret > 0 && path->slots[0])
5989 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5990 ret = btrfs_next_leaf(root, path);
5992 fprintf(stderr, "Error going to next leaf "
5999 leaf = path->nodes[0];
6001 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6002 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by this item: one sector per stored checksum. */
6007 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6008 csum_size) * root->sectorsize;
6009 if (!check_data_csum)
6010 goto skip_csum_check;
6011 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6012 ret = check_extent_csums(root, key.offset, data_len,
6018 offset = key.offset;
/* This item does not continue the current run: verify the run, then start a new one. */
6019 } else if (key.offset != offset + num_bytes) {
6020 ret = check_extent_exists(root, offset, num_bytes);
6022 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6023 "there is no extent record\n",
6024 offset, offset+num_bytes);
6027 offset = key.offset;
6030 num_bytes += data_len;
6034 btrfs_free_path(path);
/*
 * Compare @key with @drop_key field by field: objectid first, then type, then
 * offset.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the function returns non-zero when @key sorts before @drop_key
 * and 0 otherwise. Confirm against the full source.
 */
6038 static int is_dropped_key(struct btrfs_key *key,
6039 struct btrfs_key *drop_key) {
6040 if (key->objectid < drop_key->objectid)
6042 else if (key->objectid == drop_key->objectid) {
6043 if (key->type < drop_key->type)
6045 else if (key->type == drop_key->type) {
6046 if (key->offset < drop_key->offset)
6054 * Here are the rules for FULL_BACKREF.
6056 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6057 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6059 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6060 * if it happened after the relocation occurred since we'll have dropped the
6061 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6062 * have no real way to know for sure.
6064 * We process the blocks one root at a time, and we start from the lowest root
6065 * objectid and go to the highest. So we can just lookup the owner backref for
6066 * the record and if we don't find it then we know it doesn't exist and we have
6069 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6070 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6071 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF has to be set in *flags for
 * tree block @buf, which was reached through the root described by @ri.
 *
 * The extent record for @buf is looked up in @extent_cache. The visible
 * checks look at the root objectid, whether @buf is the root block itself,
 * the RELOC header flag, the header owner, and whether a tree backref for the
 * owner exists. When the outcome disagrees with a flag_block_full_backref
 * value that was already recorded (i.e. not FLAG_UNSET), the record is marked
 * bad_full_backref.
 *
 * NOTE(review): the gotos/labels that connect these checks, and the last
 * parameter line, are not visible in this listing.
 */
6073 static int calc_extent_flag(struct btrfs_root *root,
6074 struct cache_tree *extent_cache,
6075 struct extent_buffer *buf,
6076 struct root_item_record *ri,
6079 struct extent_record *rec;
6080 struct cache_extent *cache;
6081 struct tree_backref *tback;
6084 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6085 /* we have added this extent before */
6087 rec = container_of(cache, struct extent_record, cache);
6090 * Except file/reloc tree, we can not have
6093 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6098 if (buf->start == ri->bytenr)
6101 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6104 owner = btrfs_header_owner(buf);
6105 if (owner == ri->objectid)
6108 tback = find_tree_backref(rec, 0, owner);
/* A previously recorded value other than 0 conflicts with this path. */
6113 if (rec->flag_block_full_backref != FLAG_UNSET &&
6114 rec->flag_block_full_backref != 0)
6115 rec->bad_full_backref = 1;
/* Full backref path: set the flag; a previously recorded value other than 1 is a conflict. */
6118 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6119 if (rec->flag_block_full_backref != FLAG_UNSET &&
6120 rec->flag_block_full_backref != 1)
6121 rec->bad_full_backref = 1;
/*
 * Process one tree block taken from the pending/reada/nodes trees.
 *
 * Visible steps:
 *  - pick the next batch with pick_next_pending() and issue readahead for it;
 *  - remove the chosen block from the pending, reada and nodes trees;
 *  - read the block and determine its FULL_BACKREF state (via
 *    btrfs_lookup_extent_info() and/or calc_extent_flag());
 *  - for a leaf: feed every item into the matching cache (extent records,
 *    chunks, devices, block groups, device extents, backrefs, orphan items to
 *    delete) and account data extents;
 *  - for a node: add an extent record and tree backref for every child
 *    pointer and queue the child;
 *  - update the global byte counters.
 *
 * NOTE(review): many lines (declarations, braces, continue/goto statements,
 * some call arguments and conditions) are not visible in this listing, so the
 * exact control flow between these steps cannot be confirmed from here.
 */
6125 static int run_next_block(struct btrfs_root *root,
6126 struct block_info *bits,
6129 struct cache_tree *pending,
6130 struct cache_tree *seen,
6131 struct cache_tree *reada,
6132 struct cache_tree *nodes,
6133 struct cache_tree *extent_cache,
6134 struct cache_tree *chunk_cache,
6135 struct rb_root *dev_cache,
6136 struct block_group_tree *block_group_cache,
6137 struct device_extent_tree *dev_extent_cache,
6138 struct root_item_record *ri)
6140 struct extent_buffer *buf;
6141 struct extent_record *rec = NULL;
6152 struct btrfs_key key;
6153 struct cache_extent *cache;
/* Choose the next blocks to work on and queue readahead for them. */
6156 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6157 bits_nr, &reada_bits);
6162 for(i = 0; i < nritems; i++) {
6163 ret = add_cache_extent(reada, bits[i].start,
6168 /* fixme, get the parent transid */
6169 readahead_tree_block(root, bits[i].start,
/* The first entry of the batch is the block processed by this call. */
6173 *last = bits[0].start;
6174 bytenr = bits[0].start;
6175 size = bits[0].size;
/* Drop the block from the three work trees. */
6177 cache = lookup_cache_extent(pending, bytenr, size);
6179 remove_cache_extent(pending, cache);
6182 cache = lookup_cache_extent(reada, bytenr, size);
6184 remove_cache_extent(reada, cache);
6187 cache = lookup_cache_extent(nodes, bytenr, size);
6189 remove_cache_extent(nodes, cache);
/* The parent generation kept in the extent record is used as `gen` for the read. */
6192 cache = lookup_cache_extent(extent_cache, bytenr, size);
6194 rec = container_of(cache, struct extent_record, cache);
6195 gen = rec->parent_generation;
6198 /* fixme, get the real parent transid */
6199 buf = read_tree_block(root, bytenr, size, gen);
6200 if (!extent_buffer_uptodate(buf)) {
6201 record_bad_block_io(root->fs_info,
6202 extent_cache, bytenr, size);
6206 nritems = btrfs_header_nritems(buf);
/*
 * Unless the extent tree is being re-initialised, the flags are looked up in
 * the extent tree; calc_extent_flag() is the other visible source of flags.
 */
6209 if (!init_extent_tree) {
6210 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6211 btrfs_header_level(buf), 1, NULL,
6214 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6216 fprintf(stderr, "Couldn't calc extent flags\n");
6217 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6222 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6224 fprintf(stderr, "Couldn't calc extent flags\n");
6225 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6229 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6231 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6232 ri->objectid == btrfs_header_owner(buf)) {
6234 * Ok we got to this block from it's original owner and
6235 * we have FULL_BACKREF set. Relocation can leave
6236 * converted blocks over so this is altogether possible,
6237 * however it's not possible if the generation > the
6238 * last snapshot, so check for this case.
6240 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6241 btrfs_header_generation(buf) > ri->last_snapshot) {
6242 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6243 rec->bad_full_backref = 1;
6248 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6249 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6250 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6251 rec->bad_full_backref = 1;
/* Remember the final decision on the extent record. */
6255 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6256 rec->flag_block_full_backref = 1;
6260 rec->flag_block_full_backref = 0;
6262 owner = btrfs_header_owner(buf);
6265 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item by key type. */
6269 if (btrfs_is_leaf(buf)) {
6270 btree_space_waste += btrfs_leaf_free_space(root, buf);
6271 for (i = 0; i < nritems; i++) {
6272 struct btrfs_file_extent_item *fi;
6273 btrfs_item_key_to_cpu(buf, &key, i);
6274 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6275 process_extent_item(root, extent_cache, buf,
6279 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6280 process_extent_item(root, extent_cache, buf,
6284 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6286 btrfs_item_size_nr(buf, i);
6289 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6290 process_chunk_item(chunk_cache, &key, buf, i);
6293 if (key.type == BTRFS_DEV_ITEM_KEY) {
6294 process_device_item(dev_cache, &key, buf, i);
6297 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6298 process_block_group_item(block_group_cache,
6302 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6303 process_device_extent_item(dev_extent_cache,
6308 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6309 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6310 process_extent_ref_v0(extent_cache, buf, i);
6317 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6318 add_tree_backref(extent_cache, key.objectid, 0,
6322 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6323 add_tree_backref(extent_cache, key.objectid,
6327 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6328 struct btrfs_extent_data_ref *ref;
6329 ref = btrfs_item_ptr(buf, i,
6330 struct btrfs_extent_data_ref);
6331 add_data_backref(extent_cache,
6333 btrfs_extent_data_ref_root(buf, ref),
6334 btrfs_extent_data_ref_objectid(buf,
6336 btrfs_extent_data_ref_offset(buf, ref),
6337 btrfs_extent_data_ref_count(buf, ref),
6338 0, root->sectorsize);
6341 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6342 struct btrfs_shared_data_ref *ref;
6343 ref = btrfs_item_ptr(buf, i,
6344 struct btrfs_shared_data_ref);
6345 add_data_backref(extent_cache,
6346 key.objectid, key.offset, 0, 0, 0,
6347 btrfs_shared_data_ref_count(buf, ref),
6348 0, root->sectorsize);
/* Orphan items are copied into a bad_item and appended to the global delete_items list. */
6351 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6352 struct bad_item *bad;
6354 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6358 bad = malloc(sizeof(struct bad_item));
6361 INIT_LIST_HEAD(&bad->list);
6362 memcpy(&bad->key, &key,
6363 sizeof(struct btrfs_key));
6364 bad->root_id = owner;
6365 list_add_tail(&bad->list, &delete_items);
/* Remaining statements of the loop body deal with EXTENT_DATA (file extent) items. */
6368 if (key.type != BTRFS_EXTENT_DATA_KEY)
6370 fi = btrfs_item_ptr(buf, i,
6371 struct btrfs_file_extent_item);
6372 if (btrfs_file_extent_type(buf, fi) ==
6373 BTRFS_FILE_EXTENT_INLINE)
6375 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6378 data_bytes_allocated +=
6379 btrfs_file_extent_disk_num_bytes(buf, fi);
6380 if (data_bytes_allocated < root->sectorsize) {
6383 data_bytes_referenced +=
6384 btrfs_file_extent_num_bytes(buf, fi);
6385 add_data_backref(extent_cache,
6386 btrfs_file_extent_disk_bytenr(buf, fi),
6387 parent, owner, key.objectid, key.offset -
6388 btrfs_file_extent_offset(buf, fi), 1, 1,
6389 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: walk the child pointers. */
6393 struct btrfs_key first_key;
6395 first_key.objectid = 0;
6398 btrfs_item_key_to_cpu(buf, &first_key, 0);
6399 level = btrfs_header_level(buf);
6400 for (i = 0; i < nritems; i++) {
6401 struct extent_record tmpl;
6403 ptr = btrfs_node_blockptr(buf, i);
6404 size = root->nodesize;
6405 btrfs_node_key_to_cpu(buf, &key, i);
/* At the root's drop_level, child keys are tested against the drop key. */
6407 if ((level == ri->drop_level)
6408 && is_dropped_key(&key, &ri->drop_key)) {
6413 memset(&tmpl, 0, sizeof(tmpl));
6414 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6415 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6420 tmpl.max_size = size;
6421 ret = add_extent_rec(extent_cache, &tmpl);
6424 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6427 add_pending(nodes, seen, ptr, size);
6429 add_pending(pending, seen, ptr, size);
6432 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6433 nritems) * sizeof(struct btrfs_key_ptr);
/* Global size accounting, split by the tree that owns the block. */
6435 total_btree_bytes += buf->len;
6436 if (fs_root_objectid(btrfs_header_owner(buf)))
6437 total_fs_tree_bytes += buf->len;
6438 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6439 total_extent_tree_bytes += buf->len;
6440 if (!found_old_backref &&
6441 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6442 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6443 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6444 found_old_backref = 1;
6446 free_extent_buffer(buf);
/*
 * Queue the root block @buf for processing and record it in @extent_cache.
 *
 * A block with level > 0 is added to @nodes; the other visible add_pending()
 * call puts it on @pending (presumably the else branch; the line is not
 * visible in this listing). An extent record covering buf->start / buf->len
 * is added. The tree backref is added with parent buf->start for the reloc
 * tree or for blocks older than BTRFS_MIXED_BACKREF_REV; the other visible
 * call adds it keyed by @objectid with parent 0.
 */
6450 static int add_root_to_pending(struct extent_buffer *buf,
6451 struct cache_tree *extent_cache,
6452 struct cache_tree *pending,
6453 struct cache_tree *seen,
6454 struct cache_tree *nodes,
6457 struct extent_record tmpl;
6459 if (btrfs_header_level(buf) > 0)
6460 add_pending(nodes, seen, buf->start, buf->len);
6462 add_pending(pending, seen, buf->start, buf->len);
6464 memset(&tmpl, 0, sizeof(tmpl));
6465 tmpl.start = buf->start;
6470 tmpl.max_size = buf->len;
6471 add_extent_rec(extent_cache, &tmpl);
6473 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6474 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6475 add_tree_backref(extent_cache, buf->start, buf->start,
6478 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6482 /* as we fix the tree, we might be deleting blocks that
6483 * we're tracking for repair. This hook makes sure we
6484 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache (root->fs_info->fsck_extent_cache) in step with
 * an extent reference being dropped.
 *
 * The extent record covering [@bytenr, @num_bytes) is looked up, then the
 * matching data backref (when @owner >= BTRFS_FIRST_FREE_OBJECTID) or tree
 * backref is found and its found_ref / found_extent_tree state and counters
 * are reduced. maybe_free_extent_rec() is called on the record at the end.
 *
 * NOTE(review): declarations, braces, NULL checks and return statements are
 * not visible in this listing.
 */
6486 static int free_extent_hook(struct btrfs_trans_handle *trans,
6487 struct btrfs_root *root,
6488 u64 bytenr, u64 num_bytes, u64 parent,
6489 u64 root_objectid, u64 owner, u64 offset,
6492 struct extent_record *rec;
6493 struct cache_extent *cache;
6495 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data. */
6497 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6498 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6502 rec = container_of(cache, struct extent_record, cache);
6504 struct data_backref *back;
6505 back = find_data_backref(rec, parent, root_objectid, owner,
6506 offset, 1, bytenr, num_bytes);
6509 if (back->node.found_ref) {
6510 back->found_ref -= refs_to_drop;
6512 rec->refs -= refs_to_drop;
6514 if (back->node.found_extent_tree) {
6515 back->num_refs -= refs_to_drop;
6516 if (rec->extent_item_refs)
6517 rec->extent_item_refs -= refs_to_drop;
/* Clear the boolean markers once the corresponding counters reach zero. */
6519 if (back->found_ref == 0)
6520 back->node.found_ref = 0;
6521 if (back->num_refs == 0)
6522 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased when found_extent_tree is clear but
 * found_ref is still set; confirm whether `!back->node.found_ref` (both
 * markers clear) was intended. The same condition is used for tree backrefs
 * below.
 */
6524 if (!back->node.found_extent_tree && back->node.found_ref) {
6525 rb_erase(&back->node.node, &rec->backref_tree);
6529 struct tree_backref *back;
6530 back = find_tree_backref(rec, parent, root_objectid);
6533 if (back->node.found_ref) {
6536 back->node.found_ref = 0;
6538 if (back->node.found_extent_tree) {
6539 if (rec->extent_item_refs)
6540 rec->extent_item_refs--;
6541 back->node.found_extent_tree = 0;
6543 if (!back->node.found_extent_tree && back->node.found_ref) {
6544 rb_erase(&back->node.node, &rec->backref_tree);
6548 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items whose key objectid is @bytenr and whose type
 * is an extent item, metadata item or one of the backref item types listed
 * below. The search starts at the highest possible offset and works towards
 * lower keys; items of other types are skipped by stepping the search key
 * down. After an EXTENT_ITEM or METADATA_ITEM is deleted,
 * btrfs_update_block_group() is called for its byte count.
 *
 * NOTE(review): how @new_len is used is not visible in this listing, nor are
 * the loop construct, error handling and return statements.
 */
6553 static int delete_extent_records(struct btrfs_trans_handle *trans,
6554 struct btrfs_root *root,
6555 struct btrfs_path *path,
6556 u64 bytenr, u64 new_len)
6558 struct btrfs_key key;
6559 struct btrfs_key found_key;
6560 struct extent_buffer *leaf;
6565 key.objectid = bytenr;
6567 key.offset = (u64)-1;
6570 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6577 if (path->slots[0] == 0)
6583 leaf = path->nodes[0];
6584 slot = path->slots[0];
6586 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6587 if (found_key.objectid != bytenr)
/* Not an extent or backref item: leave it alone and continue from the next lower key. */
6590 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6591 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6592 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6593 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6594 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6595 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6596 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6597 btrfs_release_path(path);
6598 if (found_key.type == 0) {
6599 if (found_key.offset == 0)
6601 key.offset = found_key.offset - 1;
6602 key.type = found_key.type;
6604 key.type = found_key.type - 1;
6605 key.offset = (u64)-1;
6609 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6610 found_key.objectid, found_key.type, found_key.offset);
6612 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6615 btrfs_release_path(path);
/* Extent length is the key offset for EXTENT_ITEM and root->nodesize for METADATA_ITEM. */
6617 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6618 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6619 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6620 found_key.offset : root->nodesize;
6622 ret = btrfs_update_block_group(trans, root, bytenr,
6629 btrfs_release_path(path);
6634 * for a single backref, this will allocate a new extent
6635 * and add the backref to it.
/*
 * Re-create extent tree information for extent record @rec from one backref
 * @back.
 *
 * Visible steps: an EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted
 * with a reference count of 0 and rec->generation; for a tree block the
 * trailing btrfs_tree_block_info is zeroed and filled from rec->info_objectid
 * and rec->info_level; block group accounting is updated. Then the reference
 * itself is added with btrfs_inc_extent_ref(): once per found_ref for a data
 * backref, once for a tree backref.
 *
 * NOTE(review): declarations, braces, conditions around the insertion (such
 * as the use of @allocated) and the error paths are not visible in this
 * listing. The only code changes here are restoring `&copy_key`, which had
 * been corrupted into a copyright sign, and dropping a stray second
 * semicolon.
 */
6637 static int record_extent(struct btrfs_trans_handle *trans,
6638 struct btrfs_fs_info *info,
6639 struct btrfs_path *path,
6640 struct extent_record *rec,
6641 struct extent_backref *back,
6642 int allocated, u64 flags)
6645 struct btrfs_root *extent_root = info->extent_root;
6646 struct extent_buffer *leaf;
6647 struct btrfs_key ins_key;
6648 struct btrfs_extent_item *ei;
6649 struct tree_backref *tback;
6650 struct data_backref *dback;
6651 struct btrfs_tree_block_info *bi;
/* The recorded size is raised to at least the extent root's node size. */
6654 rec->max_size = max_t(u64, rec->max_size,
6655 info->extent_root->nodesize);
6658 u32 item_size = sizeof(*ei);
6661 item_size += sizeof(*bi);
6663 ins_key.objectid = rec->start;
6664 ins_key.offset = rec->max_size;
6665 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6667 ret = btrfs_insert_empty_item(trans, extent_root, path,
6668 &ins_key, item_size);
6672 leaf = path->nodes[0];
6673 ei = btrfs_item_ptr(leaf, path->slots[0],
6674 struct btrfs_extent_item);
/* Start with zero refs; the references are added further down. */
6676 btrfs_set_extent_refs(leaf, ei, 0);
6677 btrfs_set_extent_generation(leaf, ei, rec->generation);
6679 if (back->is_data) {
6680 btrfs_set_extent_flags(leaf, ei,
6681 BTRFS_EXTENT_FLAG_DATA);
6683 struct btrfs_disk_key copy_key;
6685 tback = to_tree_backref(back);
6686 bi = (struct btrfs_tree_block_info *)(ei + 1);
6687 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Build the key stored in the tree block info: (info_objectid, 0, 0). */
6690 btrfs_set_disk_key_objectid(&copy_key,
6691 rec->info_objectid);
6692 btrfs_set_disk_key_type(&copy_key, 0);
6693 btrfs_set_disk_key_offset(&copy_key, 0);
6695 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6696 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6698 btrfs_set_extent_flags(leaf, ei,
6699 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6702 btrfs_mark_buffer_dirty(leaf);
6703 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6704 rec->max_size, 1, 0);
6707 btrfs_release_path(path);
6710 if (back->is_data) {
6714 dback = to_data_backref(back);
6715 if (back->full_backref)
6716 parent = dback->parent;
/* One btrfs_inc_extent_ref() call per reference that was found. */
6720 for (i = 0; i < dback->found_ref; i++) {
6721 /* if parent != 0, we're doing a full backref
6722 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6723 * just makes the backref allocator create a data
6726 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6727 rec->start, rec->max_size,
6731 BTRFS_FIRST_FREE_OBJECTID :
6737 fprintf(stderr, "adding new data backref"
6738 " on %llu %s %llu owner %llu"
6739 " offset %llu found %d\n",
6740 (unsigned long long)rec->start,
6741 back->full_backref ?
6743 back->full_backref ?
6744 (unsigned long long)parent :
6745 (unsigned long long)dback->root,
6746 (unsigned long long)dback->owner,
6747 (unsigned long long)dback->offset,
6752 tback = to_tree_backref(back);
6753 if (back->full_backref)
6754 parent = tback->parent;
6758 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6759 rec->start, rec->max_size,
6760 parent, tback->root, 0, 0);
6761 fprintf(stderr, "adding new tree backref on "
6762 "start %llu len %llu parent %llu root %llu\n",
6763 rec->start, rec->max_size, parent, tback->root);
6766 btrfs_release_path(path);
/*
 * Search the list @entries for an extent_entry whose bytenr and bytes both
 * equal the given @bytenr and @bytes.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the matching entry is returned, or NULL when there is none.
 * Confirm against the full source.
 */
6770 static struct extent_entry *find_entry(struct list_head *entries,
6771 u64 bytenr, u64 bytes)
6773 struct extent_entry *entry = NULL;
6775 list_for_each_entry(entry, entries, list) {
6776 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Walk the list @entries and select the entry that looks most trustworthy,
 * judged by comparing the `count` fields of the current entry, the previous
 * entry and the best candidate so far. Entries whose `broken` value equals
 * their `count` are tested for separately.
 *
 * NOTE(review): the continue/assignment statements under several of the
 * conditions and the final return are not visible in this listing, so the
 * exact tie-breaking behavior cannot be confirmed from here.
 */
6783 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6785 struct extent_entry *entry, *best = NULL, *prev = NULL;
6787 list_for_each_entry(entry, entries, list) {
6794 * If there are as many broken entries as entries then we know
6795 * not to trust this particular entry.
6797 if (entry->broken == entry->count)
6801 * If our current entry == best then we can't be sure our best
6802 * is really the best, so we need to keep searching.
6804 if (best && best->count == entry->count) {
6810 /* Prev == entry, not good enough, have to keep searching */
6811 if (!prev->broken && prev->count == entry->count)
6815 best = (prev->count > entry->count) ? prev : entry;
6816 else if (best->count < entry->count)
/*
 * Rewrite the file extent item referenced by data backref @dback so that it
 * agrees with the extent described by @entry.
 *
 * Visible steps: read the fs root named by dback->root, find the
 * EXTENT_DATA item of inode dback->owner whose disk bytenr and disk byte
 * count match @dback, start a transaction and search again with COW, then
 * update disk_bytenr, offset and disk_num_bytes (and ram_bytes when the
 * extent is not compressed) and commit. Compressed extents whose start
 * differs from the entry, and refs that fall outside the entry, are reported
 * with a request for a btrfs-image instead.
 *
 * Returns `ret` when it is non-zero, otherwise the result of the transaction
 * commit.
 *
 * NOTE(review): declarations, braces, loop control, gotos/labels and some
 * statements are not visible in this listing.
 */
6824 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6825 struct data_backref *dback, struct extent_entry *entry)
6827 struct btrfs_trans_handle *trans;
6828 struct btrfs_root *root;
6829 struct btrfs_file_extent_item *fi;
6830 struct extent_buffer *leaf;
6831 struct btrfs_key key;
6835 key.objectid = dback->root;
6836 key.type = BTRFS_ROOT_ITEM_KEY;
6837 key.offset = (u64)-1;
6838 root = btrfs_read_fs_root(info, &key);
6840 fprintf(stderr, "Couldn't find root for our ref\n");
6845 * The backref points to the original offset of the extent if it was
6846 * split, so we need to search down to the offset we have and then walk
6847 * forward until we find the backref we're looking for.
6849 key.objectid = dback->owner;
6850 key.type = BTRFS_EXTENT_DATA_KEY;
6851 key.offset = dback->offset;
6852 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6854 fprintf(stderr, "Error looking up ref %d\n", ret);
6859 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6860 ret = btrfs_next_leaf(root, path);
6862 fprintf(stderr, "Couldn't find our ref, next\n");
6866 leaf = path->nodes[0];
6867 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6868 if (key.objectid != dback->owner ||
6869 key.type != BTRFS_EXTENT_DATA_KEY) {
6870 fprintf(stderr, "Couldn't find our ref, search\n");
6873 fi = btrfs_item_ptr(leaf, path->slots[0],
6874 struct btrfs_file_extent_item);
6875 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6876 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* The wanted item is the one whose disk extent matches the backref. */
6878 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6883 btrfs_release_path(path);
6885 trans = btrfs_start_transaction(root, 1);
6887 return PTR_ERR(trans);
6890 * Ok we have the key of the file extent we want to fix, now we can cow
6891 * down to the thing and fix it.
6893 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6895 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6896 key.objectid, key.type, key.offset, ret);
6900 fprintf(stderr, "Well that's odd, we just found this key "
6901 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6906 leaf = path->nodes[0];
6907 fi = btrfs_item_ptr(leaf, path->slots[0],
6908 struct btrfs_file_extent_item);
6910 if (btrfs_file_extent_compression(leaf, fi) &&
6911 dback->disk_bytenr != entry->bytenr) {
6912 fprintf(stderr, "Ref doesn't match the record start and is "
6913 "compressed, please take a btrfs-image of this file "
6914 "system and send it to a btrfs developer so they can "
6915 "complete this functionality for bytenr %Lu\n",
6916 dback->disk_bytenr);
/* A backref marked broken simply gets the entry's bytenr. */
6921 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6922 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6923 } else if (dback->disk_bytenr > entry->bytenr) {
6924 u64 off_diff, offset;
/*
 * NOTE(review): off_diff is computed here but not used in any visible line
 * before `offset` is written back; the listing may omit the statement that
 * applies it. Verify that the offset is adjusted by off_diff.
 */
6926 off_diff = dback->disk_bytenr - entry->bytenr;
6927 offset = btrfs_file_extent_offset(leaf, fi);
6928 if (dback->disk_bytenr + offset +
6929 btrfs_file_extent_num_bytes(leaf, fi) >
6930 entry->bytenr + entry->bytes) {
6931 fprintf(stderr, "Ref is past the entry end, please "
6932 "take a btrfs-image of this file system and "
6933 "send it to a btrfs developer, ref %Lu\n",
6934 dback->disk_bytenr);
6939 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6940 btrfs_set_file_extent_offset(leaf, fi, offset);
6941 } else if (dback->disk_bytenr < entry->bytenr) {
6944 offset = btrfs_file_extent_offset(leaf, fi);
6945 if (dback->disk_bytenr + offset < entry->bytenr) {
6946 fprintf(stderr, "Ref is before the entry start, please"
6947 " take a btrfs-image of this file system and "
6948 "send it to a btrfs developer, ref %Lu\n",
6949 dback->disk_bytenr);
/* Re-express the offset relative to the entry's start. */
6954 offset += dback->disk_bytenr;
6955 offset -= entry->bytenr;
6956 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6957 btrfs_set_file_extent_offset(leaf, fi, offset);
6960 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6963 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6964 * only do this if we aren't using compression, otherwise it's a
6967 if (!btrfs_file_extent_compression(leaf, fi))
6968 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6970 printf("ram bytes may be wrong?\n");
6971 btrfs_mark_buffer_dirty(leaf);
6973 err = btrfs_commit_transaction(trans, root);
6974 btrfs_release_path(path);
6975 return ret ? ret : err;
/*
 * Make the data backrefs of an extent record agree on bytenr and size.
 *
 * Walks rec->backref_tree, looking only at data backrefs that are not full
 * backrefs and that have found_ref set.  Each distinct (disk_bytenr, bytes)
 * pair is tracked as an extent_entry on a local list.  The winner is chosen
 * with find_most_right_entry(); when the backrefs alone cannot decide, the
 * entry matching (rec->start, rec->nr) is looked up, or created when broken
 * backrefs were seen and the extent record itself was found.  A second walk
 * hands every backref whose values differ from the winner to repair_ref().
 *
 * info, path: passed through to repair_ref().
 * rec:        extent record whose backrefs are verified.
 *
 * The local entry list is drained before the function returns.
 */
6978 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6979 struct extent_record *rec)
6981 struct extent_backref *back, *tmp;
6982 struct data_backref *dback;
6983 struct extent_entry *entry, *best = NULL;
6986 int broken_entries = 0;
6991 * Metadata is easy and the backrefs should always agree on bytenr and
6992 * size, if not we've got bigger issues.
6997 rbtree_postorder_for_each_entry_safe(back, tmp,
6998 &rec->backref_tree, node) {
6999 if (back->full_backref || !back->is_data)
7002 dback = to_data_backref(back);
7005 * We only pay attention to backrefs that we found a real
7008 if (dback->found_ref == 0)
7012 * For now we only catch when the bytes don't match, not the
7013 * bytenr. We can easily do this at the same time, but I want
7014 * to have a fs image to test on before we just add repair
7015 * functionality willy-nilly so we know we won't screw up the
7019 entry = find_entry(&entries, dback->disk_bytenr,
7022 entry = malloc(sizeof(struct extent_entry));
7027 memset(entry, 0, sizeof(*entry));
7028 entry->bytenr = dback->disk_bytenr;
7029 entry->bytes = dback->bytes;
7030 list_add_tail(&entry->list, &entries);
7035 * If we only have on entry we may think the entries agree when
7036 * in reality they don't so we have to do some extra checking.
7038 if (dback->disk_bytenr != rec->start ||
7039 dback->bytes != rec->nr || back->broken)
7050 /* Yay all the backrefs agree, carry on good sir */
7051 if (nr_entries <= 1 && !mismatch)
7054 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7055 "%Lu\n", rec->start);
7058 * First we want to see if the backrefs can agree amongst themselves who
7059 * is right, so figure out which one of the entries has the highest
7062 best = find_most_right_entry(&entries);
7065 * Ok so we may have an even split between what the backrefs think, so
7066 * this is where we use the extent ref to see what it thinks.
7069 entry = find_entry(&entries, rec->start, rec->nr);
7070 if (!entry && (!broken_entries || !rec->found_rec)) {
7071 fprintf(stderr, "Backrefs don't agree with each other "
7072 "and extent record doesn't agree with anybody,"
7073 " so we can't fix bytenr %Lu bytes %Lu\n",
7074 rec->start, rec->nr);
7077 } else if (!entry) {
7079 * Ok our backrefs were broken, we'll assume this is the
7080 * correct value and add an entry for this range.
7082 entry = malloc(sizeof(struct extent_entry));
7087 memset(entry, 0, sizeof(*entry));
7088 entry->bytenr = rec->start;
7089 entry->bytes = rec->nr;
7090 list_add_tail(&entry->list, &entries);
7094 best = find_most_right_entry(&entries);
7096 fprintf(stderr, "Backrefs and extent record evenly "
7097 "split on who is right, this is going to "
7098 "require user input to fix bytenr %Lu bytes "
7099 "%Lu\n", rec->start, rec->nr);
7106 * I don't think this can happen currently as we'll abort() if we catch
7107 * this case higher up, but in case somebody removes that we still can't
7108 * deal with it properly here yet, so just bail out of that's the case.
7110 if (best->bytenr != rec->start) {
7111 fprintf(stderr, "Extent start and backref starts don't match, "
7112 "please use btrfs-image on this file system and send "
7113 "it to a btrfs developer so they can make fsck fix "
7114 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7115 rec->start, rec->nr);
7121 * Ok great we all agreed on an extent record, let's go find the real
7122 * references and fix up the ones that don't match.
7124 rbtree_postorder_for_each_entry_safe(back, tmp,
7125 &rec->backref_tree, node) {
7126 if (back->full_backref || !back->is_data)
7129 dback = to_data_backref(back);
7132 * Still ignoring backrefs that don't have a real ref attached
7135 if (dback->found_ref == 0)
7138 if (dback->bytes == best->bytes &&
7139 dback->disk_bytenr == best->bytenr)
7142 ret = repair_ref(info, path, dback, best);
7148 * Ok we messed with the actual refs, which means we need to drop our
7149 * entire cache and go back and rescan. I know this is a huge pain and
7150 * adds a lot of extra work, but it's the only way to be safe. Once all
7151 * the backrefs agree we may not need to do anything to the extent
7156 while (!list_empty(&entries)) {
7157 entry = list_entry(entries.next, struct extent_entry, list);
7158 list_del_init(&entry->list);
/*
 * Promote a duplicate extent record to be the authoritative record.
 *
 * The found_rec / num_duplicates > 1 test at the top guards the whole
 * operation.  Past it, rec is removed from extent_cache and the first
 * record on rec->dups ("good") takes over: its list heads and cache range
 * are reinitialised, rec->refs is copied and rec->backrefs is spliced onto
 * it.  Further records fetched from extent_cache are merged in: those with
 * found_rec or duplicates of their own are moved onto good->dups (and good
 * is queued on duplicate_extents), the others only contribute their refs
 * and backrefs.  Finally good is inserted into extent_cache.
 *
 * The final return statement yields 0 when good carries duplicates and 1
 * otherwise.
 *
 * NOTE(review): backrefs are moved here with list helpers on ->backrefs
 * while other functions in this file iterate ->backref_tree; confirm that
 * both members are kept in sync.
 */
7164 static int process_duplicates(struct btrfs_root *root,
7165 struct cache_tree *extent_cache,
7166 struct extent_record *rec)
7168 struct extent_record *good, *tmp;
7169 struct cache_extent *cache;
7173 * If we found a extent record for this extent then return, or if we
7174 * have more than one duplicate we are likely going to need to delete
7177 if (rec->found_rec || rec->num_duplicates > 1)
7180 /* Shouldn't happen but just in case */
7181 BUG_ON(!rec->num_duplicates);
7184 * So this happens if we end up with a backref that doesn't match the
7185 * actual extent entry. So either the backref is bad or the extent
7186 * entry is bad. Either way we want to have the extent_record actually
7187 * reflect what we found in the extent_tree, so we need to take the
7188 * duplicate out and use that as the extent_record since the only way we
7189 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7191 remove_cache_extent(extent_cache, &rec->cache);
7193 good = to_extent_record(rec->dups.next);
7194 list_del_init(&good->list);
7195 INIT_LIST_HEAD(&good->backrefs);
7196 INIT_LIST_HEAD(&good->dups);
7197 good->cache.start = good->start;
7198 good->cache.size = good->nr;
7199 good->content_checked = 0;
7200 good->owner_ref_checked = 0;
7201 good->num_duplicates = 0;
7202 good->refs = rec->refs;
7203 list_splice_init(&rec->backrefs, &good->backrefs);
7205 cache = lookup_cache_extent(extent_cache, good->start,
7209 tmp = container_of(cache, struct extent_record, cache);
7212 * If we find another overlapping extent and it's found_rec is
7213 * set then it's a duplicate and we need to try and delete
7216 if (tmp->found_rec || tmp->num_duplicates > 0) {
7217 if (list_empty(&good->list))
7218 list_add_tail(&good->list,
7219 &duplicate_extents);
7220 good->num_duplicates += tmp->num_duplicates + 1;
7221 list_splice_init(&tmp->dups, &good->dups);
7222 list_del_init(&tmp->list);
7223 list_add_tail(&tmp->list, &good->dups);
7224 remove_cache_extent(extent_cache, &tmp->cache);
7229 * Ok we have another non extent item backed extent rec, so lets
7230 * just add it to this extent and carry on like we did above.
7232 good->refs += tmp->refs;
7233 list_splice_init(&tmp->backrefs, &good->backrefs);
7234 remove_cache_extent(extent_cache, &tmp->cache);
7237 ret = insert_cache_extent(extent_cache, &good->cache);
7240 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of the duplicates of rec, keeping the record
 * that covers the others.
 *
 * The covering record is searched among the entries of rec->dups; extents
 * that overlap without one fully covering the other are reported and not
 * handled.  rec and entries of rec->dups are queued on a local delete
 * list.  Inside one transaction on the extent root, listed records are
 * filtered on found_rec and looked up as
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) before btrfs_del_item() is called.
 * Metadata records are reported as unexpected.
 *
 * Both the delete list and rec->dups are emptied before returning, and
 * rec->num_duplicates is reset when there was no error and nr_del is 0.
 *
 * Returns ret when it is non-zero, otherwise nr_del (presumably the number
 * of items deleted - confirm where nr_del is incremented).
 */
7243 static int delete_duplicate_records(struct btrfs_root *root,
7244 struct extent_record *rec)
7246 struct btrfs_trans_handle *trans;
7247 LIST_HEAD(delete_list);
7248 struct btrfs_path *path;
7249 struct extent_record *tmp, *good, *n;
7252 struct btrfs_key key;
7254 path = btrfs_alloc_path();
7261 /* Find the record that covers all of the duplicates. */
7262 list_for_each_entry(tmp, &rec->dups, list) {
7263 if (good->start < tmp->start)
7265 if (good->nr > tmp->nr)
7268 if (tmp->start + tmp->nr < good->start + good->nr) {
7269 fprintf(stderr, "Ok we have overlapping extents that "
7270 "aren't completely covered by each other, this "
7271 "is going to require more careful thought. "
7272 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7273 tmp->start, tmp->nr, good->start, good->nr);
7280 list_add_tail(&rec->list, &delete_list);
7282 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7285 list_move_tail(&tmp->list, &delete_list);
7288 root = root->fs_info->extent_root;
7289 trans = btrfs_start_transaction(root, 1);
7290 if (IS_ERR(trans)) {
7291 ret = PTR_ERR(trans);
7295 list_for_each_entry(tmp, &delete_list, list) {
7296 if (tmp->found_rec == 0)
7298 key.objectid = tmp->start;
7299 key.type = BTRFS_EXTENT_ITEM_KEY;
7300 key.offset = tmp->nr;
7302 /* Shouldn't happen but just in case */
7303 if (tmp->metadata) {
7304 fprintf(stderr, "Well this shouldn't happen, extent "
7305 "record overlaps but is metadata? "
7306 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7310 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7316 ret = btrfs_del_item(trans, root, path);
7319 btrfs_release_path(path);
7322 err = btrfs_commit_transaction(trans, root);
7326 while (!list_empty(&delete_list)) {
7327 tmp = to_extent_record(delete_list.next);
7328 list_del_init(&tmp->list);
7334 while (!list_empty(&rec->dups)) {
7335 tmp = to_extent_record(rec->dups.next);
7336 list_del_init(&tmp->list);
7340 btrfs_free_path(path);
7342 if (!ret && !nr_del)
7343 rec->num_duplicates = 0;
7345 return ret ? ret : nr_del;
/*
 * Try to locate the file extent behind data backrefs of rec that were
 * never matched (found_ref == 0).
 *
 * For each such backref the owning root is read with btrfs_read_fs_root()
 * and the (owner, BTRFS_EXTENT_DATA_KEY, offset) item is searched in it.
 * The disk bytenr and disk size stored in that file extent are read and
 * the bytenr is looked up in extent_cache; as the comment in the loop
 * explains, a backref is only adopted when no extent item backs that
 * bytenr.  An adopted backref gets found_ref incremented and its
 * disk_bytenr/bytes overwritten with the values read from the file extent.
 *
 * A missing root (-ENOENT) marks the ref as bad and is skipped; any other
 * error from reading the root is returned via PTR_ERR().
 *
 * NOTE(review): the inner declaration of tmp (struct extent_record *)
 * shadows the iterator cursor of the same name - consider renaming.
 */
7348 static int find_possible_backrefs(struct btrfs_fs_info *info,
7349 struct btrfs_path *path,
7350 struct cache_tree *extent_cache,
7351 struct extent_record *rec)
7353 struct btrfs_root *root;
7354 struct extent_backref *back, *tmp;
7355 struct data_backref *dback;
7356 struct cache_extent *cache;
7357 struct btrfs_file_extent_item *fi;
7358 struct btrfs_key key;
7362 rbtree_postorder_for_each_entry_safe(back, tmp,
7363 &rec->backref_tree, node) {
7364 /* Don't care about full backrefs (poor unloved backrefs) */
7365 if (back->full_backref || !back->is_data)
7368 dback = to_data_backref(back);
7370 /* We found this one, we don't need to do a lookup */
7371 if (dback->found_ref)
7374 key.objectid = dback->root;
7375 key.type = BTRFS_ROOT_ITEM_KEY;
7376 key.offset = (u64)-1;
7378 root = btrfs_read_fs_root(info, &key);
7380 /* No root, definitely a bad ref, skip */
7381 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7383 /* Other err, exit */
7385 return PTR_ERR(root);
7387 key.objectid = dback->owner;
7388 key.type = BTRFS_EXTENT_DATA_KEY;
7389 key.offset = dback->offset;
7390 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7392 btrfs_release_path(path);
7395 /* Didn't find it, we can carry on */
7400 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7401 struct btrfs_file_extent_item);
7402 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7403 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7404 btrfs_release_path(path);
7405 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7407 struct extent_record *tmp;
7408 tmp = container_of(cache, struct extent_record, cache);
7411 * If we found an extent record for the bytenr for this
7412 * particular backref then we can't add it to our
7413 * current extent record. We only want to add backrefs
7414 * that don't have a corresponding extent item in the
7415 * extent tree since they likely belong to this record
7416 * and we need to fix it if it doesn't match bytenrs.
7422 dback->found_ref += 1;
7423 dback->disk_bytenr = bytenr;
7424 dback->bytes = bytes;
7427 * Set this so the verify backref code knows not to trust the
7428 * values in this backref.
7437 * Record orphan data ref into corresponding root.
7439 * Return 0 if the extent item contains data ref and recorded.
7440 * Return 1 if the extent item contains no useful data ref
7441 * In that case, it may contain only shared_dataref or metadata backref
7442 * or the file extent exists(this should be handled by the extent bytenr
7444 * Return <0 if something goes wrong.
/*
 * fs_info: filesystem the destination roots are read from.
 * rec:     extent record whose data backrefs are examined.
 *
 * Only data backrefs that were found in the extent tree, are not full
 * backrefs and have found_ref == 0 are considered.  Backrefs whose root
 * cannot be read are skipped.  For the remaining ones an
 * orphan_data_extent holding (root, owner, offset) plus the extent range
 * rec->cache.start / rec->cache.size is allocated and linked with
 * dest_root->orphan_data_extents.
 *
 * The visible return statement yields !recorded_data_ref, i.e. 0 once at
 * least one orphan was recorded.
 *
 * NOTE(review): list_add() is called with the root list head as first
 * argument and &orphan->list as second; with the usual
 * list_add(new, head) signature the arguments look swapped, which would
 * drop earlier entries from the root list - confirm.
 * NOTE(review): no btrfs_release_path() is visible between the
 * btrfs_search_slot() in the loop and the next iteration - confirm the
 * path is released.
 */
7446 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7447 struct extent_record *rec)
7449 struct btrfs_key key;
7450 struct btrfs_root *dest_root;
7451 struct extent_backref *back, *tmp;
7452 struct data_backref *dback;
7453 struct orphan_data_extent *orphan;
7454 struct btrfs_path *path;
7455 int recorded_data_ref = 0;
7460 path = btrfs_alloc_path();
7463 rbtree_postorder_for_each_entry_safe(back, tmp,
7464 &rec->backref_tree, node) {
7465 if (back->full_backref || !back->is_data ||
7466 !back->found_extent_tree)
7468 dback = to_data_backref(back);
7469 if (dback->found_ref)
7471 key.objectid = dback->root;
7472 key.type = BTRFS_ROOT_ITEM_KEY;
7473 key.offset = (u64)-1;
7475 dest_root = btrfs_read_fs_root(fs_info, &key);
7477 /* For non-exist root we just skip it */
7478 if (IS_ERR(dest_root) || !dest_root)
7481 key.objectid = dback->owner;
7482 key.type = BTRFS_EXTENT_DATA_KEY;
7483 key.offset = dback->offset;
7485 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7487 * For ret < 0, it's OK since the fs-tree may be corrupted,
7488 * we need to record it for inode/file extent rebuild.
7489 * For ret > 0, we record it only for file extent rebuild.
7490 * For ret == 0, the file extent exists but only bytenr
7491 * mismatch, let the original bytenr fix routine to handle,
7497 orphan = malloc(sizeof(*orphan));
7502 INIT_LIST_HEAD(&orphan->list);
7503 orphan->root = dback->root;
7504 orphan->objectid = dback->owner;
7505 orphan->offset = dback->offset;
7506 orphan->disk_bytenr = rec->cache.start;
7507 orphan->disk_len = rec->cache.size;
7508 list_add(&dest_root->orphan_data_extents, &orphan->list);
7509 recorded_data_ref = 1;
7512 btrfs_free_path(path);
7514 return !recorded_data_ref;
7520 * when an incorrect extent item is found, this will delete
7521 * all of the existing entries for it and recreate them
7522 * based on what the tree scan found.
/*
 * info:         filesystem being repaired.
 * extent_cache: extent record cache, handed to find_possible_backrefs().
 * rec:          extent record whose extent tree items are rebuilt.
 *
 * Steps, as marked in the body: for data extents whose ref counts
 * disagree, find_possible_backrefs() is tried before verify_backrefs()
 * (step one); delete_extent_records() is called for rec->start /
 * rec->max_size (step two); info->corrupt_blocks is consulted so that no
 * references are added to a corrupt block, and record_extent() is called
 * for backrefs with found_ref set (step three).  A transaction on the
 * extent root is started before step two and committed at the end, and
 * the path is freed before returning.
 */
7524 static int fixup_extent_refs(struct btrfs_fs_info *info,
7525 struct cache_tree *extent_cache,
7526 struct extent_record *rec)
7528 struct btrfs_trans_handle *trans = NULL;
7530 struct btrfs_path *path;
7531 struct cache_extent *cache;
7532 struct extent_backref *back, *tmp;
7536 if (rec->flag_block_full_backref)
7537 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7539 path = btrfs_alloc_path();
7543 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7545 * Sometimes the backrefs themselves are so broken they don't
7546 * get attached to any meaningful rec, so first go back and
7547 * check any of our backrefs that we couldn't find and throw
7548 * them into the list if we find the backref so that
7549 * verify_backrefs can figure out what to do.
7551 ret = find_possible_backrefs(info, path, extent_cache, rec);
7556 /* step one, make sure all of the backrefs agree */
7557 ret = verify_backrefs(info, path, rec);
7561 trans = btrfs_start_transaction(info->extent_root, 1);
7562 if (IS_ERR(trans)) {
7563 ret = PTR_ERR(trans);
7567 /* step two, delete all the existing records */
7568 ret = delete_extent_records(trans, info->extent_root, path,
7569 rec->start, rec->max_size);
7574 /* was this block corrupt? If so, don't add references to it */
7575 cache = lookup_cache_extent(info->corrupt_blocks,
7576 rec->start, rec->max_size);
7582 /* step three, recreate all the refs we did find */
7583 rbtree_postorder_for_each_entry_safe(back, tmp,
7584 &rec->backref_tree, node) {
7586 * if we didn't find any references, don't create a
7589 if (!back->found_ref)
7592 rec->bad_full_backref = 0;
7593 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7601 int err = btrfs_commit_transaction(trans, info->extent_root);
7606 btrfs_free_path(path);
/*
 * Rewrite the FULL_BACKREF flag of the extent item that belongs to rec.
 *
 * The item is looked up in the extent root as (rec->start,
 * BTRFS_METADATA_ITEM_KEY, rec->info_level) for metadata records and as
 * (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) otherwise.  Depending
 * on rec->flag_block_full_backref, BTRFS_BLOCK_FLAG_FULL_BACKREF is set or
 * cleared in the item flags, the leaf is marked dirty and the transaction
 * is committed.
 *
 * Returns PTR_ERR() of the transaction when it cannot be started; on the
 * success path the result of btrfs_commit_transaction() is returned.  On
 * the two lookup-failure paths the path is freed and the transaction is
 * still committed.
 */
7610 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7611 struct extent_record *rec)
7613 struct btrfs_trans_handle *trans;
7614 struct btrfs_root *root = fs_info->extent_root;
7615 struct btrfs_path *path;
7616 struct btrfs_extent_item *ei;
7617 struct btrfs_key key;
7621 key.objectid = rec->start;
7622 if (rec->metadata) {
7623 key.type = BTRFS_METADATA_ITEM_KEY;
7624 key.offset = rec->info_level;
7626 key.type = BTRFS_EXTENT_ITEM_KEY;
7627 key.offset = rec->max_size;
7630 path = btrfs_alloc_path();
7634 trans = btrfs_start_transaction(root, 0);
7635 if (IS_ERR(trans)) {
7636 btrfs_free_path(path);
7637 return PTR_ERR(trans);
7640 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7642 btrfs_free_path(path);
7643 btrfs_commit_transaction(trans, root);
7646 fprintf(stderr, "Didn't find extent for %llu\n",
7647 (unsigned long long)rec->start);
7648 btrfs_free_path(path);
7649 btrfs_commit_transaction(trans, root);
7653 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7654 struct btrfs_extent_item);
7655 flags = btrfs_extent_flags(path->nodes[0], ei);
7656 if (rec->flag_block_full_backref) {
7657 fprintf(stderr, "setting full backref on %llu\n",
7658 (unsigned long long)key.objectid);
7659 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7661 fprintf(stderr, "clearing full backref on %llu\n",
7662 (unsigned long long)key.objectid);
7663 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7665 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7666 btrfs_mark_buffer_dirty(path->nodes[0]);
7667 btrfs_free_path(path);
7668 return btrfs_commit_transaction(trans, root);
7671 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * The search is stopped one level above the corrupt block
 * (lowest_level = corrupt->level + 1).  The slot returned by the search is
 * tried first; if its block pointer is not corrupt->cache.start, every
 * slot of the node is scanned.  The pointer is then dropped with
 * btrfs_del_ptr().
 *
 * trans:   running transaction.
 * info:    filesystem whose extent_root is searched.
 * corrupt: description (key, level, cache.start) of the block to prune.
 */
7672 static int prune_one_block(struct btrfs_trans_handle *trans,
7673 struct btrfs_fs_info *info,
7674 struct btrfs_corrupt_block *corrupt)
7677 struct btrfs_path path;
7678 struct extent_buffer *eb;
7682 int level = corrupt->level + 1;
7684 btrfs_init_path(&path);
7686 /* we want to stop at the parent to our busted block */
7687 path.lowest_level = level;
7689 ret = btrfs_search_slot(trans, info->extent_root,
7690 &corrupt->key, &path, -1, 1);
7695 eb = path.nodes[level];
7702 * hopefully the search gave us the block we want to prune,
7703 * lets try that first
7705 slot = path.slots[level];
7706 found = btrfs_node_blockptr(eb, slot);
7707 if (found == corrupt->cache.start)
7710 nritems = btrfs_header_nritems(eb);
7712 /* the search failed, lets scan this node and hope we find it */
7713 for (slot = 0; slot < nritems; slot++) {
7714 found = btrfs_node_blockptr(eb, slot);
7715 if (found == corrupt->cache.start)
7719 * we couldn't find the bad block. TODO, search all the nodes for pointers
7722 if (eb == info->extent_root->node) {
7727 btrfs_release_path(&path);
7732 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7733 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7736 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks from the extent tree.
 *
 * An entry is fetched with search_cache_extent(), handed to
 * prune_one_block() and then removed from the cache.  A transaction on the
 * extent root is started for this work and committed at the end.
 *
 * Returns PTR_ERR() when the transaction cannot be started, or the result
 * of btrfs_commit_transaction().
 * NOTE(review): the return value of prune_one_block() is not used.
 */
7740 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7742 struct btrfs_trans_handle *trans = NULL;
7743 struct cache_extent *cache;
7744 struct btrfs_corrupt_block *corrupt;
7747 cache = search_cache_extent(info->corrupt_blocks, 0);
7751 trans = btrfs_start_transaction(info->extent_root, 1);
7753 return PTR_ERR(trans);
7755 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7756 prune_one_block(trans, info, corrupt);
7757 remove_cache_extent(info->corrupt_blocks, cache);
7760 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges of fs_info->free_space_cache.
 *
 * A dirty range is located with find_first_extent_bit() and cleared with
 * clear_extent_dirty().  The first block group at the current start
 * position is also looked up, and start is advanced to the end of that
 * block group (key.objectid + key.offset).
 */
7764 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7766 struct btrfs_block_group_cache *cache;
7771 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7772 &start, &end, EXTENT_DIRTY);
7775 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7781 cache = btrfs_lookup_first_block_group(fs_info, start);
7786 start = cache->key.objectid + cache->key.offset;
/*
 * Report, and in repair mode try to fix, the inconsistencies recorded in
 * the extent records of extent_cache.
 *
 * For a repair run the ranges of the extent records and of the corrupt
 * blocks are first marked dirty in fs_info->excluded_extents so that
 * nothing is allocated from them; corrupt blocks are pruned and the
 * cached block groups reset.  Records queued on duplicate_extents are then
 * resolved through process_duplicates() and delete_duplicate_records().
 *
 * Each record taken from extent_cache is checked for: duplicate extent
 * items, a ref count differing from the extent item, backpointer
 * mismatches, a failed owner ref check, a bad full backref flag, metadata
 * crossing a stripe boundary and a chunk type mismatch.  Problems are
 * printed; where a repair routine exists, fixup_extent_refs() or
 * fixup_extent_flags() is invoked, and record_orphan_data_extents() is
 * tried first for ref count mismatches.  The record is then removed from
 * the cache, its backrefs are freed, and its excluded range is cleared
 * again when repairing succeeded or nothing was wrong.
 *
 * At the end block accounting is fixed in a separate transaction through
 * btrfs_fix_block_accounting().
 */
7790 static int check_extent_refs(struct btrfs_root *root,
7791 struct cache_tree *extent_cache)
7793 struct extent_record *rec;
7794 struct cache_extent *cache;
7803 * if we're doing a repair, we have to make sure
7804 * we don't allocate from the problem extents.
7805 * In the worst case, this will be all the
7808 cache = search_cache_extent(extent_cache, 0);
7810 rec = container_of(cache, struct extent_record, cache);
7811 set_extent_dirty(root->fs_info->excluded_extents,
7813 rec->start + rec->max_size - 1,
7815 cache = next_cache_extent(cache);
7818 /* pin down all the corrupted blocks too */
7819 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7821 set_extent_dirty(root->fs_info->excluded_extents,
7823 cache->start + cache->size - 1,
7825 cache = next_cache_extent(cache);
7827 prune_corrupt_blocks(root->fs_info);
7828 reset_cached_block_groups(root->fs_info);
7831 reset_cached_block_groups(root->fs_info);
7834 * We need to delete any duplicate entries we find first otherwise we
7835 * could mess up the extent tree when we have backrefs that actually
7836 * belong to a different extent item and not the weird duplicate one.
7838 while (repair && !list_empty(&duplicate_extents)) {
7839 rec = to_extent_record(duplicate_extents.next);
7840 list_del_init(&rec->list);
7842 /* Sometimes we can find a backref before we find an actual
7843 * extent, so we need to process it a little bit to see if there
7844 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7845 * if this is a backref screwup. If we need to delete stuff
7846 * process_duplicates() will return 0, otherwise it will return
7849 if (process_duplicates(root, extent_cache, rec))
7851 ret = delete_duplicate_records(root, rec);
7855 * delete_duplicate_records will return the number of entries
7856 * deleted, so if it's greater than 0 then we know we actually
7857 * did something and we need to remove.
7871 cache = search_cache_extent(extent_cache, 0);
7874 rec = container_of(cache, struct extent_record, cache);
7875 if (rec->num_duplicates) {
7876 fprintf(stderr, "extent item %llu has multiple extent "
7877 "items\n", (unsigned long long)rec->start);
7882 if (rec->refs != rec->extent_item_refs) {
7883 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7884 (unsigned long long)rec->start,
7885 (unsigned long long)rec->nr);
7886 fprintf(stderr, "extent item %llu, found %llu\n",
7887 (unsigned long long)rec->extent_item_refs,
7888 (unsigned long long)rec->refs);
7889 ret = record_orphan_data_extents(root->fs_info, rec);
7896 * we can't use the extent to repair file
7897 * extent, let the fallback method handle it.
7899 if (!fixed && repair) {
7900 ret = fixup_extent_refs(
7911 if (all_backpointers_checked(rec, 1)) {
7912 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7913 (unsigned long long)rec->start,
7914 (unsigned long long)rec->nr);
7916 if (!fixed && !recorded && repair) {
7917 ret = fixup_extent_refs(root->fs_info,
7926 if (!rec->owner_ref_checked) {
7927 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7928 (unsigned long long)rec->start,
7929 (unsigned long long)rec->nr);
7930 if (!fixed && !recorded && repair) {
7931 ret = fixup_extent_refs(root->fs_info,
7940 if (rec->bad_full_backref) {
7941 fprintf(stderr, "bad full backref, on [%llu]\n",
7942 (unsigned long long)rec->start);
7944 ret = fixup_extent_flags(root->fs_info, rec);
7953 * Although it's not a extent ref's problem, we reuse this
7954 * routine for error reporting.
7955 * No repair function yet.
7957 if (rec->crossing_stripes) {
7959 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7960 rec->start, rec->start + rec->max_size);
7965 if (rec->wrong_chunk_type) {
7967 "bad extent [%llu, %llu), type mismatch with chunk\n",
7968 rec->start, rec->start + rec->max_size);
7973 remove_cache_extent(extent_cache, cache);
7974 free_all_extent_backrefs(rec);
7975 if (!init_extent_tree && repair && (!cur_err || fixed))
7976 clear_extent_dirty(root->fs_info->excluded_extents,
7978 rec->start + rec->max_size - 1,
7984 if (ret && ret != -EAGAIN) {
7985 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7988 struct btrfs_trans_handle *trans;
7990 root = root->fs_info->extent_root;
7991 trans = btrfs_start_transaction(root, 1);
7992 if (IS_ERR(trans)) {
7993 ret = PTR_ERR(trans);
7997 btrfs_fix_block_accounting(trans, root);
7998 ret = btrfs_commit_transaction(trans, root);
8003 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-stripe length of a chunk from its profile.
 *
 * type:        block group flags; tested against the RAID0, RAID10, RAID5
 *              and RAID6 bits in that order.
 * length:      chunk length.
 * num_stripes: number of stripes of the chunk.
 *
 * RAID0 divides length by num_stripes, RAID10 divides twice the length by
 * num_stripes, RAID5 divides by (num_stripes - 1) and RAID6 by
 * (num_stripes - 2); for the remaining profiles the stripe size equals
 * length.
 *
 * NOTE(review): nothing here guards against a zero divisor (num_stripes
 * of 0 for RAID0/RAID10, 1 for RAID5, 2 for RAID6) - confirm that callers
 * validate num_stripes before calling.
 */
8009 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8013 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8014 stripe_size = length;
8015 stripe_size /= num_stripes;
8016 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8017 stripe_size = length * 2;
8018 stripe_size /= num_stripes;
8019 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8020 stripe_size = length;
8021 stripe_size /= (num_stripes - 1);
8022 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8023 stripe_size = length;
8024 stripe_size /= (num_stripes - 2);
8026 stripe_size = length;
8032 * Check the chunk with its block group/dev list ref:
8033 * Return 0 if all refs seem valid.
8034 * Return 1 if only part of the refs seem valid and a later check is needed to rebuild refs,
8035 * like missing block group and needs to search extent tree to rebuild them.
8036 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * chunk_rec:         chunk record to verify.
 * block_group_cache: block group records to match the chunk against.
 * dev_extent_cache:  device extent records, looked up once per stripe.
 *
 * The block group record is compared with the chunk on length/offset
 * (and type flags); a record that is accepted is unlinked from its list
 * and stored in chunk_rec->bg_rec.  For every stripe the device extent at
 * (devid, offset) with the length from calc_stripe_length() is looked up
 * and compared on devid, offset, chunk offset and length; accepted records
 * are moved onto chunk_rec->dextents.  Mismatching and missing records
 * are reported with the messages below.
 */
8038 static int check_chunk_refs(struct chunk_record *chunk_rec,
8039 struct block_group_tree *block_group_cache,
8040 struct device_extent_tree *dev_extent_cache,
8043 struct cache_extent *block_group_item;
8044 struct block_group_record *block_group_rec;
8045 struct cache_extent *dev_extent_item;
8046 struct device_extent_record *dev_extent_rec;
8050 int metadump_v2 = 0;
8054 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8057 if (block_group_item) {
8058 block_group_rec = container_of(block_group_item,
8059 struct block_group_record,
8061 if (chunk_rec->length != block_group_rec->offset ||
8062 chunk_rec->offset != block_group_rec->objectid ||
8064 chunk_rec->type_flags != block_group_rec->flags)) {
8067 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8068 chunk_rec->objectid,
8073 chunk_rec->type_flags,
8074 block_group_rec->objectid,
8075 block_group_rec->type,
8076 block_group_rec->offset,
8077 block_group_rec->offset,
8078 block_group_rec->objectid,
8079 block_group_rec->flags);
8082 list_del_init(&block_group_rec->list);
8083 chunk_rec->bg_rec = block_group_rec;
8088 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8089 chunk_rec->objectid,
8094 chunk_rec->type_flags);
8101 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8102 chunk_rec->num_stripes);
8103 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8104 devid = chunk_rec->stripes[i].devid;
8105 offset = chunk_rec->stripes[i].offset;
8106 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8107 devid, offset, length);
8108 if (dev_extent_item) {
8109 dev_extent_rec = container_of(dev_extent_item,
8110 struct device_extent_record,
8112 if (dev_extent_rec->objectid != devid ||
8113 dev_extent_rec->offset != offset ||
8114 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8115 dev_extent_rec->length != length) {
8118 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8119 chunk_rec->objectid,
8122 chunk_rec->stripes[i].devid,
8123 chunk_rec->stripes[i].offset,
8124 dev_extent_rec->objectid,
8125 dev_extent_rec->offset,
8126 dev_extent_rec->length);
8129 list_move(&dev_extent_rec->chunk_list,
8130 &chunk_rec->dextents);
8135 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8136 chunk_rec->objectid,
8139 chunk_rec->stripes[i].devid,
8140 chunk_rec->stripes[i].offset);
8147 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every record of chunk_cache and sort the
 * records onto the caller supplied lists: good when the result is 0 and
 * the list is given, rebuild when the result is positive and the list is
 * given, and bad (presumably for a negative result - confirm).
 * Afterwards the block groups still linked on
 * block_group_cache->block_groups and the device extents still linked on
 * dev_extent_cache->no_chunk_orphans are reported as lacking a chunk.
 *
 * silent is forwarded to check_chunk_refs().
 */
8148 int check_chunks(struct cache_tree *chunk_cache,
8149 struct block_group_tree *block_group_cache,
8150 struct device_extent_tree *dev_extent_cache,
8151 struct list_head *good, struct list_head *bad,
8152 struct list_head *rebuild, int silent)
8154 struct cache_extent *chunk_item;
8155 struct chunk_record *chunk_rec;
8156 struct block_group_record *bg_rec;
8157 struct device_extent_record *dext_rec;
8161 chunk_item = first_cache_extent(chunk_cache);
8162 while (chunk_item) {
8163 chunk_rec = container_of(chunk_item, struct chunk_record,
8165 err = check_chunk_refs(chunk_rec, block_group_cache,
8166 dev_extent_cache, silent);
8169 if (err == 0 && good)
8170 list_add_tail(&chunk_rec->list, good);
8171 if (err > 0 && rebuild)
8172 list_add_tail(&chunk_rec->list, rebuild);
8174 list_add_tail(&chunk_rec->list, bad);
8175 chunk_item = next_cache_extent(chunk_item);
8178 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8181 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8189 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8193 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Compare the bytes covered by the device extents of one device with the
 * byte_used value of its device record.
 *
 * Starting at (dev_rec->devid, 0) in dext_cache, device extents are
 * visited in cache order; the objectid of each is tested against
 * dev_rec->devid, the extent is unlinked from its device_list and its
 * length added to total_byte.  A total that differs from
 * dev_rec->byte_used is reported.
 */
8204 static int check_device_used(struct device_record *dev_rec,
8205 struct device_extent_tree *dext_cache)
8207 struct cache_extent *cache;
8208 struct device_extent_record *dev_extent_rec;
8211 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8213 dev_extent_rec = container_of(cache,
8214 struct device_extent_record,
8216 if (dev_extent_rec->objectid != dev_rec->devid)
8219 list_del_init(&dev_extent_rec->device_list);
8220 total_byte += dev_extent_rec->length;
8221 cache = next_cache_extent(cache);
8224 if (total_byte != dev_rec->byte_used) {
8226 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8227 total_byte, dev_rec->byte_used, dev_rec->objectid,
8228 dev_rec->type, dev_rec->offset);
8235 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Call check_device_used() for the device records in dev_cache (walked
 * with rb_first()/rb_next()), then report the device extents still linked
 * on dev_extent_cache->no_device_orphans as having no device.
 */
8236 static int check_devices(struct rb_root *dev_cache,
8237 struct device_extent_tree *dev_extent_cache)
8239 struct rb_node *dev_node;
8240 struct device_record *dev_rec;
8241 struct device_extent_record *dext_rec;
8245 dev_node = rb_first(dev_cache);
8247 dev_rec = container_of(dev_node, struct device_record, node);
8248 err = check_device_used(dev_rec, dev_extent_cache);
8252 dev_node = rb_next(dev_node);
8254 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8257 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8258 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it
 * to the tail of head.
 *
 * objectid, bytenr, last_snapshot, level, drop_level and level_size are
 * stored in the record as given; drop_key is copied by value with
 * memcpy().
 */
8265 static int add_root_item_to_list(struct list_head *head,
8266 u64 objectid, u64 bytenr, u64 last_snapshot,
8267 u8 level, u8 drop_level,
8268 int level_size, struct btrfs_key *drop_key)
8271 struct root_item_record *ri_rec;
8272 ri_rec = malloc(sizeof(*ri_rec));
8275 ri_rec->bytenr = bytenr;
8276 ri_rec->objectid = objectid;
8277 ri_rec->level = level;
8278 ri_rec->level_size = level_size;
8279 ri_rec->drop_level = drop_level;
8280 ri_rec->last_snapshot = last_snapshot;
8282 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8283 list_add_tail(&ri_rec->list, head);
/*
 * Empty list by repeatedly taking its first root_item_record and
 * unlinking it (presumably releasing the record as well - confirm).
 */
8288 static void free_root_item_list(struct list_head *list)
8290 struct root_item_record *ri_rec;
8292 while (!list_empty(list)) {
8293 ri_rec = list_first_entry(list, struct root_item_record,
8295 list_del_init(&ri_rec->list);
/*
 * Process the tree roots queued on list.
 *
 * For each root_item_record the root block is read with
 * read_tree_block(); a block that is not uptodate is released again.
 * The block is queued through add_root_to_pending() and run_next_block()
 * is called with the record, after which the buffer is released and the
 * record unlinked from list.  Further run_next_block() calls with a NULL
 * record follow the list walk.
 *
 * The cache-tree, device and block-group arguments are only forwarded to
 * add_root_to_pending() and run_next_block().
 */
8300 static int deal_root_from_list(struct list_head *list,
8301 struct btrfs_root *root,
8302 struct block_info *bits,
8304 struct cache_tree *pending,
8305 struct cache_tree *seen,
8306 struct cache_tree *reada,
8307 struct cache_tree *nodes,
8308 struct cache_tree *extent_cache,
8309 struct cache_tree *chunk_cache,
8310 struct rb_root *dev_cache,
8311 struct block_group_tree *block_group_cache,
8312 struct device_extent_tree *dev_extent_cache)
8317 while (!list_empty(list)) {
8318 struct root_item_record *rec;
8319 struct extent_buffer *buf;
8320 rec = list_entry(list->next,
8321 struct root_item_record, list);
8323 buf = read_tree_block(root->fs_info->tree_root,
8324 rec->bytenr, rec->level_size, 0);
8325 if (!extent_buffer_uptodate(buf)) {
8326 free_extent_buffer(buf);
8330 add_root_to_pending(buf, extent_cache, pending,
8331 seen, nodes, rec->objectid);
8333 * To rebuild extent tree, we need deal with snapshot
8334 * one by one, otherwise we deal with node firstly which
8335 * can maximize readahead.
8338 ret = run_next_block(root, bits, bits_nr, &last,
8339 pending, seen, reada, nodes,
8340 extent_cache, chunk_cache,
8341 dev_cache, block_group_cache,
8342 dev_extent_cache, rec);
8346 free_extent_buffer(buf);
8347 list_del(&rec->list);
8353 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8354 reada, nodes, extent_cache, chunk_cache,
8355 dev_cache, block_group_cache,
8356 dev_extent_cache, NULL);
/*
 * Top-level driver of the chunk/extent check.
 *
 * Steps visible in this function:
 *  1. initialise all local caches and lists and publish some of them
 *     through root->fs_info (excluded_extents, fsck_extent_cache,
 *     free_extent_hook, corrupt_blocks);
 *  2. queue the tree root and the chunk root on normal_trees;
 *  3. walk the ROOT_ITEM keys of the tree root, queueing each root either
 *     on normal_trees (drop_progress objectid == 0) or on dropping_trees;
 *  4. run deal_root_from_list() on normal_trees, then on dropping_trees;
 *  5. run check_chunks(), check_extent_refs() and check_devices();
 *  6. tear everything down and clear the fs_info pointers set in step 1.
 *
 * NOTE(review): labels, gotos, loop headers and the return value are not
 * visible in this view, so the exact control flow between these steps
 * (including the restart on -EAGAIN mentioned below) must be confirmed
 * against the full function.
 */
8366 static int check_chunks_and_extents(struct btrfs_root *root)
8368 struct rb_root dev_cache;
8369 struct cache_tree chunk_cache;
8370 struct block_group_tree block_group_cache;
8371 struct device_extent_tree dev_extent_cache;
8372 struct cache_tree extent_cache;
8373 struct cache_tree seen;
8374 struct cache_tree pending;
8375 struct cache_tree reada;
8376 struct cache_tree nodes;
8377 struct extent_io_tree excluded_extents;
8378 struct cache_tree corrupt_blocks;
8379 struct btrfs_path path;
8380 struct btrfs_key key;
8381 struct btrfs_key found_key;
8383 struct block_info *bits;
8385 struct extent_buffer *leaf;
8387 struct btrfs_root_item ri;
8388 struct list_head dropping_trees;
8389 struct list_head normal_trees;
8390 struct btrfs_root *root1;
/* Step 1: initialise every local cache, tree and list. */
8395 dev_cache = RB_ROOT;
8396 cache_tree_init(&chunk_cache);
8397 block_group_tree_init(&block_group_cache);
8398 device_extent_tree_init(&dev_extent_cache);
8400 cache_tree_init(&extent_cache);
8401 cache_tree_init(&seen);
8402 cache_tree_init(&pending);
8403 cache_tree_init(&nodes);
8404 cache_tree_init(&reada);
8405 cache_tree_init(&corrupt_blocks);
8406 extent_io_tree_init(&excluded_extents);
8407 INIT_LIST_HEAD(&dropping_trees);
8408 INIT_LIST_HEAD(&normal_trees);
/*
 * Publish stack-local structures through fs_info. They are reset to NULL
 * in the teardown code further down, before this frame goes away.
 */
8411 root->fs_info->excluded_extents = &excluded_extents;
8412 root->fs_info->fsck_extent_cache = &extent_cache;
8413 root->fs_info->free_extent_hook = free_extent_hook;
8414 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* NOTE(review): the check of this allocation is not visible in this view. */
8418 bits = malloc(bits_nr * sizeof(struct block_info));
8424 if (ctx.progress_enabled) {
8425 ctx.tp = TASK_EXTENTS;
8426 task_start(ctx.info);
/* Step 2: the tree root and the chunk root are queued explicitly. */
8430 root1 = root->fs_info->tree_root;
8431 level = btrfs_header_level(root1->node);
8432 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8433 root1->node->start, 0, level, 0,
8434 root1->nodesize, NULL);
8437 root1 = root->fs_info->chunk_root;
8438 level = btrfs_header_level(root1->node);
8439 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8440 root1->node->start, 0, level, 0,
8441 root1->nodesize, NULL);
/* Step 3: scan the tree root for ROOT_ITEM keys. */
8444 btrfs_init_path(&path);
8447 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8448 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8453 leaf = path.nodes[0];
8454 slot = path.slots[0];
/* Past the last item of this leaf: advance to the next leaf. */
8455 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8456 ret = btrfs_next_leaf(root, &path);
8459 leaf = path.nodes[0];
8460 slot = path.slots[0];
8462 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8463 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8464 unsigned long offset;
/* Copy the root item out of the leaf so its fields can be read directly. */
8467 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8468 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8469 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid selects the normal_trees list. */
8470 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8471 level = btrfs_root_level(&ri);
8472 level_size = root->nodesize;
8473 ret = add_root_item_to_list(&normal_trees,
8475 btrfs_root_bytenr(&ri),
8476 last_snapshot, level,
8477 0, level_size, NULL);
8481 level = btrfs_root_level(&ri);
8482 level_size = root->nodesize;
/*
 * found_key is reused as scratch for a CPU-order key that is then passed
 * as the drop key, so the root objectid is saved first. The source
 * argument of the conversion is on a line not visible here (presumably
 * ri.drop_progress - confirm).
 */
8483 objectid = found_key.objectid;
8484 btrfs_disk_key_to_cpu(&found_key,
8486 ret = add_root_item_to_list(&dropping_trees,
8488 btrfs_root_bytenr(&ri),
8489 last_snapshot, level,
8491 level_size, &found_key);
8498 btrfs_release_path(&path);
8501 * check_block can return -EAGAIN if it fixes something, please keep
8502 * this in mind when dealing with return values from these functions, if
8503 * we get -EAGAIN we want to fall through and restart the loop.
/* Step 4: normal trees are processed before dropping trees. */
8505 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8506 &seen, &reada, &nodes, &extent_cache,
8507 &chunk_cache, &dev_cache, &block_group_cache,
8514 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8515 &pending, &seen, &reada, &nodes,
8516 &extent_cache, &chunk_cache, &dev_cache,
8517 &block_group_cache, &dev_extent_cache);
/* Step 5: cross-checks over the caches filled by the tree walk. */
8524 ret = check_chunks(&chunk_cache, &block_group_cache,
8525 &dev_extent_cache, NULL, NULL, NULL, 0);
8532 ret = check_extent_refs(root, &extent_cache);
8539 ret = check_devices(&dev_cache, &dev_extent_cache);
8544 task_stop(ctx.info);
/*
 * Step 6: teardown. Two cleanup sequences are visible below; the first one
 * also resets the fs_info pointers set in step 1.
 * NOTE(review): the labels/conditions selecting between the two sequences
 * are not visible in this view.
 */
8546 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8547 extent_io_tree_cleanup(&excluded_extents);
8548 root->fs_info->fsck_extent_cache = NULL;
8549 root->fs_info->free_extent_hook = NULL;
8550 root->fs_info->corrupt_blocks = NULL;
8551 root->fs_info->excluded_extents = NULL;
8554 free_chunk_cache_tree(&chunk_cache);
8555 free_device_cache_tree(&dev_cache);
8556 free_block_group_tree(&block_group_cache);
8557 free_device_extent_tree(&dev_extent_cache);
8558 free_extent_cache_tree(&seen);
8559 free_extent_cache_tree(&pending);
8560 free_extent_cache_tree(&reada);
8561 free_extent_cache_tree(&nodes);
/* Second cleanup sequence: additionally frees extent records and both root lists. */
8564 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8565 free_extent_cache_tree(&seen);
8566 free_extent_cache_tree(&pending);
8567 free_extent_cache_tree(&reada);
8568 free_extent_cache_tree(&nodes);
8569 free_chunk_cache_tree(&chunk_cache);
8570 free_block_group_tree(&block_group_cache);
8571 free_device_cache_tree(&dev_cache);
8572 free_device_extent_tree(&dev_extent_cache);
8573 free_extent_record_cache(root->fs_info, &extent_cache);
8574 free_root_item_list(&normal_trees);
8575 free_root_item_list(&dropping_trees);
8576 extent_io_tree_cleanup(&excluded_extents);
8581 * Check backrefs of a tree block given by @bytenr or @eb.
8583 * @root: the root containing the @bytenr or @eb
8584 * @eb: tree block extent buffer, can be NULL
8585 * @bytenr: bytenr of the tree block to search
8586 * @level: tree level of the tree block
8587 * @owner: owner of the tree block
8589 * Return >0 for any error found and output error message
8590 * Return 0 for no error found
8592 static int check_tree_block_ref(struct btrfs_root *root,
8593 struct extent_buffer *eb, u64 bytenr,
8594 int level, u64 owner)
/*
 * Implementation outline:
 *  - look up the extent/metadata item for @bytenr in the extent tree;
 *  - compare its flags, generation, level and ref count with the caller's
 *    expectations, reporting BACKREF_MISMATCH on disagreement;
 *  - walk the inline refs for a TREE_BLOCK_REF naming root->objectid or
 *    @owner, or a SHARED_BLOCK_REF whose parent passes a recursive check;
 *  - otherwise search for a keyed TREE_BLOCK_REF item for root->objectid.
 * The final "backref lost" message is printed only when @eb is non-NULL.
 * NOTE(review): several guards, loop headers and gotos are not visible in
 * this view.
 */
8596 struct btrfs_key key;
8597 struct btrfs_root *extent_root = root->fs_info->extent_root;
8598 struct btrfs_path path;
8599 struct btrfs_extent_item *ei;
8600 struct btrfs_extent_inline_ref *iref;
8601 struct extent_buffer *leaf;
8607 u32 nodesize = root->nodesize;
8614 btrfs_init_path(&path);
/*
 * The key type follows the SKINNY_METADATA incompat flag. The offset is
 * set to (u64)-1 and the search is followed by
 * btrfs_previous_extent_item(); presumably this positions the path on the
 * extent item for @bytenr - confirm against that helper.
 */
8615 key.objectid = bytenr;
8616 if (btrfs_fs_incompat(root->fs_info,
8617 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8618 key.type = BTRFS_METADATA_ITEM_KEY;
8620 key.type = BTRFS_EXTENT_ITEM_KEY;
8621 key.offset = (u64)-1;
8623 /* Search for the backref in extent tree */
8624 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8626 err |= BACKREF_MISSING;
8629 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8631 err |= BACKREF_MISSING;
8635 leaf = path.nodes[0];
8636 slot = path.slots[0];
8637 btrfs_item_key_to_cpu(leaf, &key, slot);
8639 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is taken from key.offset and the inline refs
 * start right after the extent item. Otherwise a btrfs_tree_block_info
 * sits in between and supplies the level.
 */
8641 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8642 skinny_level = (int)key.offset;
8643 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8645 struct btrfs_tree_block_info *info;
8647 info = (struct btrfs_tree_block_info *)(ei + 1);
8648 skinny_level = btrfs_tree_block_level(leaf, info);
8649 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8656 if (!(btrfs_extent_flags(leaf, ei) &
8657 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8659 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8660 key.objectid, nodesize,
8661 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8662 err = BACKREF_MISMATCH;
/*
 * NOTE(review): @eb is read here, yet it is documented as possibly NULL
 * and the recursive calls pass NULL. The guard that must enclose these
 * checks is not visible in this view - confirm it exists.
 */
8664 header_gen = btrfs_header_generation(eb);
8665 extent_gen = btrfs_extent_generation(leaf, ei);
8666 if (header_gen != extent_gen) {
8668 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8669 key.objectid, nodesize, header_gen,
8671 err = BACKREF_MISMATCH;
8673 if (level != skinny_level) {
8675 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8676 key.objectid, nodesize, level, skinny_level);
8677 err = BACKREF_MISMATCH;
/* A block whose owner is not an fs tree must have exactly one reference. */
8679 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8681 "extent[%llu %u] is referred by other roots than %llu",
8682 key.objectid, nodesize, root->objectid);
8683 err = BACKREF_MISMATCH;
8688 * Iterate the extent/metadata item to find the exact backref
/* ptr walks the inline refs; end marks the end of the whole item. */
8690 item_size = btrfs_item_size_nr(leaf, slot);
8691 ptr = (unsigned long)iref;
8692 end = (unsigned long)ei + item_size;
8694 iref = (struct btrfs_extent_inline_ref *)ptr;
8695 type = btrfs_extent_inline_ref_type(leaf, iref);
8696 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8698 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8699 (offset == root->objectid || offset == owner)) {
8701 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8702 /* Check if the backref points to valid referencer */
8703 found_ref = !check_tree_block_ref(root, NULL, offset,
8709 ptr += btrfs_extent_inline_ref_size(type);
8713 * Inlined extent item doesn't have what we need, check
8714 * TREE_BLOCK_REF_KEY
8717 btrfs_release_path(&path);
8718 key.objectid = bytenr;
8719 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8720 key.offset = root->objectid;
8722 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8727 err |= BACKREF_MISSING;
8729 btrfs_release_path(&path);
8730 if (eb && (err & BACKREF_MISSING))
8731 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8732 bytenr, nodesize, owner, level);
8737 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8739 * Return >0 for any error found and output error message
8740 * Return 0 for no error found
8742 static int check_extent_data_item(struct btrfs_root *root,
8743 struct extent_buffer *eb, int slot)
/*
 * Implementation outline:
 *  - read the file extent item at @slot of @eb; inline extents and
 *    extents with disk_bytenr == 0 are not checked any further;
 *  - check disk_num_bytes and num_bytes against root->sectorsize and
 *    update the global data_bytes_allocated / data_bytes_referenced;
 *  - look up the EXTENT_ITEM (disk_bytenr, disk_num_bytes) in the extent
 *    tree and compare its flags and generation;
 *  - walk its inline refs for a matching data backref, falling back to a
 *    keyed EXTENT_DATA_REF search when none is found.
 * NOTE(review): else branches, loop headers and gotos are not visible in
 * this view.
 */
8745 struct btrfs_file_extent_item *fi;
8746 struct btrfs_path path;
8747 struct btrfs_root *extent_root = root->fs_info->extent_root;
8748 struct btrfs_key fi_key;
8749 struct btrfs_key dbref_key;
8750 struct extent_buffer *leaf;
8751 struct btrfs_extent_item *ei;
8752 struct btrfs_extent_inline_ref *iref;
8753 struct btrfs_extent_data_ref *dref;
8755 u64 file_extent_gen;
8758 u64 extent_num_bytes;
8766 int found_dbackref = 0;
8770 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8771 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8772 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8774 /* Nothing to check for hole and inline data extents */
8775 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8776 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8779 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8780 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8781 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8783 /* Check unaligned disk_num_bytes and num_bytes */
8784 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8786 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8787 fi_key.objectid, fi_key.offset, disk_num_bytes,
8789 err |= BYTES_UNALIGNED;
8791 data_bytes_allocated += disk_num_bytes;
8793 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8795 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8796 fi_key.objectid, fi_key.offset, extent_num_bytes,
8798 err |= BYTES_UNALIGNED;
8800 data_bytes_referenced += extent_num_bytes;
/* The header owner of @eb is one of the two root ids accepted below. */
8802 owner = btrfs_header_owner(eb);
8804 /* Check the extent item of the file extent in extent tree */
8805 btrfs_init_path(&path);
8806 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8807 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8808 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8810 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8812 err |= BACKREF_MISSING;
/* From here on slot refers to the extent tree leaf, not to @eb. */
8816 leaf = path.nodes[0];
8817 slot = path.slots[0];
8818 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8820 extent_flags = btrfs_extent_flags(leaf, ei);
8821 extent_gen = btrfs_extent_generation(leaf, ei);
8823 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8825 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8826 disk_bytenr, disk_num_bytes,
8827 BTRFS_EXTENT_FLAG_DATA);
8828 err |= BACKREF_MISMATCH;
/* A file extent generation below the extent item generation is a mismatch. */
8831 if (file_extent_gen < extent_gen) {
8833 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8834 disk_bytenr, disk_num_bytes, file_extent_gen,
8836 err |= BACKREF_MISMATCH;
8839 /* Check data backref inside that extent item */
8840 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8841 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8842 ptr = (unsigned long)iref;
8843 end = (unsigned long)ei + item_size;
8845 iref = (struct btrfs_extent_inline_ref *)ptr;
8846 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref body is addressed starting at the offset field of the inline ref. */
8847 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8849 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8850 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8851 if (ref_root == owner || ref_root == root->objectid)
8853 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: accepted when the parent block at the ref offset checks out. */
8854 found_dbackref = !check_tree_block_ref(root, NULL,
8855 btrfs_extent_inline_ref_offset(leaf, iref),
8861 ptr += btrfs_extent_inline_ref_size(type);
8864 /* Did not find an inlined data backref, try EXTENT_DATA_REF_KEY */
8865 if (!found_dbackref) {
8866 btrfs_release_path(&path);
8868 btrfs_init_path(&path);
/* Keyed refs are indexed by a hash of (root objectid, inode objectid, file offset). */
8869 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8870 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8871 dbref_key.offset = hash_extent_data_ref(root->objectid,
8872 fi_key.objectid, fi_key.offset);
8874 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8875 &dbref_key, &path, 0, 0);
8880 if (!found_dbackref)
8881 err |= BACKREF_MISSING;
8883 btrfs_release_path(&path);
8884 if (err & BACKREF_MISSING) {
8885 error("data extent[%llu %llu] backref lost",
8886 disk_bytenr, disk_num_bytes);
8892 * Get real tree block level for the case like shared block
8893 * Return >= 0 as tree level
8894 * Return <0 for error
8896 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
/*
 * Two sources are consulted for the level of the block at @bytenr: the
 * backref in the extent tree (key.offset of a METADATA_ITEM, or the
 * btrfs_tree_block_info that follows an EXTENT_ITEM) and the header of the
 * tree block itself. header_level is the value returned on the visible
 * success path.
 * NOTE(review): the error returns, including what happens when the two
 * levels differ, are not visible in this view.
 */
8898 struct extent_buffer *eb;
8899 struct btrfs_path path;
8900 struct btrfs_key key;
8901 struct btrfs_extent_item *ei;
8904 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8909 /* Search extent tree for extent generation and level */
8910 key.objectid = bytenr;
8911 key.type = BTRFS_METADATA_ITEM_KEY;
8912 key.offset = (u64)-1;
8914 btrfs_init_path(&path);
8915 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8918 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8926 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8927 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8928 struct btrfs_extent_item);
/* Only extents flagged as tree blocks carry a level. */
8929 flags = btrfs_extent_flags(path.nodes[0], ei);
8930 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8935 /* Get transid for later read_tree_block() check */
8936 transid = btrfs_extent_generation(path.nodes[0], ei);
8938 /* Get backref level as one source */
8939 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8940 backref_level = key.offset;
8942 struct btrfs_tree_block_info *info;
8944 info = (struct btrfs_tree_block_info *)(ei + 1);
8945 backref_level = btrfs_tree_block_level(path.nodes[0], info);
/* The path is released before the tree block itself is read. */
8947 btrfs_release_path(&path);
8949 /* Get level from tree block as an alternative source */
8950 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8951 if (!extent_buffer_uptodate(eb)) {
8952 free_extent_buffer(eb);
8955 header_level = btrfs_header_level(eb);
8956 free_extent_buffer(eb);
8958 if (header_level != backref_level)
8960 return header_level;
8963 btrfs_release_path(&path);
8968 * Check if a tree block backref is valid (points to a valid tree block)
8969 * if level == -1, level will be resolved
8970 * Return >0 for any error found and print error message
8972 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8973 u64 bytenr, int level)
/*
 * Implementation outline:
 *  - resolve @level through query_tree_block_level() when requested;
 *  - read the root @root_id and the tree block at @bytenr;
 *  - take the first key of that block and search the root for it;
 *  - the node found at path.nodes[level] must have the same bytenr and
 *    level as the block being checked.
 * Failures to resolve or read set REFERENCER_MISSING; disagreements set
 * REFERENCER_MISMATCH.
 * NOTE(review): conditions, gotos and the return value are not visible in
 * this view.
 */
8975 struct btrfs_root *root;
8976 struct btrfs_key key;
8977 struct btrfs_path path;
8978 struct extent_buffer *eb;
8979 struct extent_buffer *node;
8980 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8984 /* Query level for level == -1 special case */
8986 level = query_tree_block_level(fs_info, bytenr);
8988 err |= REFERENCER_MISSING;
/* Look up the root by id; the key offset is set to (u64)-1. */
8992 key.objectid = root_id;
8993 key.type = BTRFS_ROOT_ITEM_KEY;
8994 key.offset = (u64)-1;
8996 root = btrfs_read_fs_root(fs_info, &key);
8998 err |= REFERENCER_MISSING;
9002 /* Read out the tree block to get item/node key */
9003 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9004 if (!extent_buffer_uptodate(eb)) {
9005 err |= REFERENCER_MISSING;
9006 free_extent_buffer(eb);
9010 /* Empty tree, no need to check key */
9011 if (!btrfs_header_nritems(eb) && !level) {
9012 free_extent_buffer(eb);
/*
 * The first key is read either as a node key or as an item key; the
 * condition choosing between the two calls is not visible here
 * (presumably it depends on level).
 */
9017 btrfs_node_key_to_cpu(eb, &key, 0);
9019 btrfs_item_key_to_cpu(eb, &key, 0);
9021 free_extent_buffer(eb);
9023 btrfs_init_path(&path);
9024 /* Search with the first key, to ensure we can reach it */
9025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9027 err |= REFERENCER_MISSING;
/*
 * NOTE(review): node is dereferenced right away; no check that
 * path.nodes[level] is non-NULL is visible in this view.
 */
9031 node = path.nodes[level];
9032 if (btrfs_header_bytenr(node) != bytenr) {
9034 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9035 bytenr, nodesize, bytenr,
9036 btrfs_header_bytenr(node));
9037 err |= REFERENCER_MISMATCH;
9039 if (btrfs_header_level(node) != level) {
9041 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9042 bytenr, nodesize, level,
9043 btrfs_header_level(node));
9044 err |= REFERENCER_MISMATCH;
9048 btrfs_release_path(&path);
/*
 * Two message variants follow, one without and one with the level; the
 * condition selecting between them is not visible here.
 */
9050 if (err & REFERENCER_MISSING) {
9052 error("extent [%llu %d] lost referencer (owner: %llu)",
9053 bytenr, nodesize, root_id);
9056 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9057 bytenr, nodesize, root_id, level);
9064 * Check referencer for shared block backref
9065 * If level == -1, this function will resolve the level.
9067 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9068 u64 parent, u64 bytenr, int level)
/*
 * Reads the tree block at @parent and looks for a node pointer equal to
 * @bytenr among its items. The parent header level is compared with
 * level + 1. Returns REFERENCER_MISSING, after printing a message, when
 * found_parent is still 0 at the end.
 * NOTE(review): the gotos, the statement that sets found_parent and the
 * success return are not visible in this view.
 */
9070 struct extent_buffer *eb;
9071 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9073 int found_parent = 0;
9076 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9077 if (!extent_buffer_uptodate(eb))
9081 level = query_tree_block_level(fs_info, bytenr);
/* The parent header level is checked against the level of the child plus one. */
9085 if (level + 1 != btrfs_header_level(eb))
9088 nr = btrfs_header_nritems(eb);
9089 for (i = 0; i < nr; i++) {
9090 if (bytenr == btrfs_node_blockptr(eb, i)) {
9096 free_extent_buffer(eb);
9097 if (!found_parent) {
9099 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9100 bytenr, nodesize, parent, level);
9101 return REFERENCER_MISSING;
9107 * Check referencer for normal (inlined) data ref
9108 * If len == 0, it will be resolved by searching in extent tree
9110 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9111 u64 root_id, u64 objectid, u64 offset,
9112 u64 bytenr, u64 len, u32 count)
/*
 * Implementation outline:
 *  - search the extent tree for the EXTENT_ITEM at @bytenr;
 *  - read the root @root_id and iterate the EXTENT_DATA items of inode
 *    @objectid, comparing each file extent with @bytenr and @len;
 *  - compare found_count with @count and return REFERENCER_MISSING, after
 *    printing a message, when they differ.
 * NOTE(review): the condition guarding the extent tree lookup, the
 * assignment of len, the loop header, the statement that increments
 * found_count and the success return are not visible in this view.
 */
9114 struct btrfs_root *root;
9115 struct btrfs_root *extent_root = fs_info->extent_root;
9116 struct btrfs_key key;
9117 struct btrfs_path path;
9118 struct extent_buffer *leaf;
9119 struct btrfs_file_extent_item *fi;
9120 u32 found_count = 0;
/* Extent tree lookup for the EXTENT_ITEM that starts at @bytenr. */
9125 key.objectid = bytenr;
9126 key.type = BTRFS_EXTENT_ITEM_KEY;
9127 key.offset = (u64)-1;
9129 btrfs_init_path(&path);
9130 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9133 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9136 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9137 if (key.objectid != bytenr ||
9138 key.type != BTRFS_EXTENT_ITEM_KEY)
9141 btrfs_release_path(&path);
/* Switch to the referencing root. */
9143 key.objectid = root_id;
9144 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9145 key.offset = (u64)-1;
9146 btrfs_init_path(&path);
9148 root = btrfs_read_fs_root(fs_info, &key);
9152 key.objectid = objectid;
9153 key.type = BTRFS_EXTENT_DATA_KEY;
9155 * It can be nasty as data backref offset is
9156 * file offset - file extent offset, which is smaller or
9157 * equal to original backref offset. The only special case is
9158 * overflow. So we need to special check and do further search.
/* An offset with bit 63 set starts the search from file offset 0. */
9160 key.offset = offset & (1ULL << 63) ? 0 : offset;
9162 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9167 * Search afterwards to get correct one
9168 * NOTE: As we must do a comprehensive check on the data backref to
9169 * make sure the dref count also matches, we must iterate all file
9170 * extents for that inode.
9173 leaf = path.nodes[0];
9174 slot = path.slots[0];
9176 btrfs_item_key_to_cpu(leaf, &key, slot);
9177 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9179 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9181 * Except normal disk bytenr and disk num bytes, we still
9182 * need to do extra check on dbackref offset as
9183 * dbackref offset = file_offset - file_extent_offset
9185 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9186 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9187 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9191 ret = btrfs_next_item(root, &path);
9196 btrfs_release_path(&path);
9197 if (found_count != count) {
9199 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9200 bytenr, len, root_id, objectid, offset, count, found_count);
9201 return REFERENCER_MISSING;
9207 * Check if the referencer of a shared data backref exists
9209 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9210 u64 parent, u64 bytenr)
/*
 * Reads the tree block at @parent and scans its items for an EXTENT_DATA
 * item that is not inline and whose disk bytenr equals @bytenr. Returns
 * REFERENCER_MISSING, after printing a message, when found_parent is still
 * 0 at the end.
 * NOTE(review): the gotos, the statement that sets found_parent and the
 * success return are not visible in this view.
 */
9212 struct extent_buffer *eb;
9213 struct btrfs_key key;
9214 struct btrfs_file_extent_item *fi;
9215 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9217 int found_parent = 0;
9220 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9221 if (!extent_buffer_uptodate(eb))
9224 nr = btrfs_header_nritems(eb);
9225 for (i = 0; i < nr; i++) {
9226 btrfs_item_key_to_cpu(eb, &key, i);
9227 if (key.type != BTRFS_EXTENT_DATA_KEY)
9230 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9231 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9234 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9241 free_extent_buffer(eb);
9242 if (!found_parent) {
9243 error("shared extent %llu referencer lost (parent: %llu)",
9245 return REFERENCER_MISSING;
9251 * This function will check a given extent item, including its backref and
9252 * itself (like crossing stripe boundary and type)
9254 * Since we don't use extent_record anymore, introduce new error bit
9256 static int check_extent_item(struct btrfs_fs_info *fs_info,
9257 struct extent_buffer *eb, int slot)
/*
 * Implementation outline:
 *  - add the size of the extent to the global bytes_used: key.offset for
 *    an EXTENT_ITEM key, nodesize in the other visible statement
 *    (presumably the METADATA_ITEM case - confirm);
 *  - report items smaller than struct btrfs_extent_item as the
 *    unsupported V0 format;
 *  - for metadata, report extents for which check_crossing_stripes() is
 *    true (CROSSING_STRIPE_BOUNDARY);
 *  - skip the btrfs_tree_block_info of an old-style metadata EXTENT_ITEM,
 *    then walk the inline refs and dispatch on their type:
 *      TREE_BLOCK_REF   -> check_tree_block_backref()
 *      SHARED_BLOCK_REF -> check_shared_block_backref()
 *      EXTENT_DATA_REF  -> check_extent_data_backref()
 *      SHARED_DATA_REF  -> check_shared_data_backref()
 *    any other type sets UNKNOWN_TYPE.
 * NOTE(review): the assignment of metadata, the level used for
 * METADATA_ITEM keys, the loop header, the merging of ret into err and the
 * return value are not visible in this view.
 */
9259 struct btrfs_extent_item *ei;
9260 struct btrfs_extent_inline_ref *iref;
9261 struct btrfs_extent_data_ref *dref;
9265 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9266 u32 item_size = btrfs_item_size_nr(eb, slot);
9271 struct btrfs_key key;
9275 btrfs_item_key_to_cpu(eb, &key, slot);
9276 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9277 bytes_used += key.offset;
9279 bytes_used += nodesize;
9281 if (item_size < sizeof(*ei)) {
9283 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9284 * old thing when on disk format is still un-determined.
9285 * No need to care about it anymore
9287 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9291 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9292 flags = btrfs_extent_flags(eb, ei);
9294 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9296 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9297 error("bad metadata [%llu, %llu) crossing stripe boundary",
9298 key.objectid, key.objectid + nodesize);
9299 err |= CROSSING_STRIPE_BOUNDARY;
9302 ptr = (unsigned long)(ei + 1);
9304 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9305 /* Old EXTENT_ITEM metadata */
9306 struct btrfs_tree_block_info *info;
9308 info = (struct btrfs_tree_block_info *)ptr;
9309 level = btrfs_tree_block_level(eb, info);
9310 ptr += sizeof(struct btrfs_tree_block_info);
9312 /* New METADATA_ITEM */
9315 end = (unsigned long)ei + item_size;
9318 err |= ITEM_SIZE_MISMATCH;
9322 /* Now check every backref in this extent item */
9324 iref = (struct btrfs_extent_inline_ref *)ptr;
9325 type = btrfs_extent_inline_ref_type(eb, iref);
9326 offset = btrfs_extent_inline_ref_offset(eb, iref);
9328 case BTRFS_TREE_BLOCK_REF_KEY:
9329 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9333 case BTRFS_SHARED_BLOCK_REF_KEY:
9334 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9338 case BTRFS_EXTENT_DATA_REF_KEY:
9339 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9340 ret = check_extent_data_backref(fs_info,
9341 btrfs_extent_data_ref_root(eb, dref),
9342 btrfs_extent_data_ref_objectid(eb, dref),
9343 btrfs_extent_data_ref_offset(eb, dref),
9344 key.objectid, key.offset,
9345 btrfs_extent_data_ref_count(eb, dref));
9348 case BTRFS_SHARED_DATA_REF_KEY:
9349 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9353 error("extent[%llu %d %llu] has unknown ref type: %d",
9354 key.objectid, key.type, key.offset, type);
9355 err |= UNKNOWN_TYPE;
9359 ptr += btrfs_extent_inline_ref_size(type);
9368 * Check if a dev extent item is referred correctly by its chunk
9370 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9371 struct extent_buffer *eb, int slot)
/*
 * Builds the chunk key from the chunk objectid/offset stored in the dev
 * extent at @slot, looks that chunk up in the chunk tree, compares the
 * chunk length with the dev extent length and scans the chunk stripes for
 * one whose (devid, offset) equals the dev extent key (objectid, offset).
 * Returns REFERENCER_MISSING after printing a message.
 * NOTE(review): the assignment of l, the gotos, the statement that sets
 * found_chunk, the condition guarding the message and the success return
 * are not visible in this view.
 */
9373 struct btrfs_root *chunk_root = fs_info->chunk_root;
9374 struct btrfs_dev_extent *ptr;
9375 struct btrfs_path path;
9376 struct btrfs_key chunk_key;
9377 struct btrfs_key devext_key;
9378 struct btrfs_chunk *chunk;
9379 struct extent_buffer *l;
9383 int found_chunk = 0;
9386 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9387 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9388 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent itself names the chunk that should reference it. */
9390 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9391 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9392 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9394 btrfs_init_path(&path);
9395 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9400 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9401 if (btrfs_chunk_length(l, chunk) != length)
9404 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9405 for (i = 0; i < num_stripes; i++) {
9406 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9407 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9409 if (devid == devext_key.objectid &&
9410 offset == devext_key.offset) {
9416 btrfs_release_path(&path);
9419 "device extent[%llu, %llu, %llu] did not find the related chunk",
9420 devext_key.objectid, devext_key.offset, length);
9421 return REFERENCER_MISSING;
9427 * Check if the used space is correct with the dev item
9429 static int check_dev_item(struct btrfs_fs_info *fs_info,
9430 struct extent_buffer *eb, int slot)
/*
 * Reads the device id and bytes_used from the dev item at @slot, sums the
 * length of the DEV_EXTENT items found for that device id in the dev tree
 * and compares the sum with bytes_used.
 * Returns REFERENCER_MISSING when the initial search fails and
 * ACCOUNTING_MISMATCH when the totals differ; a message is printed in both
 * cases.
 * NOTE(review): key.offset for the search, the loop header and the success
 * return are not visible in this view.
 */
9432 struct btrfs_root *dev_root = fs_info->dev_root;
9433 struct btrfs_dev_item *dev_item;
9434 struct btrfs_path path;
9435 struct btrfs_key key;
9436 struct btrfs_dev_extent *ptr;
9442 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9443 dev_id = btrfs_device_id(eb, dev_item);
9444 used = btrfs_device_bytes_used(eb, dev_item);
9446 key.objectid = dev_id;
9447 key.type = BTRFS_DEV_EXTENT_KEY;
9450 btrfs_init_path(&path);
9451 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message names that item. */
9453 btrfs_item_key_to_cpu(eb, &key, slot);
9454 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9455 key.objectid, key.type, key.offset);
9456 btrfs_release_path(&path);
9457 return REFERENCER_MISSING;
9460 /* Iterate dev_extents to calculate the used space of a device */
9462 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9464 if (key.objectid > dev_id)
9466 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9469 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9470 struct btrfs_dev_extent);
9471 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9473 ret = btrfs_next_item(dev_root, &path);
9477 btrfs_release_path(&path);
/*
 * NOTE(review): key is reloaded from @eb below but the message fills
 * dev[...] with BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id
 * instead of key.objectid, key.type and key.offset as the not-found
 * message above does. Confirm which output is intended.
 */
9479 if (used != total) {
9480 btrfs_item_key_to_cpu(eb, &key, slot);
9482 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9483 total, used, BTRFS_ROOT_TREE_OBJECTID,
9484 BTRFS_DEV_EXTENT_KEY, dev_id);
9485 return ACCOUNTING_MISMATCH;
9491 * Check a block group item with its referencer (chunk) and its used space
9492 * with extent/metadata item
9494 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9495 struct extent_buffer *eb, int slot)
/*
 * Implementation outline:
 *  - read the block group item at @slot (used bytes and flags);
 *  - look up the chunk keyed (FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM,
 *    bg_key.objectid) and compare its length (REFERENCER_MISSING /
 *    REFERENCER_MISMATCH on failure);
 *  - iterate the extent tree from bg_key.objectid up to, but excluding,
 *    bg_key.objectid + bg_key.offset, summing the size of each
 *    EXTENT_ITEM / METADATA_ITEM and checking the extent flags against the
 *    block group flags (CHUNK_TYPE_MISMATCH);
 *  - compare the sum with the used value of the item (ACCOUNTING_MISMATCH).
 * NOTE(review): the right-hand side of the chunk length comparison, the
 * amount added for METADATA_ITEM keys, the loop header, the gotos and the
 * return value are not visible in this view.
 */
9497 struct btrfs_root *extent_root = fs_info->extent_root;
9498 struct btrfs_root *chunk_root = fs_info->chunk_root;
9499 struct btrfs_block_group_item *bi;
9500 struct btrfs_block_group_item bg_item;
9501 struct btrfs_path path;
9502 struct btrfs_key bg_key;
9503 struct btrfs_key chunk_key;
9504 struct btrfs_key extent_key;
9505 struct btrfs_chunk *chunk;
9506 struct extent_buffer *leaf;
9507 struct btrfs_extent_item *ei;
9508 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* The item is copied out of the extent buffer before its fields are read. */
9516 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9517 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9518 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9519 used = btrfs_block_group_used(&bg_item);
9520 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is keyed by the start bytenr of the block group. */
9522 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9523 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9524 chunk_key.offset = bg_key.objectid;
9526 btrfs_init_path(&path);
9527 /* Search for the referencer chunk */
9528 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9531 "block group[%llu %llu] did not find the related chunk item",
9532 bg_key.objectid, bg_key.offset);
9533 err |= REFERENCER_MISSING;
9535 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9536 struct btrfs_chunk);
9537 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9540 "block group[%llu %llu] related chunk item length does not match",
9541 bg_key.objectid, bg_key.offset);
9542 err |= REFERENCER_MISMATCH;
9545 btrfs_release_path(&path);
9547 /* Search from the block group bytenr */
9548 extent_key.objectid = bg_key.objectid;
9549 extent_key.type = 0;
9550 extent_key.offset = 0;
9552 btrfs_init_path(&path);
9553 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9557 /* Iterate extent tree to account used space */
9559 leaf = path.nodes[0];
9560 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.offset is used as the length of the block group. */
9561 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9564 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9565 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9567 if (extent_key.objectid < bg_key.objectid)
9570 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9573 total += extent_key.offset;
9575 ei = btrfs_item_ptr(leaf, path.slots[0],
9576 struct btrfs_extent_item);
9577 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group; tree blocks need SYSTEM or METADATA. */
9578 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9579 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9581 "bad extent[%llu, %llu) type mismatch with chunk",
9582 extent_key.objectid,
9583 extent_key.objectid + extent_key.offset);
9584 err |= CHUNK_TYPE_MISMATCH;
9586 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9587 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9588 BTRFS_BLOCK_GROUP_METADATA))) {
9590 "bad extent[%llu, %llu) type mismatch with chunk",
9591 extent_key.objectid,
9592 extent_key.objectid + nodesize);
9593 err |= CHUNK_TYPE_MISMATCH;
9597 ret = btrfs_next_item(extent_root, &path);
9603 btrfs_release_path(&path);
9605 if (total != used) {
9607 "block group[%llu %llu] used %llu but extent items used %llu",
9608 bg_key.objectid, bg_key.offset, used, total);
9609 err |= ACCOUNTING_MISMATCH;
9615 * Check a chunk item.
9616 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item - validate the chunk item found at @slot of leaf @eb.
 *
 * @fs_info: supplies the extent root, the dev root and the super block copy.
 * @eb:      leaf holding the chunk item.
 * @slot:    slot of the chunk item inside @eb.
 *
 * Checks visible here:
 *  - the chunk length is aligned to the super block sector size;
 *  - the chunk type has at least one type bit and at most one profile bit;
 *  - a block group item keyed (chunk offset, BLOCK_GROUP_ITEM, length) is
 *    searched in the extent tree and its flags are compared with the chunk type;
 *  - for every stripe, a dev extent keyed (devid, DEV_EXTENT, stripe offset)
 *    is searched in the dev tree and compared against the chunk.
 * Problems are accumulated as bit flags in err.
 * NOTE(review): the local declarations, some branches and the return
 * statement are not visible in this listing.
 */
9618 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9619 struct extent_buffer *eb, int slot)
9621 struct btrfs_root *extent_root = fs_info->extent_root;
9622 struct btrfs_root *dev_root = fs_info->dev_root;
9623 struct btrfs_path path;
9624 struct btrfs_key chunk_key;
9625 struct btrfs_key bg_key;
9626 struct btrfs_key devext_key;
9627 struct btrfs_chunk *chunk;
9628 struct extent_buffer *leaf;
9629 struct btrfs_block_group_item *bi;
9630 struct btrfs_block_group_item bg_item;
9631 struct btrfs_dev_extent *ptr;
9632 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9644 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9645 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9646 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers [chunk_key.offset, chunk_end) */
9647 chunk_end = chunk_key.offset + length;
9648 if (!IS_ALIGNED(length, sectorsize)) {
9649 error("chunk[%llu %llu) not aligned to %u",
9650 chunk_key.offset, chunk_end, sectorsize);
9651 err |= BYTES_UNALIGNED;
9655 type = btrfs_chunk_type(eb, chunk);
9656 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9657 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9658 error("chunk[%llu %llu) has no chunk type",
9659 chunk_key.offset, chunk_end);
9660 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero only when more than one bit is set */
9662 if (profile && (profile & (profile - 1))) {
9663 error("chunk[%llu %llu) multiple profiles detected: %llx",
9664 chunk_key.offset, chunk_end, profile);
9665 err |= UNKNOWN_TYPE;
/* Key of the block group item that should describe this chunk */
9668 bg_key.objectid = chunk_key.offset;
9669 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9670 bg_key.offset = length;
9672 btrfs_init_path(&path);
9673 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9676 "chunk[%llu %llu) did not find the related block group item",
9677 chunk_key.offset, chunk_end);
9678 err |= REFERENCER_MISSING;
9680 leaf = path.nodes[0];
9681 bi = btrfs_item_ptr(leaf, path.slots[0],
9682 struct btrfs_block_group_item);
9683 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9685 if (btrfs_block_group_flags(&bg_item) != type) {
9687 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9688 chunk_key.offset, chunk_end, type,
9689 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is recorded as REFERENCER_MISSING;
 * confirm that REFERENCER_MISMATCH was not intended.
 */
9690 err |= REFERENCER_MISSING;
/* Check the dev extent backing each stripe of the chunk */
9694 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9695 for (i = 0; i < num_stripes; i++) {
9696 btrfs_release_path(&path);
9697 btrfs_init_path(&path);
9698 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9699 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9700 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9702 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9707 leaf = path.nodes[0];
9708 ptr = btrfs_item_ptr(leaf, path.slots[0],
9709 struct btrfs_dev_extent);
9710 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9711 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back to this chunk and have the same length */
9712 if (objectid != chunk_key.objectid ||
9713 offset != chunk_key.offset ||
9714 btrfs_dev_extent_length(leaf, ptr) != length)
9718 err |= BACKREF_MISSING;
9720 "chunk[%llu %llu) stripe %d did not find the related dev extent",
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start,
 * while every other message in this function prints chunk_key.offset;
 * confirm which one is intended.
 */
9721 chunk_key.objectid, chunk_end, i);
9724 btrfs_release_path(&path);
9730 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items - run the per-item checker for the items of leaf @eb.
 *
 * @root: tree the leaf belongs to; its fs_info is handed to most checkers.
 * @eb:   leaf whose items are inspected.
 *
 * The key type of the item at slot selects the checker. Csum items are not
 * checked here; their item size is only added to total_csum_bytes.
 * The slot is advanced while it stays below btrfs_header_nritems(eb).
 * NOTE(review): how the individual ret values are combined and what is
 * returned is not visible in this listing.
 */
9732 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9734 struct btrfs_fs_info *fs_info = root->fs_info;
9735 struct btrfs_key key;
9738 struct btrfs_extent_data_ref *dref;
9743 btrfs_item_key_to_cpu(eb, &key, slot);
9744 type = btrfs_key_type(&key);
9747 case BTRFS_EXTENT_DATA_KEY:
9748 ret = check_extent_data_item(root, eb, slot);
9751 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9752 ret = check_block_group_item(fs_info, eb, slot);
9755 case BTRFS_DEV_ITEM_KEY:
9756 ret = check_dev_item(fs_info, eb, slot);
9759 case BTRFS_CHUNK_ITEM_KEY:
9760 ret = check_chunk_item(fs_info, eb, slot);
9763 case BTRFS_DEV_EXTENT_KEY:
9764 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
9767 case BTRFS_EXTENT_ITEM_KEY:
9768 case BTRFS_METADATA_ITEM_KEY:
9769 ret = check_extent_item(fs_info, eb, slot);
/* Accounting only, no check for csum items */
9772 case BTRFS_EXTENT_CSUM_KEY:
9773 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9775 case BTRFS_TREE_BLOCK_REF_KEY:
9776 ret = check_tree_block_backref(fs_info, key.offset,
9780 case BTRFS_EXTENT_DATA_REF_KEY:
/* Root, objectid, offset and count are read from the data ref item itself */
9781 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9782 ret = check_extent_data_backref(fs_info,
9783 btrfs_extent_data_ref_root(eb, dref),
9784 btrfs_extent_data_ref_objectid(eb, dref),
9785 btrfs_extent_data_ref_offset(eb, dref),
9787 btrfs_extent_data_ref_count(eb, dref));
9790 case BTRFS_SHARED_BLOCK_REF_KEY:
9791 ret = check_shared_block_backref(fs_info, key.offset,
9795 case BTRFS_SHARED_DATA_REF_KEY:
9796 ret = check_shared_data_backref(fs_info, key.offset,
9804 if (++slot < btrfs_header_nritems(eb))
9811 * Helper function for later fs/subvol tree check. To determine if a tree
9812 * block should be checked.
9813 * This function ensures that only the direct referencer with the lowest
9814 * rootid checks a fs/subvolume tree block.
9816 * Backref check at extent tree would detect errors like missing subvolume
9817 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * @root: tree currently being traversed; root->objectid is compared with the
 *        offsets of the inline TREE_BLOCK_REF entries.
 * @eb:   tree block in question; its extent item is looked up in the extent
 *        tree by btrfs_header_bytenr(eb).
 *
 * NOTE(review): the return statements are not visible in this listing; the
 * caller treats a zero result as "skip this block".
 */
9819 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9821 struct btrfs_root *extent_root = root->fs_info->extent_root;
9822 struct btrfs_key key;
9823 struct btrfs_path path;
9824 struct extent_buffer *leaf;
9826 struct btrfs_extent_item *ei;
9832 struct btrfs_extent_inline_ref *iref;
9835 btrfs_init_path(&path);
/*
 * Search with the largest possible offset for this bytenr;
 * btrfs_previous_extent_item() below is then used to step to the
 * extent item itself.
 */
9836 key.objectid = btrfs_header_bytenr(eb);
9837 key.type = BTRFS_METADATA_ITEM_KEY;
9838 key.offset = (u64)-1;
9841 * Any failure in backref resolving means we can't determine
9842 * whom the tree block belongs to.
9843 * So in that case, we need to check that tree block
9845 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9849 ret = btrfs_previous_extent_item(extent_root, &path,
9850 btrfs_header_bytenr(eb));
9854 leaf = path.nodes[0];
9855 slot = path.slots[0];
9856 btrfs_item_key_to_cpu(leaf, &key, slot);
9857 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the inline refs start right after the extent item;
 * otherwise a btrfs_tree_block_info sits in between.
 */
9859 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9860 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9862 struct btrfs_tree_block_info *info;
9864 info = (struct btrfs_tree_block_info *)(ei + 1);
9865 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* [ptr, end) is the inline ref area of the item */
9868 item_size = btrfs_item_size_nr(leaf, slot);
9869 ptr = (unsigned long)iref;
9870 end = (unsigned long)ei + item_size;
9872 iref = (struct btrfs_extent_inline_ref *)ptr;
9873 type = btrfs_extent_inline_ref_type(leaf, iref);
9874 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9877 * We only check the tree block if current root is
9878 * the lowest referencer of it.
9880 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9881 offset < root->objectid) {
9882 btrfs_release_path(&path);
/* Advance to the next inline ref; its size depends on the ref type */
9886 ptr += btrfs_extent_inline_ref_size(type);
9889 * Normally we should also check keyed tree block ref, but that may be
9890 * very time consuming. Inlined ref should already make us skip a lot
9891 * of refs now. So skip search keyed tree block ref.
9895 btrfs_release_path(&path);
9900 * Traversal function for tree block. We will do:
9901 * 1) Skip shared fs/subvolume tree blocks
9902 * 2) Update related bytes accounting
9903 * 3) Pre-order traversal
/*
 * @root: tree being traversed.
 * @node: tree block to process; its children are read with
 *        read_tree_block() and processed by a recursive call.
 *
 * Side effects visible here: updates the global counters
 * total_btree_bytes, total_fs_tree_bytes, total_extent_tree_bytes,
 * btree_space_waste and the flag found_old_backref.
 * NOTE(review): error accumulation and the return statements are not
 * visible in this listing.
 */
9905 static int traverse_tree_block(struct btrfs_root *root,
9906 struct extent_buffer *node)
9908 struct extent_buffer *eb;
9916 * Skip shared fs/subvolume tree block, in that case they will
9917 * be checked by referencer with lowest rootid
9919 if (is_fstree(root->objectid) && !should_check(root, node))
9922 /* Update bytes accounting */
9923 total_btree_bytes += node->len;
9924 if (fs_root_objectid(btrfs_header_owner(node)))
9925 total_fs_tree_bytes += node->len;
9926 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9927 total_extent_tree_bytes += node->len;
/*
 * Remember a reloc-tree-owned block that uses mixed backrefs but does not
 * carry the RELOC header flag.
 */
9928 if (!found_old_backref &&
9929 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9930 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9931 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9932 found_old_backref = 1;
9934 /* pre-order traversal, check itself first */
9935 level = btrfs_header_level(node);
9936 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9937 btrfs_header_level(node),
9938 btrfs_header_owner(node));
9942 "check %s failed root %llu bytenr %llu level %d, force continue check",
9943 level ? "node":"leaf", root->objectid,
9944 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: account its free space as waste and check its items */
9947 btree_space_waste += btrfs_leaf_free_space(root, node);
9948 ret = check_leaf_items(root, node);
/* Node: unused key pointer slots count as wasted space */
9953 nr = btrfs_header_nritems(node);
9954 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
9955 sizeof(struct btrfs_key_ptr);
9957 /* Then check all its children */
9958 for (i = 0; i < nr; i++) {
9959 u64 blocknr = btrfs_node_blockptr(node, i);
9962 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
9963 * to call the function itself.
9965 eb = read_tree_block(root, blocknr, root->nodesize, 0);
/* Only recurse into children that were read successfully */
9966 if (extent_buffer_uptodate(eb)) {
9967 ret = traverse_tree_block(root, eb);
9970 free_extent_buffer(eb);
9977 * Low memory usage version check_chunks_and_extents.
/*
 * @root: any root of the filesystem; only root->fs_info is used.
 *
 * Order of work visible here:
 *  1) traverse the chunk tree,
 *  2) traverse the tree root,
 *  3) walk the root items of the tree root starting at the extent tree
 *     objectid, read each tree with btrfs_read_fs_root() and traverse it.
 * NOTE(review): error accumulation, loop exits and the return value are not
 * visible in this listing.
 */
9979 static int check_chunks_and_extents_v2(struct btrfs_root *root)
9981 struct btrfs_path path;
9982 struct btrfs_key key;
9983 struct btrfs_root *root1;
9984 struct btrfs_root *cur_root;
9988 root1 = root->fs_info->chunk_root;
9989 ret = traverse_tree_block(root1, root1->node);
9992 root1 = root->fs_info->tree_root;
9993 ret = traverse_tree_block(root1, root1->node);
/* root1 is still the tree root: start the root item walk at the extent tree */
9996 btrfs_init_path(&path);
9997 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
9999 key.type = BTRFS_ROOT_ITEM_KEY;
10001 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): "treet" in the message below looks like a typo for "tree" */
10003 error("cannot find extent treet in tree_root");
10008 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10009 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* btrfs_read_fs_root() is called with offset (u64)-1 for every root item */
10011 key.offset = (u64)-1;
10013 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10014 if (IS_ERR(cur_root) || !cur_root) {
10015 error("failed to read tree: %lld", key.objectid);
10019 ret = traverse_tree_block(cur_root, cur_root->node);
10023 ret = btrfs_next_item(root1, &path);
10029 btrfs_release_path(&path);
/*
 * btrfs_fsck_reinit_root - give @root a freshly initialised root block.
 *
 * @trans:     running transaction; its transid becomes the block generation.
 * @root:      root to re-initialise.
 * @overwrite: NOTE(review): presumably selects between reusing a block and
 *             allocating a new one with btrfs_alloc_free_block(); the
 *             condition is not visible in this listing - confirm.
 *
 * The header of the block c is zeroed and refilled (level, bytenr,
 * generation, backref rev, owner, fsid, chunk tree uuid) and the block is
 * marked dirty. The root is finally added to the dirty list.
 */
10033 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10034 struct btrfs_root *root, int overwrite)
10036 struct extent_buffer *c;
10037 struct extent_buffer *old = root->node;
10040 struct btrfs_disk_key disk_key = {0,0,0};
10046 extent_buffer_get(c);
10049 c = btrfs_alloc_free_block(trans, root,
10051 root->root_key.objectid,
10052 &disk_key, level, 0, 0);
10055 extent_buffer_get(c);
/* Wipe the old header before filling in the new one */
10059 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10060 btrfs_set_header_level(c, level);
10061 btrfs_set_header_bytenr(c, c->start);
10062 btrfs_set_header_generation(c, trans->transid);
10063 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10064 btrfs_set_header_owner(c, root->root_key.objectid);
10066 write_extent_buffer(c, root->fs_info->fsid,
10067 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10069 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10070 btrfs_header_chunk_tree_uuid(c),
10073 btrfs_mark_buffer_dirty(c);
10075 * this case can happen in the following case:
10077 * 1.overwrite previous root.
10079 * 2.reinit reloc data root, this is because we skip pin
10080 * down reloc data tree before which means we can allocate
10081 * same block bytenr here.
/* Same bytenr as before: update the root item in the tree root directly */
10083 if (old->start == c->start) {
10084 btrfs_set_root_generation(&root->root_item,
10086 root->root_item.level = btrfs_header_level(root->node);
10087 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10088 &root->root_key, &root->root_item);
10090 free_extent_buffer(c);
10094 free_extent_buffer(old);
10096 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks - pin the extent of @eb and, recursively, of the
 * tree blocks reachable from it.
 *
 * @fs_info:   filesystem info; pinned ranges go to fs_info->pinned_extents.
 * @eb:        tree block to start from.
 * @tree_root: non-zero when @eb belongs to the tree root. In that case root
 *             items are followed into the trees they point to (those are
 *             walked with tree_root == 0).
 *
 * NOTE(review): the test that separates the leaf handling from the node
 * handling, the continue statements and the returns are not visible in this
 * listing.
 */
10100 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10101 struct extent_buffer *eb, int tree_root)
10103 struct extent_buffer *tmp;
10104 struct btrfs_root_item *ri;
10105 struct btrfs_key key;
10108 int level = btrfs_header_level(eb);
10114 * If we have pinned this block before, don't pin it again.
10115 * This can not only avoid forever loop with broken filesystem
10116 * but also give us some speedups.
10118 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10119 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10122 btrfs_pin_extent(fs_info, eb->start, eb->len);
10124 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10125 nritems = btrfs_header_nritems(eb);
10126 for (i = 0; i < nritems; i++) {
/* Item handling: only root items are of interest */
10128 btrfs_item_key_to_cpu(eb, &key, i);
10129 if (key.type != BTRFS_ROOT_ITEM_KEY)
10131 /* Skip the extent root and reloc roots */
10132 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10133 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10134 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10136 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10137 bytenr = btrfs_disk_root_bytenr(eb, ri);
10140 * If at any point we start needing the real root we
10141 * will have to build a stump root for the root we are
10142 * in, but for now this doesn't actually use the root so
10143 * just pass in extent_root.
10145 tmp = read_tree_block(fs_info->extent_root, bytenr,
10147 if (!extent_buffer_uptodate(tmp)) {
10148 fprintf(stderr, "Error reading root block\n");
/* Walk the tree this root item points to */
10151 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10152 free_extent_buffer(tmp);
/* Node handling: follow the block pointer of slot i */
10156 bytenr = btrfs_node_blockptr(eb, i);
10158 /* If we aren't the tree root don't read the block */
10159 if (level == 1 && !tree_root) {
10160 btrfs_pin_extent(fs_info, bytenr, nodesize);
10164 tmp = read_tree_block(fs_info->extent_root, bytenr,
10166 if (!extent_buffer_uptodate(tmp)) {
10167 fprintf(stderr, "Error reading tree block\n");
10170 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10171 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin the blocks of the chunk tree, then those
 * reachable from the tree root.
 *
 * The chunk tree is walked with tree_root == 0, the tree root with
 * tree_root == 1. Returns the result of the tree root walk.
 * NOTE(review): the handling of a failed chunk tree walk is not visible in
 * this listing.
 */
10180 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10184 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10188 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - recreate the in-memory block groups from the chunk
 * items of the chunk tree.
 *
 * The avail_*_alloc_bits of @fs_info are cleared first. For every chunk item
 * a block group is added with btrfs_add_block_group() and the chunk range is
 * marked in fs_info->free_space_cache. Afterwards the block groups are
 * walked with btrfs_lookup_first_block_group().
 * NOTE(review): the initial key values, loop exits, the work done per block
 * group in the final walk and the return value are not visible in this
 * listing.
 */
10191 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10193 struct btrfs_block_group_cache *cache;
10194 struct btrfs_path *path;
10195 struct extent_buffer *leaf;
10196 struct btrfs_chunk *chunk;
10197 struct btrfs_key key;
10201 path = btrfs_alloc_path();
10206 key.type = BTRFS_CHUNK_ITEM_KEY;
10209 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10211 btrfs_free_path(path);
10216 * We do this in case the block groups were screwed up and had alloc
10217 * bits that aren't actually set on the chunks. This happens with
10218 * restored images every time and could happen in real life I guess.
10220 fs_info->avail_data_alloc_bits = 0;
10221 fs_info->avail_metadata_alloc_bits = 0;
10222 fs_info->avail_system_alloc_bits = 0;
10224 /* First we need to create the in-memory block groups */
/* Move on to the next leaf once the current one is exhausted */
10226 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10227 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10229 btrfs_free_path(path);
10237 leaf = path->nodes[0];
10238 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10239 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10244 chunk = btrfs_item_ptr(leaf, path->slots[0],
10245 struct btrfs_chunk);
/* key.offset is the start of the chunk, the length comes from the item */
10246 btrfs_add_block_group(fs_info, 0,
10247 btrfs_chunk_type(leaf, chunk),
10248 key.objectid, key.offset,
10249 btrfs_chunk_length(leaf, chunk));
10250 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10251 key.offset + btrfs_chunk_length(leaf, chunk),
10257 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue right behind the block group just found */
10261 start = cache->key.objectid + cache->key.offset;
10264 btrfs_free_path(path);
/*
 * reset_balance - remove the traces of a pending balance.
 *
 * Steps visible here, all inside @trans:
 *  1) search the balance item in the tree root and delete it,
 *  2) delete the root items with objectid BTRFS_TREE_RELOC_OBJECTID from the
 *     tree root, in batches collected via del_slot/del_nr,
 *  3) read the data reloc tree, re-initialise its root block with
 *     btrfs_fsck_reinit_root() and recreate its root directory.
 * NOTE(review): several branches, the labels and the return value are not
 * visible in this listing.
 */
10268 static int reset_balance(struct btrfs_trans_handle *trans,
10269 struct btrfs_fs_info *fs_info)
10271 struct btrfs_root *root = fs_info->tree_root;
10272 struct btrfs_path *path;
10273 struct extent_buffer *leaf;
10274 struct btrfs_key key;
10275 int del_slot, del_nr = 0;
10279 path = btrfs_alloc_path();
10283 key.objectid = BTRFS_BALANCE_OBJECTID;
10284 key.type = BTRFS_BALANCE_ITEM_KEY;
10287 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10292 goto reinit_data_reloc;
10297 ret = btrfs_del_item(trans, root, path);
10300 btrfs_release_path(path);
/* Now look for reloc tree root items */
10302 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10303 key.type = BTRFS_ROOT_ITEM_KEY;
10306 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10310 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10315 ret = btrfs_del_items(trans, root, path,
10322 btrfs_release_path(path);
10325 ret = btrfs_search_slot(trans, root, &key, path,
10332 leaf = path->nodes[0];
10333 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10334 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10336 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10341 del_slot = path->slots[0];
10350 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10354 btrfs_release_path(path);
/* From here on root refers to the data reloc tree, not the tree root */
10357 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10358 key.type = BTRFS_ROOT_ITEM_KEY;
10359 key.offset = (u64)-1;
10360 root = btrfs_read_fs_root(fs_info, &key);
10361 if (IS_ERR(root)) {
10362 fprintf(stderr, "Error reading data reloc tree\n");
10363 ret = PTR_ERR(root);
10366 record_root_in_trans(trans, root);
10367 ret = btrfs_fsck_reinit_root(trans, root, 0);
10370 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10372 btrfs_free_path(path);
/*
 * reinit_extent_tree - rebuild an empty extent tree that again holds the
 * block group items.
 *
 * Sequence visible here: reject filesystems with the MIXED_GROUPS incompat
 * feature, pin all metadata blocks, drop and recreate the in-memory block
 * groups, re-initialise the extent root, insert one block group item per
 * in-memory block group, and finally reset a pending balance.
 * NOTE(review): the error returns and the final return value are not visible
 * in this listing.
 */
10376 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10377 struct btrfs_fs_info *fs_info)
10383 * The only reason we don't do this is because right now we're just
10384 * walking the trees we find and pinning down their bytes, we don't look
10385 * at any of the leaves. In order to do mixed groups we'd have to check
10386 * the leaves of any fs roots and pin down the bytes for any file
10387 * extents we find. Not hard but why do it if we don't have to?
10389 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10390 fprintf(stderr, "We don't support re-initing the extent tree "
10391 "for mixed block groups yet, please notify a btrfs "
10392 "developer you want to do this so they can add this "
10393 "functionality.\n");
10398 * first we need to walk all of the trees except the extent tree and pin
10399 * down the bytes that are in use so we don't overwrite any existing
10402 ret = pin_metadata_blocks(fs_info);
10404 fprintf(stderr, "error pinning down used bytes\n");
10409 * Need to drop all the block groups since we're going to recreate all
10412 btrfs_free_block_groups(fs_info);
10413 ret = reset_block_groups(fs_info);
10415 fprintf(stderr, "error resetting the block groups\n");
10419 /* Ok we can allocate now, reinit the extent root */
10420 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10422 fprintf(stderr, "extent root initialization failed\n");
10424 * When the transaction code is updated we should end the
10425 * transaction, but for now progs only knows about commit so
10426 * just return an error.
10432 * Now we have all the in-memory block groups setup so we can make
10433 * allocations properly, and the metadata we care about is safe since we
10434 * pinned all of it above.
10437 struct btrfs_block_group_cache *cache;
10439 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group, then write its item into the new tree */
10442 start = cache->key.objectid + cache->key.offset;
10443 ret = btrfs_insert_item(trans, fs_info->extent_root,
10444 &cache->key, &cache->item,
10445 sizeof(cache->item));
10447 fprintf(stderr, "Error adding block group\n");
10450 btrfs_extent_post_op(trans, fs_info->extent_root);
10453 ret = reset_balance(trans, fs_info);
10455 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - force a COW of the metadata block @eb.
 *
 * @root: any root of the filesystem; it is replaced by the root that owns
 *        @eb according to btrfs_header_owner(eb).
 * @eb:   tree block to re-COW.
 *
 * A transaction is started on the owner root and btrfs_search_slot() is
 * called with cow == 1, path->lowest_level set to the level of @eb and the
 * first key of @eb as search key; the transaction is then committed.
 * NOTE(review): the result of btrfs_commit_transaction() is not used on the
 * visible line; the return statement is not visible in this listing.
 */
10460 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10462 struct btrfs_path *path;
10463 struct btrfs_trans_handle *trans;
10464 struct btrfs_key key;
10467 printf("Recowing metadata block %llu\n", eb->start);
10468 key.objectid = btrfs_header_owner(eb);
10469 key.type = BTRFS_ROOT_ITEM_KEY;
10470 key.offset = (u64)-1;
10472 root = btrfs_read_fs_root(root->fs_info, &key);
10473 if (IS_ERR(root)) {
10474 fprintf(stderr, "Couldn't find owner root %llu\n",
10476 return PTR_ERR(root);
10479 path = btrfs_alloc_path();
10483 trans = btrfs_start_transaction(root, 1);
10484 if (IS_ERR(trans)) {
10485 btrfs_free_path(path);
10486 return PTR_ERR(trans);
/* Stop the search at the level of eb; key is reused for eb's first key */
10489 path->lowest_level = btrfs_header_level(eb);
10490 if (path->lowest_level)
10491 btrfs_node_key_to_cpu(eb, &key, 0);
10493 btrfs_item_key_to_cpu(eb, &key, 0);
10495 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10496 btrfs_commit_transaction(trans, root);
10497 btrfs_free_path(path);
/*
 * delete_bad_item - delete the item described by @bad from its tree.
 *
 * @root: any root of the filesystem; it is replaced by the root with
 *        objectid bad->root_id.
 * @bad:  root id and key of the item to delete.
 *
 * The item is searched with ins_len == -1 and cow == 1 inside a new
 * transaction, deleted with btrfs_del_item(), and the transaction is
 * committed.
 * NOTE(review): the handling of the search result is not visible in this
 * listing; the result of btrfs_commit_transaction() is not used on the
 * visible line.
 */
10501 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10503 struct btrfs_path *path;
10504 struct btrfs_trans_handle *trans;
10505 struct btrfs_key key;
10508 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10509 bad->key.type, bad->key.offset);
/* Look up the tree the bad item lives in */
10510 key.objectid = bad->root_id;
10511 key.type = BTRFS_ROOT_ITEM_KEY;
10512 key.offset = (u64)-1;
10514 root = btrfs_read_fs_root(root->fs_info, &key);
10515 if (IS_ERR(root)) {
10516 fprintf(stderr, "Couldn't find owner root %llu\n",
10518 return PTR_ERR(root);
10521 path = btrfs_alloc_path();
10525 trans = btrfs_start_transaction(root, 1);
10526 if (IS_ERR(trans)) {
10527 btrfs_free_path(path);
10528 return PTR_ERR(trans);
10531 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10537 ret = btrfs_del_item(trans, root, path);
10539 btrfs_commit_transaction(trans, root);
10540 btrfs_free_path(path);
/*
 * zero_log_tree - clear the log root recorded in the super block.
 *
 * Sets log_root and log_root_level of the super block copy to 0 inside a
 * transaction and commits it; ret carries the PTR_ERR of a failed
 * transaction start or the commit result.
 * NOTE(review): the return statement is not visible in this listing.
 */
10544 static int zero_log_tree(struct btrfs_root *root)
10546 struct btrfs_trans_handle *trans;
10549 trans = btrfs_start_transaction(root, 1);
10550 if (IS_ERR(trans)) {
10551 ret = PTR_ERR(trans);
10554 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10555 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10556 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - checksum a data range one sector at a time.
 *
 * @trans:     running transaction.
 * @csum_root: csum tree; also supplies the sector size.
 * @buf:       scratch buffer the data of one sector is read into.
 * @start:     start bytenr of the range; the length parameter is declared
 *             on a line not visible in this listing and is used as len.
 *
 * Each iteration reads the sector at start + offset and hands it to
 * btrfs_csum_file_block().
 * NOTE(review): error handling and the return statement are not visible in
 * this listing.
 */
10560 static int populate_csum(struct btrfs_trans_handle *trans,
10561 struct btrfs_root *csum_root, char *buf, u64 start,
10568 while (offset < len) {
10569 sectorsize = csum_root->sectorsize;
10570 ret = read_extent_data(csum_root, buf, start + offset,
10574 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10575 start + offset, buf, sectorsize);
10578 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - checksum the data of all regular file
 * extents referenced from one fs/subvolume tree.
 *
 * @trans:     running transaction.
 * @csum_root: csum tree that receives the checksums.
 * @cur_root:  fs/subvolume tree to scan.
 *
 * For every EXTENT_DATA item of type BTRFS_FILE_EXTENT_REG the on-disk range
 * (disk_bytenr, disk_num_bytes) is passed to populate_csum().
 * NOTE(review): the start key, the actions taken on skipped items and on
 * -EEXIST, the cleanup of buf and the return value are not visible in this
 * listing.
 */
10583 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10584 struct btrfs_root *csum_root,
10585 struct btrfs_root *cur_root)
10587 struct btrfs_path *path;
10588 struct btrfs_key key;
10589 struct extent_buffer *node;
10590 struct btrfs_file_extent_item *fi;
10597 path = btrfs_alloc_path();
/* Scratch buffer for one sector of data */
10600 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10610 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10613 /* Iterate all regular file extents and fill its csum */
10615 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10617 if (key.type != BTRFS_EXTENT_DATA_KEY)
10619 node = path->nodes[0];
10620 slot = path->slots[0];
10621 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10622 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range */
10624 start = btrfs_file_extent_disk_bytenr(node, fi);
10625 len = btrfs_file_extent_disk_num_bytes(node, fi);
10627 ret = populate_csum(trans, csum_root, buf, start, len);
10628 if (ret == -EEXIST)
10634 * TODO: if next leaf is corrupted, jump to nearest next valid
10637 ret = btrfs_next_item(cur_root, path);
10647 btrfs_free_path(path);
/*
 * fill_csum_tree_from_fs - rebuild checksums by scanning fs/subvolume trees.
 *
 * @trans:     running transaction.
 * @csum_root: csum tree that receives the checksums.
 *
 * Walks the root items of the tree root starting at BTRFS_FS_TREE_OBJECTID,
 * reads each fs tree with btrfs_read_fs_root() and passes it to
 * fill_csum_tree_from_one_fs_root().
 * NOTE(review): the actions taken by the filter tests below (stop or skip),
 * the error paths and the return value are not visible in this listing.
 */
10652 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10653 struct btrfs_root *csum_root)
10655 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10656 struct btrfs_path *path;
10657 struct btrfs_root *tree_root = fs_info->tree_root;
10658 struct btrfs_root *cur_root;
10659 struct extent_buffer *node;
10660 struct btrfs_key key;
10664 path = btrfs_alloc_path();
10668 key.objectid = BTRFS_FS_TREE_OBJECTID;
10670 key.type = BTRFS_ROOT_ITEM_KEY;
10672 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10681 node = path->nodes[0];
10682 slot = path->slots[0];
10683 btrfs_item_key_to_cpu(node, &key, slot);
/* Filters: objectid range, root item type, fs tree objectid */
10684 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10686 if (key.type != BTRFS_ROOT_ITEM_KEY)
10688 if (!is_fstree(key.objectid))
10690 key.offset = (u64)-1;
10692 cur_root = btrfs_read_fs_root(fs_info, &key);
10693 if (IS_ERR(cur_root) || !cur_root) {
10694 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10698 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10703 ret = btrfs_next_item(tree_root, path);
10713 btrfs_free_path(path);
/*
 * fill_csum_tree_from_extent - rebuild checksums by scanning the extent tree.
 *
 * @trans:     running transaction.
 * @csum_root: csum tree that receives the checksums; its fs_info supplies
 *             the extent root.
 *
 * Every EXTENT_ITEM that carries BTRFS_EXTENT_FLAG_DATA is passed to
 * populate_csum(), starting at key.objectid.
 * NOTE(review): the start key, how the slot is advanced, the cleanup of buf
 * and the return value are not visible in this listing.
 */
10717 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10718 struct btrfs_root *csum_root)
10720 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10721 struct btrfs_path *path;
10722 struct btrfs_extent_item *ei;
10723 struct extent_buffer *leaf;
10725 struct btrfs_key key;
10728 path = btrfs_alloc_path();
10733 key.type = BTRFS_EXTENT_ITEM_KEY;
10736 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10738 btrfs_free_path(path);
/* Scratch buffer for one sector of data */
10742 buf = malloc(csum_root->sectorsize);
10744 btrfs_free_path(path);
/* Move on to the next leaf once the current one is exhausted */
10749 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10750 ret = btrfs_next_leaf(extent_root, path);
10758 leaf = path->nodes[0];
10760 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10761 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10766 ei = btrfs_item_ptr(leaf, path->slots[0],
10767 struct btrfs_extent_item);
/* Only data extents carry checksums */
10768 if (!(btrfs_extent_flags(leaf, ei) &
10769 BTRFS_EXTENT_FLAG_DATA)) {
10774 ret = populate_csum(trans, csum_root, buf, key.objectid,
10781 btrfs_free_path(path);
10787 * Recalculate the csum and put it into the csum tree.
10789 * Extent tree init will wipe out all the extent info, so in that case, we
10790 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10791 * will use fs/subvol trees to init the csum tree.
/*
 * @trans:          running transaction.
 * @csum_root:      csum tree that receives the checksums.
 * @search_fs_tree: non-zero to collect data extents from the fs/subvolume
 *                  trees, zero to collect them from the extent tree.
 *
 * Returns the result of the selected helper.
 */
10793 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10794 struct btrfs_root *csum_root,
10795 int search_fs_tree)
10797 if (search_fs_tree)
10798 return fill_csum_tree_from_fs(trans, csum_root);
10800 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * free_roots_info_cache - tear down the global roots_info_cache.
 *
 * Does nothing when the cache was never allocated. Otherwise every entry is
 * removed from the tree, then the tree itself is freed and the global
 * pointer is reset to NULL so the cache can be rebuilt later.
 * NOTE(review): what is done with rii after it is unlinked is not visible
 * in this listing.
 */
10803 static void free_roots_info_cache(void)
10805 if (!roots_info_cache)
10808 while (!cache_tree_empty(roots_info_cache)) {
10809 struct cache_extent *entry;
10810 struct root_item_info *rii;
10812 entry = first_cache_extent(roots_info_cache);
10815 remove_cache_extent(roots_info_cache, entry);
/* The cache_extent is embedded in a root_item_info */
10816 rii = container_of(entry, struct root_item_info, cache_extent);
10820 free(roots_info_cache);
10821 roots_info_cache = NULL;
/*
 * build_roots_info_cache - scan the extent tree and record, per root id,
 * the highest-level tree block that references that root.
 *
 * @info: filesystem info; info->extent_root is scanned.
 *
 * Only tree block extents (METADATA_ITEM, or EXTENT_ITEM with the TREE_BLOCK
 * flag) whose first inline ref is a TREE_BLOCK_REF are considered. The root
 * id is taken from that inline ref and used as the cache key (start ==
 * root id, size == 1). An entry keeps level, bytenr and generation of the
 * highest-level block seen so far.
 * NOTE(review): the start key, the handling of the equal-level case, how
 * the slot is advanced, the error paths and the return value are not
 * visible in this listing.
 */
10824 static int build_roots_info_cache(struct btrfs_fs_info *info)
10827 struct btrfs_key key;
10828 struct extent_buffer *leaf;
10829 struct btrfs_path *path;
/* Allocate the global cache on first use */
10831 if (!roots_info_cache) {
10832 roots_info_cache = malloc(sizeof(*roots_info_cache));
10833 if (!roots_info_cache)
10835 cache_tree_init(roots_info_cache);
10838 path = btrfs_alloc_path();
10843 key.type = BTRFS_EXTENT_ITEM_KEY;
10846 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10849 leaf = path->nodes[0];
10852 struct btrfs_key found_key;
10853 struct btrfs_extent_item *ei;
10854 struct btrfs_extent_inline_ref *iref;
10855 int slot = path->slots[0];
10860 struct cache_extent *entry;
10861 struct root_item_info *rii;
/* Move on to the next leaf once the current one is exhausted */
10863 if (slot >= btrfs_header_nritems(leaf)) {
10864 ret = btrfs_next_leaf(info->extent_root, path);
10871 leaf = path->nodes[0];
10872 slot = path->slots[0];
10875 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10877 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10878 found_key.type != BTRFS_METADATA_ITEM_KEY)
10881 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10882 flags = btrfs_extent_flags(leaf, ei);
/* EXTENT_ITEMs are of interest only when they describe a tree block */
10884 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10885 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline refs follow
 * the extent item. Otherwise both come after / from the tree block info.
 */
10888 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10889 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10890 level = found_key.offset;
10892 struct btrfs_tree_block_info *binfo;
10894 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10895 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10896 level = btrfs_tree_block_level(leaf, binfo);
10900 * For a root extent, it must be of the following type and the
10901 * first (and only one) iref in the item.
10903 type = btrfs_extent_inline_ref_type(leaf, iref);
10904 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10907 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10908 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10910 rii = malloc(sizeof(struct root_item_info));
/* New entry: level (u8)-1 marks "no block recorded yet" (see test below) */
10915 rii->cache_extent.start = root_id;
10916 rii->cache_extent.size = 1;
10917 rii->level = (u8)-1;
10918 entry = &rii->cache_extent;
10919 ret = insert_cache_extent(roots_info_cache, entry);
10922 rii = container_of(entry, struct root_item_info,
10926 ASSERT(rii->cache_extent.start == root_id);
10927 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first block seen) replaces the recorded one */
10929 if (level > rii->level || rii->level == (u8)-1) {
10930 rii->level = level;
10931 rii->bytenr = found_key.objectid;
10932 rii->gen = btrfs_extent_generation(leaf, ei);
10933 rii->node_count = 1;
10934 } else if (level == rii->level) {
10942 btrfs_free_path(path);
/*
 * maybe_repair_root_item - compare a root item with the information
 * collected in roots_info_cache and optionally rewrite it.
 *
 * @info:           filesystem info.
 * @path:           path pointing at the root item (nodes[0] / slots[0]).
 * @root_key:       key of the root item; its objectid is the cache key.
 * @read_only_mode: when non-zero the root item is only compared, never
 *                  written.
 *
 * The root item is considered stale when its bytenr, level or generation
 * differs from the cached values. When not in read-only mode the three
 * fields are overwritten in the leaf with the cached values.
 * NOTE(review): the return values of the individual branches are not
 * visible in this listing.
 */
10947 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10948 struct btrfs_path *path,
10949 const struct btrfs_key *root_key,
10950 const int read_only_mode)
10952 const u64 root_id = root_key->objectid;
10953 struct cache_extent *entry;
10954 struct root_item_info *rii;
10955 struct btrfs_root_item ri;
10956 unsigned long offset;
10958 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10961 "Error: could not find extent items for root %llu\n",
10962 root_key->objectid);
10966 rii = container_of(entry, struct root_item_info, cache_extent);
10967 ASSERT(rii->cache_extent.start == root_id);
10968 ASSERT(rii->cache_extent.size == 1);
/* Exactly one candidate block at the highest level is required */
10970 if (rii->node_count != 1) {
10972 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item */
10977 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10978 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10980 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10981 btrfs_root_level(&ri) != rii->level ||
10982 btrfs_root_generation(&ri) != rii->gen) {
10985 * If we're in repair mode but our caller told us to not update
10986 * the root item, i.e. just check if it needs to be updated, don't
10987 * print this message, since the caller will call us again shortly
10988 * for the same root item without read only mode (the caller will
10989 * open a transaction first).
10991 if (!(read_only_mode && repair))
10993 "%sroot item for root %llu,"
10994 " current bytenr %llu, current gen %llu, current level %u,"
10995 " new bytenr %llu, new gen %llu, new level %u\n",
10996 (read_only_mode ? "" : "fixing "),
10998 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10999 btrfs_root_level(&ri),
11000 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the block found in the extent tree is reported */
11002 if (btrfs_root_generation(&ri) > rii->gen) {
11004 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11005 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write the corrected fields back into the leaf */
11009 if (!read_only_mode) {
11010 btrfs_set_root_bytenr(&ri, rii->bytenr);
11011 btrfs_set_root_level(&ri, rii->level);
11012 btrfs_set_root_generation(&ri, rii->gen);
11013 write_extent_buffer(path->nodes[0], &ri,
11014 offset, sizeof(ri));
11024 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11025 * caused read-only snapshots to be corrupted if they were created at a moment
11026 * when the source subvolume/snapshot had orphan items. The issue was that the
11027 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11028 * node instead of the post orphan cleanup root node.
11029 * So this function, and its callees, just detect and fix those cases. Even
11030 * though the regression was for read-only snapshots, this function applies to
11031 * any snapshot/subvolume root.
11032 * This must be run before any other repair code - not doing so makes other
11033 * repair code delete or modify backrefs in the extent tree for example, which
11034 * will result in an inconsistent fs after repairing the root items.
/*
 * repair_root_items - scan the root items stored in the tree root and hand
 * each one to maybe_repair_root_item().
 *
 * @info: filesystem to scan; the search runs on info->tree_root.
 *
 * The scan starts at objectid BTRFS_FIRST_FREE_OBJECTID with key type
 * BTRFS_ROOT_ITEM_KEY.  Keys of another type and BTRFS_TREE_RELOC_OBJECTID
 * are special-cased before maybe_repair_root_item() is called.  The roots
 * info cache is built first with build_roots_info_cache() and released with
 * free_roots_info_cache() in the cleanup at the end.
 *
 * cmd_check() reports a positive return value as the number of root items
 * that were fixed or found to be outdated.  An error from
 * btrfs_start_transaction() is propagated through PTR_ERR().
 */
11036 static int repair_root_items(struct btrfs_fs_info *info)
11038 struct btrfs_path *path = NULL;
11039 struct btrfs_key key;
11040 struct extent_buffer *leaf;
11041 struct btrfs_trans_handle *trans = NULL;
/* NOTE(review): presumably set once a leaf with a fixable root item is found, so that a transaction gets opened - confirm. */
11044 int need_trans = 0;
/* Build the roots info cache before walking the root items. */
11046 ret = build_roots_info_cache(info);
11050 path = btrfs_alloc_path();
/* First key of the scan. */
11056 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11057 key.type = BTRFS_ROOT_ITEM_KEY;
11062 * Avoid opening and committing transactions if a leaf doesn't have
11063 * any root items that need to be fixed, so that we avoid rotating
11064 * backup roots unnecessarily.
11067 trans = btrfs_start_transaction(info->tree_root, 1);
11068 if (IS_ERR(trans)) {
11069 ret = PTR_ERR(trans);
11074 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11078 leaf = path->nodes[0];
11081 struct btrfs_key found_key;
/* Slot is past the last item of this leaf: look up the key to resume from and release the path. */
11083 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11084 int no_more_keys = find_next_key(path, &key);
11086 btrfs_release_path(path);
11088 ret = btrfs_commit_transaction(trans,
11100 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11102 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11104 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11107 ret = maybe_repair_root_item(info, path, &found_key,
/* Repair mode with no transaction open yet: the path is released here (presumably to redo the search inside a transaction - confirm). */
11112 if (!trans && repair) {
11115 btrfs_release_path(path);
/* Cleanup: drop the cache and the path, then commit the transaction. */
11125 free_roots_info_cache();
11126 btrfs_free_path(path);
11128 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Help text for "btrfs check": the synopsis, a one-line summary, a longer
 * description, and then one entry per command line option.  cmd_check()
 * passes this array to usage().
 */
11135 const char * const cmd_check_usage[] = {
11136 "btrfs check [options] <device>",
11137 "Check structural integrity of a filesystem (unmounted).",
11138 "Check structural integrity of an unmounted filesystem. Verify internal",
11139 "trees' consistency and item connectivity. In the repair mode try to",
11140 "fix the problems found. ",
11141 "WARNING: the repair mode is considered dangerous",
11143 "-s|--super <superblock> use this superblock copy",
11144 "-b|--backup use the first valid backup root copy",
11145 "--repair try to repair the filesystem",
11146 "--readonly run in read-only mode (default)",
11147 "--init-csum-tree create a new CRC tree",
11148 "--init-extent-tree create a new extent tree",
11149 "--mode <MODE> select mode, allows to make some memory/IO",
11150 " trade-offs, where MODE is one of:",
11151 " original - read inodes and extents to memory (requires",
11152 " more memory, does less IO)",
11153 " lowmem - try to use less memory but read blocks again",
11155 "--check-data-csum verify checksums of data blocks",
11156 "-Q|--qgroup-report print a report on qgroup consistency",
11157 "-E|--subvol-extents <subvolid>",
11158 " print subvolume extents and sharing state",
11159 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11160 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11161 "-p|--progress indicate progress",
/*
 * cmd_check - entry point of "btrfs check".
 *
 * @argc, @argv: command line; after option parsing exactly one positional
 *               argument, the device (argv[optind]), is expected.
 *
 * Parses the options, refuses to run on a mounted device, opens the
 * filesystem with open_ctree_fs_info() and then runs the checks in the order
 * announced by the progress messages: extents, root items, free space tree
 * or free space cache, fs roots, csums, root refs and, when quotas are
 * enabled, quota groups.  Summary counters are printed at the end.
 */
11165 int cmd_check(int argc, char **argv)
11167 struct cache_tree root_cache;
11168 struct btrfs_root *root;
11169 struct btrfs_fs_info *info;
11172 u64 tree_root_bytenr = 0;
11173 u64 chunk_root_bytenr = 0;
11174 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11177 int init_csum_tree = 0;
11179 int qgroup_report = 0;
11180 int qgroups_repaired = 0;
/* Flags for open_ctree_fs_info(); the option handlers below OR in more bits. */
11181 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Codes for options that only have a long form, numbered from 257, i.e. above any single-character option code. */
11185 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11186 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11187 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11189 static const struct option long_options[] = {
11190 { "super", required_argument, NULL, 's' },
11191 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11192 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11193 { "init-csum-tree", no_argument, NULL,
11194 GETOPT_VAL_INIT_CSUM },
11195 { "init-extent-tree", no_argument, NULL,
11196 GETOPT_VAL_INIT_EXTENT },
11197 { "check-data-csum", no_argument, NULL,
11198 GETOPT_VAL_CHECK_CSUM },
11199 { "backup", no_argument, NULL, 'b' },
11200 { "subvol-extents", required_argument, NULL, 'E' },
11201 { "qgroup-report", no_argument, NULL, 'Q' },
11202 { "tree-root", required_argument, NULL, 'r' },
11203 { "chunk-root", required_argument, NULL,
11204 GETOPT_VAL_CHUNK_TREE },
11205 { "progress", no_argument, NULL, 'p' },
11206 { "mode", required_argument, NULL,
11208 { NULL, 0, NULL, 0}
/*
 * NOTE(review): cmd_check_usage advertises -Q and -E as short options, but
 * the optstring below only lists a, s, b, r and p; 'Q' and 'E' are reachable
 * through the long_options table only - confirm whether that is intended.
 */
11211 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11215 case 'a': /* ignored */ break;
11217 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11220 num = arg_strtou64(optarg);
11221 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11223 "ERROR: super mirror should be less than: %d\n",
11224 BTRFS_SUPER_MIRROR_MAX);
11227 bytenr = btrfs_sb_offset(((int)num));
11228 printf("using SB copy %llu, bytenr %llu\n", num,
11229 (unsigned long long)bytenr);
11235 subvolid = arg_strtou64(optarg);
11238 tree_root_bytenr = arg_strtou64(optarg);
11240 case GETOPT_VAL_CHUNK_TREE:
11241 chunk_root_bytenr = arg_strtou64(optarg);
11244 ctx.progress_enabled = true;
11248 usage(cmd_check_usage);
11249 case GETOPT_VAL_REPAIR:
11250 printf("enabling repair mode\n");
11252 ctree_flags |= OPEN_CTREE_WRITES;
11254 case GETOPT_VAL_READONLY:
11257 case GETOPT_VAL_INIT_CSUM:
11258 printf("Creating a new CRC tree\n");
11259 init_csum_tree = 1;
11261 ctree_flags |= OPEN_CTREE_WRITES;
11263 case GETOPT_VAL_INIT_EXTENT:
11264 init_extent_tree = 1;
11265 ctree_flags |= (OPEN_CTREE_WRITES |
11266 OPEN_CTREE_NO_BLOCK_GROUPS);
11269 case GETOPT_VAL_CHECK_CSUM:
11270 check_data_csum = 1;
11272 case GETOPT_VAL_MODE:
11273 check_mode = parse_check_mode(optarg);
11274 if (check_mode == CHECK_MODE_UNKNOWN) {
11275 error("unknown mode: %s", optarg);
/* Exactly one positional argument (the device) must remain. */
11282 if (check_argc_exact(argc - optind, 1))
11283 usage(cmd_check_usage);
/* With -p/--progress, set up the status reporting task. */
11285 if (ctx.progress_enabled) {
11286 ctx.tp = TASK_NOTHING;
11287 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11290 /* This check is the only reason for --readonly to exist */
11291 if (readonly && repair) {
11292 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11297 * Not supported yet
11299 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11300 error("Low memory mode doesn't support repair yet");
11305 cache_tree_init(&root_cache);
/* A negative result means the mount status could not be determined; a mounted device aborts the check. */
11307 if((ret = check_mounted(argv[optind])) < 0) {
11308 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11311 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11316 /* only allow partial opening under repair mode */
11318 ctree_flags |= OPEN_CTREE_PARTIAL;
11320 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11321 chunk_root_bytenr, ctree_flags);
11323 fprintf(stderr, "Couldn't open file system\n");
11328 global_info = info;
11329 root = info->fs_root;
11332 * repair mode will force us to commit transaction which
11333 * will make us fail to load log tree when mounting.
11335 if (repair && btrfs_super_log_root(info->super_copy)) {
11336 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11341 ret = zero_log_tree(root);
11343 fprintf(stderr, "fail to zero log tree\n");
11348 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* Report-only mode: print and verify quota groups via qgroup_verify_all(). */
11349 if (qgroup_report) {
11350 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11352 ret = qgroup_verify_all(info);
11358 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11359 subvolid, argv[optind], uuidbuf);
11360 ret = print_extent_state(info, subvolid);
11363 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree root, device root and chunk root must all be readable to continue. */
11365 if (!extent_buffer_uptodate(info->tree_root->node) ||
11366 !extent_buffer_uptodate(info->dev_root->node) ||
11367 !extent_buffer_uptodate(info->chunk_root->node)) {
11368 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
/* --init-extent-tree / --init-csum-tree: rebuild the requested trees inside one transaction before the normal check runs. */
11373 if (init_extent_tree || init_csum_tree) {
11374 struct btrfs_trans_handle *trans;
11376 trans = btrfs_start_transaction(info->extent_root, 0);
11377 if (IS_ERR(trans)) {
11378 fprintf(stderr, "Error starting transaction\n");
11379 ret = PTR_ERR(trans);
11383 if (init_extent_tree) {
11384 printf("Creating a new extent tree\n");
11385 ret = reinit_extent_tree(trans, info);
11390 if (init_csum_tree) {
11391 fprintf(stderr, "Reinit crc root\n");
11392 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11394 fprintf(stderr, "crc root initialization failed\n");
11399 ret = fill_csum_tree(trans, info->csum_root,
11402 fprintf(stderr, "crc refilling failed\n");
11407 * Ok now we commit and run the normal fsck, which will add
11408 * extent entries for all of the items it finds.
11410 ret = btrfs_commit_transaction(trans, info->extent_root);
11414 if (!extent_buffer_uptodate(info->extent_root->node)) {
11415 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11419 if (!extent_buffer_uptodate(info->csum_root->node)) {
11420 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
/* Extent check: the lowmem mode uses the _v2 implementation. */
11425 if (!ctx.progress_enabled)
11426 fprintf(stderr, "checking extents\n");
11427 if (check_mode == CHECK_MODE_LOWMEM)
11428 ret = check_chunks_and_extents_v2(root);
11430 ret = check_chunks_and_extents(root);
11432 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
/* A positive count from repair_root_items() is reported either as fixed roots or as roots with an outdated root item. */
11434 ret = repair_root_items(info);
11438 fprintf(stderr, "Fixed %d roots.\n", ret);
11440 } else if (ret > 0) {
11442 "Found %d roots with an outdated root item.\n",
11445 "Please run a filesystem check with the option --repair to fix them.\n");
11450 if (!ctx.progress_enabled) {
11451 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11452 fprintf(stderr, "checking free space tree\n");
11454 fprintf(stderr, "checking free space cache\n");
11456 ret = check_space_cache(root);
11461 * We used to have to have these hole extents in between our real
11462 * extents so if we don't have this flag set we need to make sure there
11463 * are no gaps in the file extents for inodes, otherwise we can just
11464 * ignore it when this happens.
11466 no_holes = btrfs_fs_incompat(root->fs_info,
11467 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11468 if (!ctx.progress_enabled)
11469 fprintf(stderr, "checking fs roots\n");
11470 ret = check_fs_roots(root, &root_cache);
11474 fprintf(stderr, "checking csums\n");
11475 ret = check_csums(root);
11479 fprintf(stderr, "checking root refs\n");
11480 ret = check_root_refs(root, &root_cache);
/* Repair mode only: re-COW every extent buffer queued on recow_ebs. */
11484 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11485 struct extent_buffer *eb;
11487 eb = list_first_entry(&root->fs_info->recow_ebs,
11488 struct extent_buffer, recow);
11489 list_del_init(&eb->recow);
11490 ret = recow_extent_buffer(root, eb);
/* Drain the delete_items list, deleting each queued bad item. */
11495 while (!list_empty(&delete_items)) {
11496 struct bad_item *bad;
11498 bad = list_first_entry(&delete_items, struct bad_item, list);
11499 list_del_init(&bad->list);
11501 ret = delete_bad_item(root, bad);
/* Quota groups are verified only when quotas are enabled on this filesystem. */
11505 if (info->quota_enabled) {
11507 fprintf(stderr, "checking quota groups\n");
11508 err = qgroup_verify_all(info);
11512 err = repair_qgroups(info, &qgroups_repaired);
11517 if (!list_empty(&root->fs_info->recow_ebs)) {
11518 fprintf(stderr, "Transid errors in file system\n");
11522 /* Don't override original ret */
11523 if (!ret && qgroups_repaired)
11524 ret = qgroups_repaired;
11526 if (found_old_backref) { /*
11527 * there was a disk format change when mixed
11528 * backref was in testing tree. The old format
11529 * existed about one week.
11531 printf("\n * Found old mixed backref format. "
11532 "The old format is not supported! *"
11533 "\n * Please mount the FS in readonly mode, "
11534 "backup data and re-format the FS. *\n\n");
11537 printf("found %llu bytes used err is %d\n",
11538 (unsigned long long)bytes_used, ret);
11539 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11540 printf("total tree bytes: %llu\n",
11541 (unsigned long long)total_btree_bytes);
11542 printf("total fs tree bytes: %llu\n",
11543 (unsigned long long)total_fs_tree_bytes);
11544 printf("total extent tree bytes: %llu\n",
11545 (unsigned long long)total_extent_tree_bytes);
11546 printf("btree space waste bytes: %llu\n",
11547 (unsigned long long)btree_space_waste);
11548 printf("file data blocks allocated: %llu\n referenced %llu\n",
11549 (unsigned long long)data_bytes_allocated,
11550 (unsigned long long)data_bytes_referenced);
11552 free_qgroup_counts();
11553 free_root_recs_tree(&root_cache);
11557 if (ctx.progress_enabled)
11558 task_deinit(ctx.info);