2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but does not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but does not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but does not match */
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 return container_of(back, struct data_backref, node);
140 * Much like data_backref, but with the undetermined members removed
141 * and changed to use a list_head.
142 * During extent scan, it is stored in root->orphan_data_extent.
143 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145 struct orphan_data_extent {
146 struct list_head list;
154 struct tree_backref {
155 struct extent_backref node;
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 return container_of(back, struct tree_backref, node);
167 /* Explicit initialization for extent_record::flag_block_full_backref */
168 enum { FLAG_UNSET = 2 };
170 struct extent_record {
171 struct list_head backrefs;
172 struct list_head dups;
173 struct list_head list;
174 struct cache_extent cache;
175 struct btrfs_disk_key parent_key;
180 u64 extent_item_refs;
182 u64 parent_generation;
186 unsigned int flag_block_full_backref:2;
187 unsigned int found_rec:1;
188 unsigned int content_checked:1;
189 unsigned int owner_ref_checked:1;
190 unsigned int is_root:1;
191 unsigned int metadata:1;
192 unsigned int bad_full_backref:1;
193 unsigned int crossing_stripes:1;
194 unsigned int wrong_chunk_type:1;
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 return container_of(entry, struct extent_record, list);
202 struct inode_backref {
203 struct list_head list;
204 unsigned int found_dir_item:1;
205 unsigned int found_dir_index:1;
206 unsigned int found_inode_ref:1;
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 return list_entry(entry, struct inode_backref, list);
221 struct root_item_record {
222 struct list_head list;
228 struct btrfs_key drop_key;
231 #define REF_ERR_NO_DIR_ITEM (1 << 0)
232 #define REF_ERR_NO_DIR_INDEX (1 << 1)
233 #define REF_ERR_NO_INODE_REF (1 << 2)
234 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
235 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
236 #define REF_ERR_DUP_INODE_REF (1 << 5)
237 #define REF_ERR_INDEX_UNMATCH (1 << 6)
238 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
239 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 100 */
240 #define REF_ERR_NO_ROOT_REF (1 << 9)
241 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
242 #define REF_ERR_DUP_ROOT_REF (1 << 11)
243 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
245 struct file_extent_hole {
251 struct inode_record {
252 struct list_head backrefs;
253 unsigned int checked:1;
254 unsigned int merging:1;
255 unsigned int found_inode_item:1;
256 unsigned int found_dir_item:1;
257 unsigned int found_file_extent:1;
258 unsigned int found_csum_item:1;
259 unsigned int some_csum_missing:1;
260 unsigned int nodatasum:1;
273 struct rb_root holes;
274 struct list_head orphan_extents;
279 #define I_ERR_NO_INODE_ITEM (1 << 0)
280 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
281 #define I_ERR_DUP_INODE_ITEM (1 << 2)
282 #define I_ERR_DUP_DIR_INDEX (1 << 3)
283 #define I_ERR_ODD_DIR_ITEM (1 << 4)
284 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
285 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
286 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
287 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 100 */
288 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
289 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 400 */
290 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
291 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
292 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
293 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
295 struct root_backref {
296 struct list_head list;
297 unsigned int found_dir_item:1;
298 unsigned int found_dir_index:1;
299 unsigned int found_back_ref:1;
300 unsigned int found_forward_ref:1;
301 unsigned int reachable:1;
310 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 return list_entry(entry, struct root_backref, list);
316 struct list_head backrefs;
317 struct cache_extent cache;
318 unsigned int found_root_item:1;
324 struct cache_extent cache;
329 struct cache_extent cache;
330 struct cache_tree root_cache;
331 struct cache_tree inode_cache;
332 struct inode_record *current;
341 struct walk_control {
342 struct cache_tree shared;
343 struct shared_node *nodes[BTRFS_MAX_LEVEL];
349 struct btrfs_key key;
351 struct list_head list;
354 struct extent_entry {
359 struct list_head list;
362 struct root_item_info {
363 /* level of the root */
365 /* number of nodes at this level, must be 1 for a root */
369 struct cache_extent cache_extent;
373 * Error bit for low memory mode check.
375 * Currently no caller cares about it yet. Just internal use for error classification.
378 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
379 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
380 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
381 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
382 #define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
383 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
384 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
385 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
386 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
387 #define CHUNK_TYPE_MISMATCH (1 << 8)
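/*
 * Worker thread for the interactive progress indicator: periodically prints
 * the name of the current check phase together with a spinning work
 * indicator until the phase is set to TASK_NOTHING.
 */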
389 static void *print_status_check(void *p)
391 struct task_ctx *priv = p;
392 const char work_indicator[] = { '.', 'o', 'O', 'o' };
394 static char *task_position_string[] = {
396 "checking free space cache",
400 task_period_start(priv->info, 1000 /* 1s */);
402 if (priv->tp == TASK_NOTHING)
406 printf("%s [%c]\r", task_position_string[priv->tp],
407 work_indicator[count % 4]);
410 task_period_wait(priv->info);
415 static int print_status_return(void *p)
423 static enum btrfs_check_mode parse_check_mode(const char *str)
425 if (strcmp(str, "lowmem") == 0)
426 return CHECK_MODE_LOWMEM;
427 if (strcmp(str, "orig") == 0)
428 return CHECK_MODE_ORIGINAL;
429 if (strcmp(str, "original") == 0)
430 return CHECK_MODE_ORIGINAL;
432 return CHECK_MODE_UNKNOWN;
435 /* Compatibility helper to allow reuse of the old code */
436 static u64 first_extent_gap(struct rb_root *holes)
438 struct file_extent_hole *hole;
440 if (RB_EMPTY_ROOT(holes))
443 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
447 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 struct file_extent_hole *hole1;
450 struct file_extent_hole *hole2;
452 hole1 = rb_entry(node1, struct file_extent_hole, node);
453 hole2 = rb_entry(node2, struct file_extent_hole, node);
455 if (hole1->start > hole2->start)
457 if (hole1->start < hole2->start)
459 /* Now hole1->start == hole2->start */
460 if (hole1->len >= hole2->len)
462 * Hole 1 will be the merge center;
463 * identical holes will be merged later
466 /* Hole 2 will be the merge center */
471 * Add a hole to the record
473 * This will do hole merging for copy_file_extent_holes(),
474 * which ensures no two recorded holes remain contiguous.
476 static int add_file_extent_hole(struct rb_root *holes,
479 struct file_extent_hole *hole;
480 struct file_extent_hole *prev = NULL;
481 struct file_extent_hole *next = NULL;
483 hole = malloc(sizeof(*hole));
488 /* Since compare will not return 0, no -EEXIST will happen */
489 rb_insert(holes, &hole->node, compare_hole);
491 /* simple merge with previous hole */
492 if (rb_prev(&hole->node))
493 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495 if (prev && prev->start + prev->len >= hole->start) {
496 hole->len = hole->start + hole->len - prev->start;
497 hole->start = prev->start;
498 rb_erase(&prev->node, holes);
503 /* iteratively merge with the following holes */
505 if (!rb_next(&hole->node))
507 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509 if (hole->start + hole->len >= next->start) {
510 if (hole->start + hole->len <= next->start + next->len)
511 hole->len = next->start + next->len -
513 rb_erase(&next->node, holes);
522 static int compare_hole_range(struct rb_node *node, void *data)
524 struct file_extent_hole *hole;
527 hole = (struct file_extent_hole *)data;
530 hole = rb_entry(node, struct file_extent_hole, node);
531 if (start < hole->start)
533 if (start >= hole->start && start < hole->start + hole->len)
539 * Delete a hole in the record
541 * This will do the hole split and is much stricter than add_file_extent_hole().
543 static int del_file_extent_hole(struct rb_root *holes,
546 struct file_extent_hole *hole;
547 struct file_extent_hole tmp;
552 struct rb_node *node;
559 node = rb_search(holes, &tmp, compare_hole_range, NULL);
562 hole = rb_entry(node, struct file_extent_hole, node);
563 if (start + len > hole->start + hole->len)
567 * Now there will be no overlap, delete the hole and re-add the
568 * split(s) if they exist.
570 if (start > hole->start) {
571 prev_start = hole->start;
572 prev_len = start - hole->start;
575 if (hole->start + hole->len > start + len) {
576 next_start = start + len;
577 next_len = hole->start + hole->len - start - len;
580 rb_erase(node, holes);
583 ret = add_file_extent_hole(holes, prev_start, prev_len);
588 ret = add_file_extent_hole(holes, next_start, next_len);
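/* Copy every hole record from @src into @dst, merging adjacent holes. */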
595 static int copy_file_extent_holes(struct rb_root *dst,
598 struct file_extent_hole *hole;
599 struct rb_node *node;
602 node = rb_first(src);
604 hole = rb_entry(node, struct file_extent_hole, node);
605 ret = add_file_extent_hole(dst, hole->start, hole->len);
608 node = rb_next(node);
613 static void free_file_extent_holes(struct rb_root *holes)
615 struct rb_node *node;
616 struct file_extent_hole *hole;
618 node = rb_first(holes);
620 hole = rb_entry(node, struct file_extent_hole, node);
621 rb_erase(node, holes);
623 node = rb_first(holes);
627 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
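/*
 * Make sure @root is marked dirty in the current transaction and keep a
 * reference to the current root node as the commit root.
 */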
629 static void record_root_in_trans(struct btrfs_trans_handle *trans,
630 struct btrfs_root *root)
632 if (root->last_trans != trans->transid) {
633 root->track_dirty = 1;
634 root->last_trans = trans->transid;
635 root->commit_root = root->node;
636 extent_buffer_get(root->node);
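/* Map an inode mode (S_IF*) to the matching BTRFS_FT_* directory entry type. */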
640 static u8 imode_to_type(u32 imode)
643 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
644 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
645 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
646 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
647 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
648 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
649 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
650 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
653 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
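/* rb-tree comparison callback ordering device_record entries by devid. */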
657 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 struct device_record *rec1;
660 struct device_record *rec2;
662 rec1 = rb_entry(node1, struct device_record, node);
663 rec2 = rb_entry(node2, struct device_record, node);
664 if (rec1->devid > rec2->devid)
666 else if (rec1->devid < rec2->devid)
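/*
 * Deep-copy an inode_record, duplicating its backrefs, orphan data extents
 * and file extent holes so the copy can be modified independently.
 */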
672 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 struct inode_record *rec;
675 struct inode_backref *backref;
676 struct inode_backref *orig;
677 struct inode_backref *tmp;
678 struct orphan_data_extent *src_orphan;
679 struct orphan_data_extent *dst_orphan;
684 rec = malloc(sizeof(*rec));
686 return ERR_PTR(-ENOMEM);
687 memcpy(rec, orig_rec, sizeof(*rec));
689 INIT_LIST_HEAD(&rec->backrefs);
690 INIT_LIST_HEAD(&rec->orphan_extents);
691 rec->holes = RB_ROOT;
693 list_for_each_entry(orig, &orig_rec->backrefs, list) {
694 size = sizeof(*orig) + orig->namelen + 1;
695 backref = malloc(size);
700 memcpy(backref, orig, size);
701 list_add_tail(&backref->list, &rec->backrefs);
703 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
704 dst_orphan = malloc(sizeof(*dst_orphan));
709 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
710 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
719 rb = rb_first(&rec->holes);
721 struct file_extent_hole *hole;
723 hole = rb_entry(rb, struct file_extent_hole, node);
729 if (!list_empty(&rec->backrefs))
730 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
731 list_del(&orig->list);
735 if (!list_empty(&rec->orphan_extents))
736 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
737 list_del(&orig->list);
746 static void print_orphan_data_extents(struct list_head *orphan_extents,
749 struct orphan_data_extent *orphan;
751 if (list_empty(orphan_extents))
753 printf("The following data extent is lost in tree %llu:\n",
755 list_for_each_entry(orphan, orphan_extents, list) {
756 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
757 orphan->objectid, orphan->offset, orphan->disk_bytenr,
762 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 u64 root_objectid = root->root_key.objectid;
765 int errors = rec->errors;
769 /* for reloc root errors, print the corresponding fs root objectid */
770 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
771 root_objectid = root->root_key.offset;
772 fprintf(stderr, "reloc");
774 fprintf(stderr, "root %llu inode %llu errors %x",
775 (unsigned long long) root_objectid,
776 (unsigned long long) rec->ino, rec->errors);
778 if (errors & I_ERR_NO_INODE_ITEM)
779 fprintf(stderr, ", no inode item");
780 if (errors & I_ERR_NO_ORPHAN_ITEM)
781 fprintf(stderr, ", no orphan item");
782 if (errors & I_ERR_DUP_INODE_ITEM)
783 fprintf(stderr, ", dup inode item");
784 if (errors & I_ERR_DUP_DIR_INDEX)
785 fprintf(stderr, ", dup dir index");
786 if (errors & I_ERR_ODD_DIR_ITEM)
787 fprintf(stderr, ", odd dir item");
788 if (errors & I_ERR_ODD_FILE_EXTENT)
789 fprintf(stderr, ", odd file extent");
790 if (errors & I_ERR_BAD_FILE_EXTENT)
791 fprintf(stderr, ", bad file extent");
792 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
793 fprintf(stderr, ", file extent overlap");
794 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
795 fprintf(stderr, ", file extent discount");
796 if (errors & I_ERR_DIR_ISIZE_WRONG)
797 fprintf(stderr, ", dir isize wrong");
798 if (errors & I_ERR_FILE_NBYTES_WRONG)
799 fprintf(stderr, ", nbytes wrong");
800 if (errors & I_ERR_ODD_CSUM_ITEM)
801 fprintf(stderr, ", odd csum item");
802 if (errors & I_ERR_SOME_CSUM_MISSING)
803 fprintf(stderr, ", some csum missing");
804 if (errors & I_ERR_LINK_COUNT_WRONG)
805 fprintf(stderr, ", link count wrong");
806 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
807 fprintf(stderr, ", orphan file extent");
808 fprintf(stderr, "\n");
809 /* Print the orphan extents if needed */
810 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
811 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813 /* Print the holes if needed */
814 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
815 struct file_extent_hole *hole;
816 struct rb_node *node;
819 node = rb_first(&rec->holes);
820 fprintf(stderr, "Found file extent holes:\n");
823 hole = rb_entry(node, struct file_extent_hole, node);
824 fprintf(stderr, "\tstart: %llu, len: %llu\n",
825 hole->start, hole->len);
826 node = rb_next(node);
829 fprintf(stderr, "\tstart: 0, len: %llu\n",
831 root->fs_info->sectorsize));
835 static void print_ref_error(int errors)
837 if (errors & REF_ERR_NO_DIR_ITEM)
838 fprintf(stderr, ", no dir item");
839 if (errors & REF_ERR_NO_DIR_INDEX)
840 fprintf(stderr, ", no dir index");
841 if (errors & REF_ERR_NO_INODE_REF)
842 fprintf(stderr, ", no inode ref");
843 if (errors & REF_ERR_DUP_DIR_ITEM)
844 fprintf(stderr, ", dup dir item");
845 if (errors & REF_ERR_DUP_DIR_INDEX)
846 fprintf(stderr, ", dup dir index");
847 if (errors & REF_ERR_DUP_INODE_REF)
848 fprintf(stderr, ", dup inode ref");
849 if (errors & REF_ERR_INDEX_UNMATCH)
850 fprintf(stderr, ", index mismatch");
851 if (errors & REF_ERR_FILETYPE_UNMATCH)
852 fprintf(stderr, ", filetype mismatch");
853 if (errors & REF_ERR_NAME_TOO_LONG)
854 fprintf(stderr, ", name too long");
855 if (errors & REF_ERR_NO_ROOT_REF)
856 fprintf(stderr, ", no root ref");
857 if (errors & REF_ERR_NO_ROOT_BACKREF)
858 fprintf(stderr, ", no root backref");
859 if (errors & REF_ERR_DUP_ROOT_REF)
860 fprintf(stderr, ", dup root ref");
861 if (errors & REF_ERR_DUP_ROOT_BACKREF)
862 fprintf(stderr, ", dup root backref");
863 fprintf(stderr, "\n");
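/*
 * Find the inode_record for @ino in @inode_cache, creating an empty record if
 * none exists yet. With @mod set, a record shared by multiple users is cloned
 * first so the caller can modify it safely.
 */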
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869 struct ptr_node *node;
870 struct cache_extent *cache;
871 struct inode_record *rec = NULL;
874 cache = lookup_cache_extent(inode_cache, ino, 1);
876 node = container_of(cache, struct ptr_node, cache);
878 if (mod && rec->refs > 1) {
879 node->data = clone_inode_rec(rec);
880 if (IS_ERR(node->data))
886 rec = calloc(1, sizeof(*rec));
888 return ERR_PTR(-ENOMEM);
890 rec->extent_start = (u64)-1;
892 INIT_LIST_HEAD(&rec->backrefs);
893 INIT_LIST_HEAD(&rec->orphan_extents);
894 rec->holes = RB_ROOT;
896 node = malloc(sizeof(*node));
899 return ERR_PTR(-ENOMEM);
901 node->cache.start = ino;
902 node->cache.size = 1;
905 if (ino == BTRFS_FREE_INO_OBJECTID)
908 ret = insert_cache_extent(inode_cache, &node->cache);
910 return ERR_PTR(-EEXIST);
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 struct orphan_data_extent *orphan;
919 while (!list_empty(orphan_extents)) {
920 orphan = list_entry(orphan_extents->next,
921 struct orphan_data_extent, list);
922 list_del(&orphan->list);
927 static void free_inode_rec(struct inode_record *rec)
929 struct inode_backref *backref;
934 while (!list_empty(&rec->backrefs)) {
935 backref = to_inode_backref(rec->backrefs.next);
936 list_del(&backref->list);
939 free_orphan_data_extents(&rec->orphan_extents);
940 free_file_extent_holes(&rec->holes);
944 static int can_free_inode_rec(struct inode_record *rec)
946 if (!rec->errors && rec->checked && rec->found_inode_item &&
947 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953 struct inode_record *rec)
955 struct cache_extent *cache;
956 struct inode_backref *tmp, *backref;
957 struct ptr_node *node;
960 if (!rec->found_inode_item)
963 filetype = imode_to_type(rec->imode);
964 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965 if (backref->found_dir_item && backref->found_dir_index) {
966 if (backref->filetype != filetype)
967 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968 if (!backref->errors && backref->found_inode_ref &&
969 rec->nlink == rec->found_link) {
970 list_del(&backref->list);
976 if (!rec->checked || rec->merging)
979 if (S_ISDIR(rec->imode)) {
980 if (rec->found_size != rec->isize)
981 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982 if (rec->found_file_extent)
983 rec->errors |= I_ERR_ODD_FILE_EXTENT;
984 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985 if (rec->found_dir_item)
986 rec->errors |= I_ERR_ODD_DIR_ITEM;
987 if (rec->found_size != rec->nbytes)
988 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989 if (rec->nlink > 0 && !no_holes &&
990 (rec->extent_end < rec->isize ||
991 first_extent_gap(&rec->holes) < rec->isize))
992 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996 if (rec->found_csum_item && rec->nodatasum)
997 rec->errors |= I_ERR_ODD_CSUM_ITEM;
998 if (rec->some_csum_missing && !rec->nodatasum)
999 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002 BUG_ON(rec->refs != 1);
1003 if (can_free_inode_rec(rec)) {
1004 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005 node = container_of(cache, struct ptr_node, cache);
1006 BUG_ON(node->data != rec);
1007 remove_cache_extent(inode_cache, &node->cache);
1009 free_inode_rec(rec);
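/*
 * Check whether an orphan item exists for @ino in @root; returns 0 if it
 * does, non-zero otherwise.
 */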
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 struct btrfs_path path;
1016 struct btrfs_key key;
1019 key.objectid = BTRFS_ORPHAN_OBJECTID;
1020 key.type = BTRFS_ORPHAN_ITEM_KEY;
1023 btrfs_init_path(&path);
1024 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025 btrfs_release_path(&path);
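/*
 * Fill the current inode record from an INODE_ITEM, flagging duplicate inode
 * items and remembering zero-link inodes for the later orphan item check.
 */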
1031 static int process_inode_item(struct extent_buffer *eb,
1032 int slot, struct btrfs_key *key,
1033 struct shared_node *active_node)
1035 struct inode_record *rec;
1036 struct btrfs_inode_item *item;
1038 rec = active_node->current;
1039 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040 if (rec->found_inode_item) {
1041 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045 rec->nlink = btrfs_inode_nlink(eb, item);
1046 rec->isize = btrfs_inode_size(eb, item);
1047 rec->nbytes = btrfs_inode_nbytes(eb, item);
1048 rec->imode = btrfs_inode_mode(eb, item);
1049 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051 rec->found_inode_item = 1;
1052 if (rec->nlink == 0)
1053 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054 maybe_free_inode_rec(&active_node->inode_cache, rec);
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060 int namelen, u64 dir)
1062 struct inode_backref *backref;
1064 list_for_each_entry(backref, &rec->backrefs, list) {
1065 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067 if (backref->dir != dir || backref->namelen != namelen)
1069 if (memcmp(name, backref->name, namelen))
1074 backref = malloc(sizeof(*backref) + namelen + 1);
1077 memset(backref, 0, sizeof(*backref));
1079 backref->namelen = namelen;
1080 memcpy(backref->name, name, namelen);
1081 backref->name[namelen] = '\0';
1082 list_add_tail(&backref->list, &rec->backrefs);
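/*
 * Record one directory entry or inode ref for inode @ino under the given
 * name, cross-checking the index and file type against what has already been
 * seen and accumulating REF_ERR_* bits on the backref.
 */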
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087 u64 ino, u64 dir, u64 index,
1088 const char *name, int namelen,
1089 u8 filetype, u8 itemtype, int errors)
1091 struct inode_record *rec;
1092 struct inode_backref *backref;
1094 rec = get_inode_rec(inode_cache, ino, 1);
1095 BUG_ON(IS_ERR(rec));
1096 backref = get_inode_backref(rec, name, namelen, dir);
1099 backref->errors |= errors;
1100 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101 if (backref->found_dir_index)
1102 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103 if (backref->found_inode_ref && backref->index != index)
1104 backref->errors |= REF_ERR_INDEX_UNMATCH;
1105 if (backref->found_dir_item && backref->filetype != filetype)
1106 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108 backref->index = index;
1109 backref->filetype = filetype;
1110 backref->found_dir_index = 1;
1111 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113 if (backref->found_dir_item)
1114 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115 if (backref->found_dir_index && backref->filetype != filetype)
1116 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118 backref->filetype = filetype;
1119 backref->found_dir_item = 1;
1120 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122 if (backref->found_inode_ref)
1123 backref->errors |= REF_ERR_DUP_INODE_REF;
1124 if (backref->found_dir_index && backref->index != index)
1125 backref->errors |= REF_ERR_INDEX_UNMATCH;
1127 backref->index = index;
1129 backref->ref_type = itemtype;
1130 backref->found_inode_ref = 1;
1135 maybe_free_inode_rec(inode_cache, rec);
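/*
 * Merge the state collected in @src into @dst: replay its backrefs into
 * @dst_cache and combine link counts, sizes, extent ranges, holes and error
 * bits.
 */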
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140 struct cache_tree *dst_cache)
1142 struct inode_backref *backref;
1147 list_for_each_entry(backref, &src->backrefs, list) {
1148 if (backref->found_dir_index) {
1149 add_inode_backref(dst_cache, dst->ino, backref->dir,
1150 backref->index, backref->name,
1151 backref->namelen, backref->filetype,
1152 BTRFS_DIR_INDEX_KEY, backref->errors);
1154 if (backref->found_dir_item) {
1156 add_inode_backref(dst_cache, dst->ino,
1157 backref->dir, 0, backref->name,
1158 backref->namelen, backref->filetype,
1159 BTRFS_DIR_ITEM_KEY, backref->errors);
1161 if (backref->found_inode_ref) {
1162 add_inode_backref(dst_cache, dst->ino,
1163 backref->dir, backref->index,
1164 backref->name, backref->namelen, 0,
1165 backref->ref_type, backref->errors);
1169 if (src->found_dir_item)
1170 dst->found_dir_item = 1;
1171 if (src->found_file_extent)
1172 dst->found_file_extent = 1;
1173 if (src->found_csum_item)
1174 dst->found_csum_item = 1;
1175 if (src->some_csum_missing)
1176 dst->some_csum_missing = 1;
1177 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1183 BUG_ON(src->found_link < dir_count);
1184 dst->found_link += src->found_link - dir_count;
1185 dst->found_size += src->found_size;
1186 if (src->extent_start != (u64)-1) {
1187 if (dst->extent_start == (u64)-1) {
1188 dst->extent_start = src->extent_start;
1189 dst->extent_end = src->extent_end;
1191 if (dst->extent_end > src->extent_start)
1192 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193 else if (dst->extent_end < src->extent_start) {
1194 ret = add_file_extent_hole(&dst->holes,
1196 src->extent_start - dst->extent_end);
1198 if (dst->extent_end < src->extent_end)
1199 dst->extent_end = src->extent_end;
1203 dst->errors |= src->errors;
1204 if (src->found_inode_item) {
1205 if (!dst->found_inode_item) {
1206 dst->nlink = src->nlink;
1207 dst->isize = src->isize;
1208 dst->nbytes = src->nbytes;
1209 dst->imode = src->imode;
1210 dst->nodatasum = src->nodatasum;
1211 dst->found_inode_item = 1;
1213 dst->errors |= I_ERR_DUP_INODE_ITEM;
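/*
 * Move every cached root and inode record from @src_node into @dst_node,
 * merging records that already exist in the destination.
 */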
1221 static int splice_shared_node(struct shared_node *src_node,
1222 struct shared_node *dst_node)
1224 struct cache_extent *cache;
1225 struct ptr_node *node, *ins;
1226 struct cache_tree *src, *dst;
1227 struct inode_record *rec, *conflict;
1228 u64 current_ino = 0;
1232 if (--src_node->refs == 0)
1234 if (src_node->current)
1235 current_ino = src_node->current->ino;
1237 src = &src_node->root_cache;
1238 dst = &dst_node->root_cache;
1240 cache = search_cache_extent(src, 0);
1242 node = container_of(cache, struct ptr_node, cache);
1244 cache = next_cache_extent(cache);
1247 remove_cache_extent(src, &node->cache);
1250 ins = malloc(sizeof(*ins));
1252 ins->cache.start = node->cache.start;
1253 ins->cache.size = node->cache.size;
1257 ret = insert_cache_extent(dst, &ins->cache);
1258 if (ret == -EEXIST) {
1259 conflict = get_inode_rec(dst, rec->ino, 1);
1260 BUG_ON(IS_ERR(conflict));
1261 merge_inode_recs(rec, conflict, dst);
1263 conflict->checked = 1;
1264 if (dst_node->current == conflict)
1265 dst_node->current = NULL;
1267 maybe_free_inode_rec(dst, conflict);
1268 free_inode_rec(rec);
1275 if (src == &src_node->root_cache) {
1276 src = &src_node->inode_cache;
1277 dst = &dst_node->inode_cache;
1281 if (current_ino > 0 && (!dst_node->current ||
1282 current_ino > dst_node->current->ino)) {
1283 if (dst_node->current) {
1284 dst_node->current->checked = 1;
1285 maybe_free_inode_rec(dst, dst_node->current);
1287 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288 BUG_ON(IS_ERR(dst_node->current));
1293 static void free_inode_ptr(struct cache_extent *cache)
1295 struct ptr_node *node;
1296 struct inode_record *rec;
1298 node = container_of(cache, struct ptr_node, cache);
1300 free_inode_rec(rec);
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309 struct cache_extent *cache;
1310 struct shared_node *node;
1312 cache = lookup_cache_extent(shared, bytenr, 1);
1314 node = container_of(cache, struct shared_node, cache);
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 struct shared_node *node;
1325 node = calloc(1, sizeof(*node));
1328 node->cache.start = bytenr;
1329 node->cache.size = 1;
1330 cache_tree_init(&node->root_cache);
1331 cache_tree_init(&node->inode_cache);
1334 ret = insert_cache_extent(shared, &node->cache);
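/*
 * Called when the walk reaches a block referenced more than once: either make
 * it the active shared node so records below it are collected only once, or
 * splice its previously collected records into the current active node.
 */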
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340 struct walk_control *wc, int level)
1342 struct shared_node *node;
1343 struct shared_node *dest;
1346 if (level == wc->active_node)
1349 BUG_ON(wc->active_node <= level);
1350 node = find_shared_node(&wc->shared, bytenr);
1352 ret = add_shared_node(&wc->shared, bytenr, refs);
1354 node = find_shared_node(&wc->shared, bytenr);
1355 wc->nodes[level] = node;
1356 wc->active_node = level;
1360 if (wc->root_level == wc->active_node &&
1361 btrfs_root_refs(&root->root_item) == 0) {
1362 if (--node->refs == 0) {
1363 free_inode_recs_tree(&node->root_cache);
1364 free_inode_recs_tree(&node->inode_cache);
1365 remove_cache_extent(&wc->shared, &node->cache);
1371 dest = wc->nodes[wc->active_node];
1372 splice_shared_node(node, dest);
1373 if (node->refs == 0) {
1374 remove_cache_extent(&wc->shared, &node->cache);
1380 static int leave_shared_node(struct btrfs_root *root,
1381 struct walk_control *wc, int level)
1383 struct shared_node *node;
1384 struct shared_node *dest;
1387 if (level == wc->root_level)
1390 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1394 BUG_ON(i >= BTRFS_MAX_LEVEL);
1396 node = wc->nodes[wc->active_node];
1397 wc->nodes[wc->active_node] = NULL;
1398 wc->active_node = i;
1400 dest = wc->nodes[wc->active_node];
1401 if (wc->active_node < wc->root_level ||
1402 btrfs_root_refs(&root->root_item) > 0) {
1403 BUG_ON(node->refs <= 1);
1404 splice_shared_node(node, dest);
1406 BUG_ON(node->refs < 2);
1415 * 1 - if the root with id child_root_id is a child of root parent_root_id
1416 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1417 * has other root(s) as parent(s)
1418 * 2 - if the root child_root_id doesn't have any parent roots
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423 struct btrfs_path path;
1424 struct btrfs_key key;
1425 struct extent_buffer *leaf;
1429 btrfs_init_path(&path);
1431 key.objectid = parent_root_id;
1432 key.type = BTRFS_ROOT_REF_KEY;
1433 key.offset = child_root_id;
1434 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1438 btrfs_release_path(&path);
1442 key.objectid = child_root_id;
1443 key.type = BTRFS_ROOT_BACKREF_KEY;
1445 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1451 leaf = path.nodes[0];
1452 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456 leaf = path.nodes[0];
1459 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460 if (key.objectid != child_root_id ||
1461 key.type != BTRFS_ROOT_BACKREF_KEY)
1466 if (key.offset == parent_root_id) {
1467 btrfs_release_path(&path);
1474 btrfs_release_path(&path);
1477 return has_parent ? 0 : 2;
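/*
 * Process one DIR_ITEM/DIR_INDEX item: iterate every dir entry it contains,
 * validate the entries and record a backref for the target inode or
 * subvolume root.
 */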
1480 static int process_dir_item(struct extent_buffer *eb,
1481 int slot, struct btrfs_key *key,
1482 struct shared_node *active_node)
1492 struct btrfs_dir_item *di;
1493 struct inode_record *rec;
1494 struct cache_tree *root_cache;
1495 struct cache_tree *inode_cache;
1496 struct btrfs_key location;
1497 char namebuf[BTRFS_NAME_LEN];
1499 root_cache = &active_node->root_cache;
1500 inode_cache = &active_node->inode_cache;
1501 rec = active_node->current;
1502 rec->found_dir_item = 1;
1504 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1505 total = btrfs_item_size_nr(eb, slot);
1506 while (cur < total) {
1508 btrfs_dir_item_key_to_cpu(eb, di, &location);
1509 name_len = btrfs_dir_name_len(eb, di);
1510 data_len = btrfs_dir_data_len(eb, di);
1511 filetype = btrfs_dir_type(eb, di);
1513 rec->found_size += name_len;
1514 if (cur + sizeof(*di) + name_len > total ||
1515 name_len > BTRFS_NAME_LEN) {
1516 error = REF_ERR_NAME_TOO_LONG;
1518 if (cur + sizeof(*di) > total)
1520 len = min_t(u32, total - cur - sizeof(*di),
1527 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1529 if (key->type == BTRFS_DIR_ITEM_KEY &&
1530 key->offset != btrfs_name_hash(namebuf, len)) {
1531 rec->errors |= I_ERR_ODD_DIR_ITEM;
1532 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1533 key->objectid, key->offset, namebuf, len, filetype,
1534 key->offset, btrfs_name_hash(namebuf, len));
1537 if (location.type == BTRFS_INODE_ITEM_KEY) {
1538 add_inode_backref(inode_cache, location.objectid,
1539 key->objectid, key->offset, namebuf,
1540 len, filetype, key->type, error);
1541 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1542 add_inode_backref(root_cache, location.objectid,
1543 key->objectid, key->offset,
1544 namebuf, len, filetype,
1547 fprintf(stderr, "invalid location in dir item %u\n",
1549 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1550 key->objectid, key->offset, namebuf,
1551 len, filetype, key->type, error);
1554 len = sizeof(*di) + name_len + data_len;
1555 di = (struct btrfs_dir_item *)((char *)di + len);
1558 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1559 rec->errors |= I_ERR_DUP_DIR_INDEX;
1564 static int process_inode_ref(struct extent_buffer *eb,
1565 int slot, struct btrfs_key *key,
1566 struct shared_node *active_node)
1574 struct cache_tree *inode_cache;
1575 struct btrfs_inode_ref *ref;
1576 char namebuf[BTRFS_NAME_LEN];
1578 inode_cache = &active_node->inode_cache;
1580 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1581 total = btrfs_item_size_nr(eb, slot);
1582 while (cur < total) {
1583 name_len = btrfs_inode_ref_name_len(eb, ref);
1584 index = btrfs_inode_ref_index(eb, ref);
1586 /* inode_ref + namelen should not cross item boundary */
1587 if (cur + sizeof(*ref) + name_len > total ||
1588 name_len > BTRFS_NAME_LEN) {
1589 if (total < cur + sizeof(*ref))
1592 /* Still try to read out the remaining part */
1593 len = min_t(u32, total - cur - sizeof(*ref),
1595 error = REF_ERR_NAME_TOO_LONG;
1601 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1602 add_inode_backref(inode_cache, key->objectid, key->offset,
1603 index, namebuf, len, 0, key->type, error);
1605 len = sizeof(*ref) + name_len;
1606 ref = (struct btrfs_inode_ref *)((char *)ref + len);
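/* Same as process_inode_ref(), but for extended INODE_EXTREF items. */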
1612 static int process_inode_extref(struct extent_buffer *eb,
1613 int slot, struct btrfs_key *key,
1614 struct shared_node *active_node)
1623 struct cache_tree *inode_cache;
1624 struct btrfs_inode_extref *extref;
1625 char namebuf[BTRFS_NAME_LEN];
1627 inode_cache = &active_node->inode_cache;
1629 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1630 total = btrfs_item_size_nr(eb, slot);
1631 while (cur < total) {
1632 name_len = btrfs_inode_extref_name_len(eb, extref);
1633 index = btrfs_inode_extref_index(eb, extref);
1634 parent = btrfs_inode_extref_parent(eb, extref);
1635 if (name_len <= BTRFS_NAME_LEN) {
1639 len = BTRFS_NAME_LEN;
1640 error = REF_ERR_NAME_TOO_LONG;
1642 read_extent_buffer(eb, namebuf,
1643 (unsigned long)(extref + 1), len);
1644 add_inode_backref(inode_cache, key->objectid, parent,
1645 index, namebuf, len, 0, key->type, error);
1647 len = sizeof(*extref) + name_len;
1648 extref = (struct btrfs_inode_extref *)((char *)extref + len);
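/*
 * Count how many bytes of the range [@start, @start + @len) are covered by
 * checksum items in the csum tree and return that number in @found.
 */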
1655 static int count_csum_range(struct btrfs_root *root, u64 start,
1656 u64 len, u64 *found)
1658 struct btrfs_key key;
1659 struct btrfs_path path;
1660 struct extent_buffer *leaf;
1665 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1667 btrfs_init_path(&path);
1669 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1671 key.type = BTRFS_EXTENT_CSUM_KEY;
1673 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1677 if (ret > 0 && path.slots[0] > 0) {
1678 leaf = path.nodes[0];
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1680 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1681 key.type == BTRFS_EXTENT_CSUM_KEY)
1686 leaf = path.nodes[0];
1687 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1688 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1693 leaf = path.nodes[0];
1696 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1697 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1698 key.type != BTRFS_EXTENT_CSUM_KEY)
1701 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1702 if (key.offset >= start + len)
1705 if (key.offset > start)
1708 size = btrfs_item_size_nr(leaf, path.slots[0]);
1709 csum_end = key.offset + (size / csum_size) *
1710 root->fs_info->sectorsize;
1711 if (csum_end > start) {
1712 size = min(csum_end - start, len);
1721 btrfs_release_path(&path);
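/*
 * Process one EXTENT_DATA item: validate the extent fields, track holes and
 * overlaps in the owning inode record, and check csum coverage for regular
 * and preallocated extents.
 */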
1727 static int process_file_extent(struct btrfs_root *root,
1728 struct extent_buffer *eb,
1729 int slot, struct btrfs_key *key,
1730 struct shared_node *active_node)
1732 struct inode_record *rec;
1733 struct btrfs_file_extent_item *fi;
1735 u64 disk_bytenr = 0;
1736 u64 extent_offset = 0;
1737 u64 mask = root->fs_info->sectorsize - 1;
1741 rec = active_node->current;
1742 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743 rec->found_file_extent = 1;
1745 if (rec->extent_start == (u64)-1) {
1746 rec->extent_start = key->offset;
1747 rec->extent_end = key->offset;
1750 if (rec->extent_end > key->offset)
1751 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752 else if (rec->extent_end < key->offset) {
1753 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754 key->offset - rec->extent_end);
1759 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760 extent_type = btrfs_file_extent_type(eb, fi);
1762 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1765 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766 rec->found_size += num_bytes;
1767 num_bytes = (num_bytes + mask) & ~mask;
1768 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772 extent_offset = btrfs_file_extent_offset(eb, fi);
1773 if (num_bytes == 0 || (num_bytes & mask))
1774 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775 if (num_bytes + extent_offset >
1776 btrfs_file_extent_ram_bytes(eb, fi))
1777 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779 (btrfs_file_extent_compression(eb, fi) ||
1780 btrfs_file_extent_encryption(eb, fi) ||
1781 btrfs_file_extent_other_encoding(eb, fi)))
1782 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1783 if (disk_bytenr > 0)
1784 rec->found_size += num_bytes;
1786 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1788 rec->extent_end = key->offset + num_bytes;
1791 * The data reloc tree will copy full extents into its inode and then
1792 * copy the corresponding csums. Because the extent it copied could be
1793 * a preallocated extent that hasn't been written to yet there may be no
1794 * csums to copy, ergo we won't have csums for our file extent. This is
1795 * ok so just don't bother checking csums if the inode belongs to the
1798 if (disk_bytenr > 0 &&
1799 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1801 if (btrfs_file_extent_compression(eb, fi))
1802 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1804 disk_bytenr += extent_offset;
1806 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1809 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1811 rec->found_csum_item = 1;
1812 if (found < num_bytes)
1813 rec->some_csum_missing = 1;
1814 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1816 rec->errors |= I_ERR_ODD_CSUM_ITEM;
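/* Walk every item of a leaf and dispatch it to the per-item-type handlers above. */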
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823 struct walk_control *wc)
1825 struct btrfs_key key;
1829 struct cache_tree *inode_cache;
1830 struct shared_node *active_node;
1832 if (wc->root_level == wc->active_node &&
1833 btrfs_root_refs(&root->root_item) == 0)
1836 active_node = wc->nodes[wc->active_node];
1837 inode_cache = &active_node->inode_cache;
1838 nritems = btrfs_header_nritems(eb);
1839 for (i = 0; i < nritems; i++) {
1840 btrfs_item_key_to_cpu(eb, &key, i);
1842 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1844 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1847 if (active_node->current == NULL ||
1848 active_node->current->ino < key.objectid) {
1849 if (active_node->current) {
1850 active_node->current->checked = 1;
1851 maybe_free_inode_rec(inode_cache,
1852 active_node->current);
1854 active_node->current = get_inode_rec(inode_cache,
1856 BUG_ON(IS_ERR(active_node->current));
1859 case BTRFS_DIR_ITEM_KEY:
1860 case BTRFS_DIR_INDEX_KEY:
1861 ret = process_dir_item(eb, i, &key, active_node);
1863 case BTRFS_INODE_REF_KEY:
1864 ret = process_inode_ref(eb, i, &key, active_node);
1866 case BTRFS_INODE_EXTREF_KEY:
1867 ret = process_inode_extref(eb, i, &key, active_node);
1869 case BTRFS_INODE_ITEM_KEY:
1870 ret = process_inode_item(eb, i, &key, active_node);
1872 case BTRFS_EXTENT_DATA_KEY:
1873 ret = process_file_extent(root, eb, i, &key,
1884 u64 bytenr[BTRFS_MAX_LEVEL];
1885 u64 refs[BTRFS_MAX_LEVEL];
1886 int need_check[BTRFS_MAX_LEVEL];
1889 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1890 struct node_refs *nrefs, u64 level);
1891 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1892 unsigned int ext_ref);
1895 * Returns >0 Found error, not fatal, should continue
1896 * Returns <0 Fatal error, must exit the whole check
1897 * Returns 0 No errors found
1899 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1900 struct node_refs *nrefs, int *level, int ext_ref)
1902 struct extent_buffer *cur = path->nodes[0];
1903 struct btrfs_key key;
1907 int root_level = btrfs_header_level(root->node);
1909 int ret = 0; /* Final return value */
1910 int err = 0; /* Positive error bitmap */
1912 cur_bytenr = cur->start;
1914 /* skip to first inode item or the first inode number change */
1915 nritems = btrfs_header_nritems(cur);
1916 for (i = 0; i < nritems; i++) {
1917 btrfs_item_key_to_cpu(cur, &key, i);
1919 first_ino = key.objectid;
1920 if (key.type == BTRFS_INODE_ITEM_KEY ||
1921 (first_ino && first_ino != key.objectid))
1925 path->slots[0] = nritems;
1931 err |= check_inode_item(root, path, ext_ref);
1933 if (err & LAST_ITEM)
1936 /* still have inode items in this leaf */
1937 if (cur->start == cur_bytenr)
1941 * We have switched to another leaf; the nodes above may have changed.
1942 * Walk down the path and, if a node or leaf is shared, check whether
1943 * we can skip it.
1946 for (i = root_level; i >= 0; i--) {
1947 if (path->nodes[i]->start == nrefs->bytenr[i])
1950 ret = update_nodes_refs(root,
1951 path->nodes[i]->start,
1956 if (!nrefs->need_check[i]) {
1962 for (i = 0; i < *level; i++) {
1963 free_extent_buffer(path->nodes[i]);
1964 path->nodes[i] = NULL;
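/* Start readahead for the child blocks of @node from @slot onwards. */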
1973 static void reada_walk_down(struct btrfs_root *root,
1974 struct extent_buffer *node, int slot)
1976 struct btrfs_fs_info *fs_info = root->fs_info;
1983 level = btrfs_header_level(node);
1987 nritems = btrfs_header_nritems(node);
1988 for (i = slot; i < nritems; i++) {
1989 bytenr = btrfs_node_blockptr(node, i);
1990 ptr_gen = btrfs_node_ptr_generation(node, i);
1991 readahead_tree_block(fs_info, bytenr, ptr_gen);
1996 * Check the child node/leaf against the following conditions:
1997 * 1. the first item key of the node/leaf should match the key in the parent.
1999 * 2. the block pointer in the parent node should match the child node/leaf.
2000 * 3. the generations of the parent node and the child's header should agree.
2002 * Otherwise the child node/leaf pointed to by the key in the parent is invalid.
2004 * We would like to check the leaf owner too, but since subvolumes may share
2005 * leaves, the leaf owner check is not that strong; the key check should be
2006 * sufficient for that case.
2008 static int check_child_node(struct extent_buffer *parent, int slot,
2009 struct extent_buffer *child)
2011 struct btrfs_key parent_key;
2012 struct btrfs_key child_key;
2015 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2016 if (btrfs_header_level(child) == 0)
2017 btrfs_item_key_to_cpu(child, &child_key, 0);
2019 btrfs_node_key_to_cpu(child, &child_key, 0);
2021 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2024 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2025 parent_key.objectid, parent_key.type, parent_key.offset,
2026 child_key.objectid, child_key.type, child_key.offset);
2028 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2030 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2031 btrfs_node_blockptr(parent, slot),
2032 btrfs_header_bytenr(child));
2034 if (btrfs_node_ptr_generation(parent, slot) !=
2035 btrfs_header_generation(child)) {
2037 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2038 btrfs_header_generation(child),
2039 btrfs_node_ptr_generation(parent, slot));
2045 * For a shared tree node or leaf, we do not need to iterate it in every
2046 * fs or file tree check. Here we find all of its root ids and only check
2047 * it in the fs or file tree which has the smallest root id.
2049 static int need_check(struct btrfs_root *root, struct ulist *roots)
2051 struct rb_node *node;
2052 struct ulist_node *u;
2054 if (roots->nnodes == 1)
2057 node = rb_first(&roots->root);
2058 u = rb_entry(node, struct ulist_node, rb_node);
2060 * the current root id is not the smallest, so skip it and let it be checked
2061 * in the fs or file tree which has the smallest root id.
2063 if (root->objectid != u->val)
2070 * For a tree node or leaf, record its reference count, so that if we
2071 * process this node or leaf again later, we don't need to recompute it.
2073 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2074 struct node_refs *nrefs, u64 level)
2078 struct ulist *roots;
2080 if (nrefs->bytenr[level] != bytenr) {
2081 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2082 level, 1, &refs, NULL);
2086 nrefs->bytenr[level] = bytenr;
2087 nrefs->refs[level] = refs;
2089 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2094 check = need_check(root, roots);
2096 nrefs->need_check[level] = check;
2098 nrefs->need_check[level] = 1;
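/*
 * Walk down the tree from the current path position, entering shared nodes
 * where needed and processing every leaf that is reached.
 */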
2105 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2106 struct walk_control *wc, int *level,
2107 struct node_refs *nrefs)
2109 enum btrfs_tree_block_status status;
2112 struct btrfs_fs_info *fs_info = root->fs_info;
2113 struct extent_buffer *next;
2114 struct extent_buffer *cur;
2118 WARN_ON(*level < 0);
2119 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2121 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2122 refs = nrefs->refs[*level];
2125 ret = btrfs_lookup_extent_info(NULL, root,
2126 path->nodes[*level]->start,
2127 *level, 1, &refs, NULL);
2132 nrefs->bytenr[*level] = path->nodes[*level]->start;
2133 nrefs->refs[*level] = refs;
2137 ret = enter_shared_node(root, path->nodes[*level]->start,
2145 while (*level >= 0) {
2146 WARN_ON(*level < 0);
2147 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2148 cur = path->nodes[*level];
2150 if (btrfs_header_level(cur) != *level)
2153 if (path->slots[*level] >= btrfs_header_nritems(cur))
2156 ret = process_one_leaf(root, cur, wc);
2161 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2162 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2164 if (bytenr == nrefs->bytenr[*level - 1]) {
2165 refs = nrefs->refs[*level - 1];
2167 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2168 *level - 1, 1, &refs, NULL);
2172 nrefs->bytenr[*level - 1] = bytenr;
2173 nrefs->refs[*level - 1] = refs;
2178 ret = enter_shared_node(root, bytenr, refs,
2181 path->slots[*level]++;
2186 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2187 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2188 free_extent_buffer(next);
2189 reada_walk_down(root, cur, path->slots[*level]);
2190 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2191 if (!extent_buffer_uptodate(next)) {
2192 struct btrfs_key node_key;
2194 btrfs_node_key_to_cpu(path->nodes[*level],
2196 path->slots[*level]);
2197 btrfs_add_corrupt_extent_record(root->fs_info,
2199 path->nodes[*level]->start,
2200 root->fs_info->nodesize,
2207 ret = check_child_node(cur, path->slots[*level], next);
2209 free_extent_buffer(next);
2214 if (btrfs_is_leaf(next))
2215 status = btrfs_check_leaf(root, NULL, next);
2217 status = btrfs_check_node(root, NULL, next);
2218 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2219 free_extent_buffer(next);
2224 *level = *level - 1;
2225 free_extent_buffer(path->nodes[*level]);
2226 path->nodes[*level] = next;
2227 path->slots[*level] = 0;
2230 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2234 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2235 unsigned int ext_ref);
2238 * Returns >0 Found error, should continue
2239 * Returns <0 Fatal error, must exit the whole check
2240 * Returns 0 No errors found
2242 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2243 int *level, struct node_refs *nrefs, int ext_ref)
2245 enum btrfs_tree_block_status status;
2248 struct btrfs_fs_info *fs_info = root->fs_info;
2249 struct extent_buffer *next;
2250 struct extent_buffer *cur;
2253 WARN_ON(*level < 0);
2254 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2256 ret = update_nodes_refs(root, path->nodes[*level]->start,
2261 while (*level >= 0) {
2262 WARN_ON(*level < 0);
2263 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2264 cur = path->nodes[*level];
2266 if (btrfs_header_level(cur) != *level)
2269 if (path->slots[*level] >= btrfs_header_nritems(cur))
2271 /* Don't forget to check leaf/node validity */
2273 ret = btrfs_check_leaf(root, NULL, cur);
2274 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2278 ret = process_one_leaf_v2(root, path, nrefs,
2282 ret = btrfs_check_node(root, NULL, cur);
2283 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2288 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2289 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2291 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2294 if (!nrefs->need_check[*level - 1]) {
2295 path->slots[*level]++;
2299 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2300 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2301 free_extent_buffer(next);
2302 reada_walk_down(root, cur, path->slots[*level]);
2303 next = read_tree_block(fs_info, bytenr, ptr_gen);
2304 if (!extent_buffer_uptodate(next)) {
2305 struct btrfs_key node_key;
2307 btrfs_node_key_to_cpu(path->nodes[*level],
2309 path->slots[*level]);
2310 btrfs_add_corrupt_extent_record(fs_info,
2312 path->nodes[*level]->start,
2320 ret = check_child_node(cur, path->slots[*level], next);
2324 if (btrfs_is_leaf(next))
2325 status = btrfs_check_leaf(root, NULL, next);
2327 status = btrfs_check_node(root, NULL, next);
2328 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2329 free_extent_buffer(next);
2334 *level = *level - 1;
2335 free_extent_buffer(path->nodes[*level]);
2336 path->nodes[*level] = next;
2337 path->slots[*level] = 0;
2342 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2343 struct walk_control *wc, int *level)
2346 struct extent_buffer *leaf;
2348 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349 leaf = path->nodes[i];
2350 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2355 free_extent_buffer(path->nodes[*level]);
2356 path->nodes[*level] = NULL;
2357 BUG_ON(*level > wc->active_node);
2358 if (*level == wc->active_node)
2359 leave_shared_node(root, wc, *level);
2366 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2370 struct extent_buffer *leaf;
2372 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2373 leaf = path->nodes[i];
2374 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2379 free_extent_buffer(path->nodes[*level]);
2380 path->nodes[*level] = NULL;
2387 static int check_root_dir(struct inode_record *rec)
2389 struct inode_backref *backref;
2392 if (!rec->found_inode_item || rec->errors)
2394 if (rec->nlink != 1 || rec->found_link != 0)
2396 if (list_empty(&rec->backrefs))
2398 backref = to_inode_backref(rec->backrefs.next);
2399 if (!backref->found_inode_ref)
2401 if (backref->index != 0 || backref->namelen != 2 ||
2402 memcmp(backref->name, "..", 2))
2404 if (backref->found_dir_index || backref->found_dir_item)
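/* Reset the on-disk size of a directory inode to the size found during the scan. */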
2411 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2412 struct btrfs_root *root, struct btrfs_path *path,
2413 struct inode_record *rec)
2415 struct btrfs_inode_item *ei;
2416 struct btrfs_key key;
2419 key.objectid = rec->ino;
2420 key.type = BTRFS_INODE_ITEM_KEY;
2421 key.offset = (u64)-1;
2423 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2427 if (!path->slots[0]) {
2434 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2435 if (key.objectid != rec->ino) {
2440 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2441 struct btrfs_inode_item);
2442 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2443 btrfs_mark_buffer_dirty(path->nodes[0]);
2444 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2445 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2446 root->root_key.objectid);
2448 btrfs_release_path(path);
2452 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2453 struct btrfs_root *root,
2454 struct btrfs_path *path,
2455 struct inode_record *rec)
2459 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2460 btrfs_release_path(path);
2462 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2466 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2467 struct btrfs_root *root,
2468 struct btrfs_path *path,
2469 struct inode_record *rec)
2471 struct btrfs_inode_item *ei;
2472 struct btrfs_key key;
2475 key.objectid = rec->ino;
2476 key.type = BTRFS_INODE_ITEM_KEY;
2479 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2486 /* Since ret == 0, no need to check anything */
2487 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2488 struct btrfs_inode_item);
2489 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2490 btrfs_mark_buffer_dirty(path->nodes[0]);
2491 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2492 printf("reset nbytes for ino %llu root %llu\n",
2493 rec->ino, root->root_key.objectid);
2495 btrfs_release_path(path);
2499 static int add_missing_dir_index(struct btrfs_root *root,
2500 struct cache_tree *inode_cache,
2501 struct inode_record *rec,
2502 struct inode_backref *backref)
2504 struct btrfs_path path;
2505 struct btrfs_trans_handle *trans;
2506 struct btrfs_dir_item *dir_item;
2507 struct extent_buffer *leaf;
2508 struct btrfs_key key;
2509 struct btrfs_disk_key disk_key;
2510 struct inode_record *dir_rec;
2511 unsigned long name_ptr;
2512 u32 data_size = sizeof(*dir_item) + backref->namelen;
2515 trans = btrfs_start_transaction(root, 1);
2517 return PTR_ERR(trans);
2519 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2520 (unsigned long long)rec->ino);
2522 btrfs_init_path(&path);
2523 key.objectid = backref->dir;
2524 key.type = BTRFS_DIR_INDEX_KEY;
2525 key.offset = backref->index;
2526 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2529 leaf = path.nodes[0];
2530 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2532 disk_key.objectid = cpu_to_le64(rec->ino);
2533 disk_key.type = BTRFS_INODE_ITEM_KEY;
2534 disk_key.offset = 0;
2536 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2537 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2538 btrfs_set_dir_data_len(leaf, dir_item, 0);
2539 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2540 name_ptr = (unsigned long)(dir_item + 1);
2541 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2542 btrfs_mark_buffer_dirty(leaf);
2543 btrfs_release_path(&path);
2544 btrfs_commit_transaction(trans, root);
2546 backref->found_dir_index = 1;
2547 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2548 BUG_ON(IS_ERR(dir_rec));
2551 dir_rec->found_size += backref->namelen;
2552 if (dir_rec->found_size == dir_rec->isize &&
2553 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2554 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2555 if (dir_rec->found_size != dir_rec->isize)
2556 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2561 static int delete_dir_index(struct btrfs_root *root,
2562 struct inode_backref *backref)
2564 struct btrfs_trans_handle *trans;
2565 struct btrfs_dir_item *di;
2566 struct btrfs_path path;
2569 trans = btrfs_start_transaction(root, 1);
2571 return PTR_ERR(trans);
2573 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2574 (unsigned long long)backref->dir,
2575 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2576 (unsigned long long)root->objectid);
2578 btrfs_init_path(&path);
2579 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2580 backref->name, backref->namelen,
2581 backref->index, -1);
2584 btrfs_release_path(&path);
2585 btrfs_commit_transaction(trans, root);
2592 ret = btrfs_del_item(trans, root, &path);
2594 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2596 btrfs_release_path(&path);
2597 btrfs_commit_transaction(trans, root);
2601 static int create_inode_item(struct btrfs_root *root,
2602 struct inode_record *rec,
2605 struct btrfs_trans_handle *trans;
2606 struct btrfs_inode_item inode_item;
2607 time_t now = time(NULL);
2610 trans = btrfs_start_transaction(root, 1);
2611 if (IS_ERR(trans)) {
2612 ret = PTR_ERR(trans);
2616 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2617 "be incomplete, please check permissions and content after "
2618 "the fsck completes.\n", (unsigned long long)root->objectid,
2619 (unsigned long long)rec->ino);
2621 memset(&inode_item, 0, sizeof(inode_item));
2622 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2624 btrfs_set_stack_inode_nlink(&inode_item, 1);
2626 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2627 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2628 if (rec->found_dir_item) {
2629 if (rec->found_file_extent)
2630 fprintf(stderr, "root %llu inode %llu has both a dir "
2631 "item and extents, unsure if it is a dir or a "
2632 "regular file so setting it as a directory\n",
2633 (unsigned long long)root->objectid,
2634 (unsigned long long)rec->ino);
2635 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2636 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2637 } else if (!rec->found_dir_item) {
2638 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2639 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2641 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2642 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2643 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2644 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2645 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2646 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2647 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2648 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2650 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2652 btrfs_commit_transaction(trans, root);
2656 static int repair_inode_backrefs(struct btrfs_root *root,
2657 struct inode_record *rec,
2658 struct cache_tree *inode_cache,
2661 struct inode_backref *tmp, *backref;
2662 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2666 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2667 if (!delete && rec->ino == root_dirid) {
2668 if (!rec->found_inode_item) {
2669 ret = create_inode_item(root, rec, 1);
2676 /* Index 0 for the root dir is special, don't mess with it */
2677 if (rec->ino == root_dirid && backref->index == 0)
2681 ((backref->found_dir_index && !backref->found_inode_ref) ||
2682 (backref->found_dir_index && backref->found_inode_ref &&
2683 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2684 ret = delete_dir_index(root, backref);
2688 list_del(&backref->list);
2693 if (!delete && !backref->found_dir_index &&
2694 backref->found_dir_item && backref->found_inode_ref) {
2695 ret = add_missing_dir_index(root, inode_cache, rec,
2700 if (backref->found_dir_item &&
2701 backref->found_dir_index) {
2702 if (!backref->errors &&
2703 backref->found_inode_ref) {
2704 list_del(&backref->list);
2711 if (!delete && (!backref->found_dir_index &&
2712 !backref->found_dir_item &&
2713 backref->found_inode_ref)) {
2714 struct btrfs_trans_handle *trans;
2715 struct btrfs_key location;
2717 ret = check_dir_conflict(root, backref->name,
2723 * let the nlink fixing routine handle it,
2724 * which can do it better.
2729 location.objectid = rec->ino;
2730 location.type = BTRFS_INODE_ITEM_KEY;
2731 location.offset = 0;
2733 trans = btrfs_start_transaction(root, 1);
2734 if (IS_ERR(trans)) {
2735 ret = PTR_ERR(trans);
2738 fprintf(stderr, "adding missing dir index/item pair "
2740 (unsigned long long)rec->ino);
2741 ret = btrfs_insert_dir_item(trans, root, backref->name,
2743 backref->dir, &location,
2744 imode_to_type(rec->imode),
2747 btrfs_commit_transaction(trans, root);
2751 if (!delete && (backref->found_inode_ref &&
2752 backref->found_dir_index &&
2753 backref->found_dir_item &&
2754 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2755 !rec->found_inode_item)) {
2756 ret = create_inode_item(root, rec, 0);
2763 return ret ? ret : repaired;
2767 * To determine the file type for nlink/inode_item repair
2769 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2770 * Return -ENOENT if file type is not found.
2772 static int find_file_type(struct inode_record *rec, u8 *type)
2774 struct inode_backref *backref;
2776 /* For inode item recovered case */
2777 if (rec->found_inode_item) {
2778 *type = imode_to_type(rec->imode);
2782 list_for_each_entry(backref, &rec->backrefs, list) {
2783 if (backref->found_dir_index || backref->found_dir_item) {
2784 *type = backref->filetype;
2792 * To determine the file name for nlink repair
2794 * Return 0 if file name is found, set name and namelen.
2795 * Return -ENOENT if file name is not found.
2797 static int find_file_name(struct inode_record *rec,
2798 char *name, int *namelen)
2800 struct inode_backref *backref;
2802 list_for_each_entry(backref, &rec->backrefs, list) {
2803 if (backref->found_dir_index || backref->found_dir_item ||
2804 backref->found_inode_ref) {
2805 memcpy(name, backref->name, backref->namelen);
2806 *namelen = backref->namelen;
2813 /* Reset the nlink of the inode to the correct one */
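/*
 * Sketch of the flow implemented below:
 *   1. Forget the cached found_link and btrfs_unlink() every backref,
 *      dropping the invalid ones from the record so they are not re-added.
 *   2. Set nlink in the INODE_ITEM to 0.
 *   3. Re-add only the fully valid backrefs with btrfs_add_link(), which
 *      increments nlink again and leaves it at the correct value.
 */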
2814 static int reset_nlink(struct btrfs_trans_handle *trans,
2815 struct btrfs_root *root,
2816 struct btrfs_path *path,
2817 struct inode_record *rec)
2819 struct inode_backref *backref;
2820 struct inode_backref *tmp;
2821 struct btrfs_key key;
2822 struct btrfs_inode_item *inode_item;
2825 /* We don't trust the cached value either, reset it and iterate the backrefs */
2826 rec->found_link = 0;
2828 /* Remove all backrefs, including the valid ones */
2829 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2830 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2831 backref->index, backref->name,
2832 backref->namelen, 0);
2836 /* remove the invalid backref so it won't be added back */
2837 if (!(backref->found_dir_index &&
2838 backref->found_dir_item &&
2839 backref->found_inode_ref)) {
2840 list_del(&backref->list);
2847 /* Set nlink to 0 */
2848 key.objectid = rec->ino;
2849 key.type = BTRFS_INODE_ITEM_KEY;
2851 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2858 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2859 struct btrfs_inode_item);
2860 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2861 btrfs_mark_buffer_dirty(path->nodes[0]);
2862 btrfs_release_path(path);
2865 * Add back the valid inode_ref/dir_item/dir_index items;
2866 * btrfs_add_link() handles the nlink increment, so the new nlink will be correct
2868 list_for_each_entry(backref, &rec->backrefs, list) {
2869 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2870 backref->name, backref->namelen,
2871 backref->filetype, &backref->index, 1);
2876 btrfs_release_path(path);
2880 static int get_highest_inode(struct btrfs_trans_handle *trans,
2881 struct btrfs_root *root,
2882 struct btrfs_path *path,
2885 struct btrfs_key key, found_key;
2888 btrfs_init_path(path);
2889 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2891 key.type = BTRFS_INODE_ITEM_KEY;
2892 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2894 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2895 path->slots[0] - 1);
2896 *highest_ino = found_key.objectid;
2899 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2901 btrfs_release_path(path);
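/*
 * Note on the technique above: BTRFS_LAST_FREE_OBJECTID can never be an
 * existing inode, so the slot just before the search position holds the
 * highest in-use objectid below that limit; callers such as
 * repair_inode_nlinks() can then allocate the next objectid above it for
 * the lost+found directory.
 */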
2905 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 char *dir_name = "lost+found";
2911 char namebuf[BTRFS_NAME_LEN] = {0};
2916 int name_recovered = 0;
2917 int type_recovered = 0;
2921 * Get the file name and type first, before the invalid inode refs
2922 * are deleted by remove_all_invalid_backref()
2924 name_recovered = !find_file_name(rec, namebuf, &namelen);
2925 type_recovered = !find_file_type(rec, &type);
2927 if (!name_recovered) {
2928 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2929 rec->ino, rec->ino);
2930 namelen = count_digits(rec->ino);
2931 sprintf(namebuf, "%llu", rec->ino);
2934 if (!type_recovered) {
2935 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2937 type = BTRFS_FT_REG_FILE;
2941 ret = reset_nlink(trans, root, path, rec);
2944 "Failed to reset nlink for inode %llu: %s\n",
2945 rec->ino, strerror(-ret));
2949 if (rec->found_link == 0) {
2950 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2954 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2955 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2958 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2959 dir_name, strerror(-ret));
2962 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2963 namebuf, namelen, type, NULL, 1);
2965 * Add ".INO" suffix several times to handle case where
2966 * "FILENAME.INO" is already taken by another file.
2968 while (ret == -EEXIST) {
2970 * Conflicting file name: add ".INO" as a suffix (+1 for the '.')
2972 if (namelen + count_digits(rec->ino) + 1 >
2977 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2979 namelen += count_digits(rec->ino) + 1;
2980 ret = btrfs_add_link(trans, root, rec->ino,
2981 lost_found_ino, namebuf,
2982 namelen, type, NULL, 1);
2986 "Failed to link the inode %llu to %s dir: %s\n",
2987 rec->ino, dir_name, strerror(-ret));
2991 * Just increase the found_link, don't actually add the
2992 * backref. This will make things easier and this inode
2993 * record will be freed after the repair is done.
2994 * So fsck will not report a problem for this inode.
2997 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2998 namelen, namebuf, dir_name);
3000 printf("Fixed the nlink of inode %llu\n", rec->ino);
3003 * Clear the flag anyway, or we will loop forever for the same inode
3004 * as it will not be removed from the bad inode list and the dead loop
3007 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3008 btrfs_release_path(path);
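/*
 * Worked example of the naming scheme above: if inode 257 has no recoverable
 * backref name, it is linked into lost+found as "257"; on each -EEXIST retry
 * a ".257" suffix is appended, giving "257.257", "257.257.257", and so on,
 * bounded by BTRFS_NAME_LEN.
 */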
3013 * Check if there is any normal (reg or prealloc) file extent for the given
3015 * This is used to determine the file type when neither its dir_index/item nor
3016 * inode_item exists.
3018 * This will *NOT* report errors; if any error happens, the inode is simply
3019 * considered to have no normal file extent.
3021 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3023 struct btrfs_path path;
3024 struct btrfs_key key;
3025 struct btrfs_key found_key;
3026 struct btrfs_file_extent_item *fi;
3030 btrfs_init_path(&path);
3032 key.type = BTRFS_EXTENT_DATA_KEY;
3035 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3040 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3041 ret = btrfs_next_leaf(root, &path);
3048 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3050 if (found_key.objectid != ino ||
3051 found_key.type != BTRFS_EXTENT_DATA_KEY)
3053 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3054 struct btrfs_file_extent_item);
3055 type = btrfs_file_extent_type(path.nodes[0], fi);
3056 if (type != BTRFS_FILE_EXTENT_INLINE) {
3062 btrfs_release_path(&path);
3066 static u32 btrfs_type_to_imode(u8 type)
3068 static u32 imode_by_btrfs_type[] = {
3069 [BTRFS_FT_REG_FILE] = S_IFREG,
3070 [BTRFS_FT_DIR] = S_IFDIR,
3071 [BTRFS_FT_CHRDEV] = S_IFCHR,
3072 [BTRFS_FT_BLKDEV] = S_IFBLK,
3073 [BTRFS_FT_FIFO] = S_IFIFO,
3074 [BTRFS_FT_SOCK] = S_IFSOCK,
3075 [BTRFS_FT_SYMLINK] = S_IFLNK,
3078 return imode_by_btrfs_type[(type)];
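/*
 * For example btrfs_type_to_imode(BTRFS_FT_SYMLINK) == S_IFLNK; callers such
 * as repair_inode_no_item() below OR the result with permission bits to build
 * a complete st_mode.
 */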
3081 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3082 struct btrfs_root *root,
3083 struct btrfs_path *path,
3084 struct inode_record *rec)
3088 int type_recovered = 0;
3091 printf("Trying to rebuild inode:%llu\n", rec->ino);
3093 type_recovered = !find_file_type(rec, &filetype);
3096 * Try to determine the inode type if it was not found.
3098 * If a regular file extent is found, it must be a FILE.
3099 * If a dir_item/index is found, it must be a DIR.
3101 * For an undetermined one, use FILE as the fallback.
3104 * 1. If a backref (inode_index/item is already handled) to it is found,
3106 * Need a new inode-inode ref structure to allow searching for that.
3108 if (!type_recovered) {
3109 if (rec->found_file_extent &&
3110 find_normal_file_extent(root, rec->ino)) {
3112 filetype = BTRFS_FT_REG_FILE;
3113 } else if (rec->found_dir_item) {
3115 filetype = BTRFS_FT_DIR;
3116 } else if (!list_empty(&rec->orphan_extents)) {
3118 filetype = BTRFS_FT_REG_FILE;
3120 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3123 filetype = BTRFS_FT_REG_FILE;
3127 ret = btrfs_new_inode(trans, root, rec->ino,
3128 mode | btrfs_type_to_imode(filetype));
3133 * Here the inode rebuild is done; we only rebuild the inode item and
3134 * don't repair the nlink (like moving it to lost+found).
3135 * That is the job of the nlink repair.
3137 * We just fill the record and return
3139 rec->found_dir_item = 1;
3140 rec->imode = mode | btrfs_type_to_imode(filetype);
3142 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3143 /* Ensure the inode_nlinks repair function will be called */
3144 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3149 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3150 struct btrfs_root *root,
3151 struct btrfs_path *path,
3152 struct inode_record *rec)
3154 struct orphan_data_extent *orphan;
3155 struct orphan_data_extent *tmp;
3158 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3160 * Check for conflicting file extents
3162 * Here we don't know whether the extent is compressed or not,
3163 * so we can only assume it is not compressed and has no data offset,
3164 * and use its disk_len as the extent length.
3166 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3167 orphan->offset, orphan->disk_len, 0);
3168 btrfs_release_path(path);
3173 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3174 orphan->disk_bytenr, orphan->disk_len);
3175 ret = btrfs_free_extent(trans,
3176 root->fs_info->extent_root,
3177 orphan->disk_bytenr, orphan->disk_len,
3178 0, root->objectid, orphan->objectid,
3183 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3184 orphan->offset, orphan->disk_bytenr,
3185 orphan->disk_len, orphan->disk_len);
3189 /* Update file size info */
3190 rec->found_size += orphan->disk_len;
3191 if (rec->found_size == rec->nbytes)
3192 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3194 /* Update the file extent hole info too */
3195 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3199 if (RB_EMPTY_ROOT(&rec->holes))
3200 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3202 list_del(&orphan->list);
3205 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3210 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3211 struct btrfs_root *root,
3212 struct btrfs_path *path,
3213 struct inode_record *rec)
3215 struct rb_node *node;
3216 struct file_extent_hole *hole;
3220 node = rb_first(&rec->holes);
3224 hole = rb_entry(node, struct file_extent_hole, node);
3225 ret = btrfs_punch_hole(trans, root, rec->ino,
3226 hole->start, hole->len);
3229 ret = del_file_extent_hole(&rec->holes, hole->start,
3233 if (RB_EMPTY_ROOT(&rec->holes))
3234 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3235 node = rb_first(&rec->holes);
3237 /* special case for a file losing all its file extents */
3239 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3240 round_up(rec->isize,
3241 root->fs_info->sectorsize));
3245 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3246 rec->ino, root->objectid);
3251 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3253 struct btrfs_trans_handle *trans;
3254 struct btrfs_path path;
3257 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3258 I_ERR_NO_ORPHAN_ITEM |
3259 I_ERR_LINK_COUNT_WRONG |
3260 I_ERR_NO_INODE_ITEM |
3261 I_ERR_FILE_EXTENT_ORPHAN |
3262 I_ERR_FILE_EXTENT_DISCOUNT|
3263 I_ERR_FILE_NBYTES_WRONG)))
3267 * For nlink repair, it may create a dir and add link, so
3268 * 2 for parent(256)'s dir_index and dir_item
3269 * 2 for lost+found dir's inode_item and inode_ref
3270 * 1 for the new inode_ref of the file
3271 * 2 for lost+found dir's dir_index and dir_item for the file
3273 trans = btrfs_start_transaction(root, 7);
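/* 2 + 2 + 1 + 2 = 7 reserved items, matching the unit count requested above. */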
3275 return PTR_ERR(trans);
3277 btrfs_init_path(&path);
3278 if (rec->errors & I_ERR_NO_INODE_ITEM)
3279 ret = repair_inode_no_item(trans, root, &path, rec);
3280 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3281 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3282 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3283 ret = repair_inode_discount_extent(trans, root, &path, rec);
3284 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3285 ret = repair_inode_isize(trans, root, &path, rec);
3286 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3287 ret = repair_inode_orphan_item(trans, root, &path, rec);
3288 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3289 ret = repair_inode_nlinks(trans, root, &path, rec);
3290 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3291 ret = repair_inode_nbytes(trans, root, &path, rec);
3292 btrfs_commit_transaction(trans, root);
3293 btrfs_release_path(&path);
3297 static int check_inode_recs(struct btrfs_root *root,
3298 struct cache_tree *inode_cache)
3300 struct cache_extent *cache;
3301 struct ptr_node *node;
3302 struct inode_record *rec;
3303 struct inode_backref *backref;
3308 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3310 if (btrfs_root_refs(&root->root_item) == 0) {
3311 if (!cache_tree_empty(inode_cache))
3312 fprintf(stderr, "warning line %d\n", __LINE__);
3317 * We need to repair backrefs first because we could change some of the
3318 * errors in the inode recs.
3320 * We also need to go through and delete invalid backrefs first and then
3321 * add the correct ones second. We do this because we may get EEXIST
3322 * when adding back the correct index because we hadn't yet deleted the
3325 * For example, if we were missing a dir index then the directory's
3326 * isize would be wrong, so if we fixed the isize to what we thought it
3327 * would be and then fixed the backref we'd still have an invalid fs, so
3328 * we need to add back the dir index and then check to see if the isize
3333 if (stage == 3 && !err)
3336 cache = search_cache_extent(inode_cache, 0);
3337 while (repair && cache) {
3338 node = container_of(cache, struct ptr_node, cache);
3340 cache = next_cache_extent(cache);
3342 /* Need to free everything up and rescan */
3344 remove_cache_extent(inode_cache, &node->cache);
3346 free_inode_rec(rec);
3350 if (list_empty(&rec->backrefs))
3353 ret = repair_inode_backrefs(root, rec, inode_cache,
3367 rec = get_inode_rec(inode_cache, root_dirid, 0);
3368 BUG_ON(IS_ERR(rec));
3370 ret = check_root_dir(rec);
3372 fprintf(stderr, "root %llu root dir %llu error\n",
3373 (unsigned long long)root->root_key.objectid,
3374 (unsigned long long)root_dirid);
3375 print_inode_error(root, rec);
3380 struct btrfs_trans_handle *trans;
3382 trans = btrfs_start_transaction(root, 1);
3383 if (IS_ERR(trans)) {
3384 err = PTR_ERR(trans);
3389 "root %llu missing its root dir, recreating\n",
3390 (unsigned long long)root->objectid);
3392 ret = btrfs_make_root_dir(trans, root, root_dirid);
3395 btrfs_commit_transaction(trans, root);
3399 fprintf(stderr, "root %llu root dir %llu not found\n",
3400 (unsigned long long)root->root_key.objectid,
3401 (unsigned long long)root_dirid);
3405 cache = search_cache_extent(inode_cache, 0);
3408 node = container_of(cache, struct ptr_node, cache);
3410 remove_cache_extent(inode_cache, &node->cache);
3412 if (rec->ino == root_dirid ||
3413 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3414 free_inode_rec(rec);
3418 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3419 ret = check_orphan_item(root, rec->ino);
3421 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3422 if (can_free_inode_rec(rec)) {
3423 free_inode_rec(rec);
3428 if (!rec->found_inode_item)
3429 rec->errors |= I_ERR_NO_INODE_ITEM;
3430 if (rec->found_link != rec->nlink)
3431 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3433 ret = try_repair_inode(root, rec);
3434 if (ret == 0 && can_free_inode_rec(rec)) {
3435 free_inode_rec(rec);
3441 if (!(repair && ret == 0))
3443 print_inode_error(root, rec);
3444 list_for_each_entry(backref, &rec->backrefs, list) {
3445 if (!backref->found_dir_item)
3446 backref->errors |= REF_ERR_NO_DIR_ITEM;
3447 if (!backref->found_dir_index)
3448 backref->errors |= REF_ERR_NO_DIR_INDEX;
3449 if (!backref->found_inode_ref)
3450 backref->errors |= REF_ERR_NO_INODE_REF;
3451 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3452 " namelen %u name %s filetype %d errors %x",
3453 (unsigned long long)backref->dir,
3454 (unsigned long long)backref->index,
3455 backref->namelen, backref->name,
3456 backref->filetype, backref->errors);
3457 print_ref_error(backref->errors);
3459 free_inode_rec(rec);
3461 return (error > 0) ? -1 : 0;
3464 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3467 struct cache_extent *cache;
3468 struct root_record *rec = NULL;
3471 cache = lookup_cache_extent(root_cache, objectid, 1);
3473 rec = container_of(cache, struct root_record, cache);
3475 rec = calloc(1, sizeof(*rec));
3477 return ERR_PTR(-ENOMEM);
3478 rec->objectid = objectid;
3479 INIT_LIST_HEAD(&rec->backrefs);
3480 rec->cache.start = objectid;
3481 rec->cache.size = 1;
3483 ret = insert_cache_extent(root_cache, &rec->cache);
3485 return ERR_PTR(-EEXIST);
3490 static struct root_backref *get_root_backref(struct root_record *rec,
3491 u64 ref_root, u64 dir, u64 index,
3492 const char *name, int namelen)
3494 struct root_backref *backref;
3496 list_for_each_entry(backref, &rec->backrefs, list) {
3497 if (backref->ref_root != ref_root || backref->dir != dir ||
3498 backref->namelen != namelen)
3500 if (memcmp(name, backref->name, namelen))
3505 backref = calloc(1, sizeof(*backref) + namelen + 1);
3508 backref->ref_root = ref_root;
3510 backref->index = index;
3511 backref->namelen = namelen;
3512 memcpy(backref->name, name, namelen);
3513 backref->name[namelen] = '\0';
3514 list_add_tail(&backref->list, &rec->backrefs);
3518 static void free_root_record(struct cache_extent *cache)
3520 struct root_record *rec;
3521 struct root_backref *backref;
3523 rec = container_of(cache, struct root_record, cache);
3524 while (!list_empty(&rec->backrefs)) {
3525 backref = to_root_backref(rec->backrefs.next);
3526 list_del(&backref->list);
3533 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3535 static int add_root_backref(struct cache_tree *root_cache,
3536 u64 root_id, u64 ref_root, u64 dir, u64 index,
3537 const char *name, int namelen,
3538 int item_type, int errors)
3540 struct root_record *rec;
3541 struct root_backref *backref;
3543 rec = get_root_rec(root_cache, root_id);
3544 BUG_ON(IS_ERR(rec));
3545 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3548 backref->errors |= errors;
3550 if (item_type != BTRFS_DIR_ITEM_KEY) {
3551 if (backref->found_dir_index || backref->found_back_ref ||
3552 backref->found_forward_ref) {
3553 if (backref->index != index)
3554 backref->errors |= REF_ERR_INDEX_UNMATCH;
3556 backref->index = index;
3560 if (item_type == BTRFS_DIR_ITEM_KEY) {
3561 if (backref->found_forward_ref)
3563 backref->found_dir_item = 1;
3564 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3565 backref->found_dir_index = 1;
3566 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3567 if (backref->found_forward_ref)
3568 backref->errors |= REF_ERR_DUP_ROOT_REF;
3569 else if (backref->found_dir_item)
3571 backref->found_forward_ref = 1;
3572 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3573 if (backref->found_back_ref)
3574 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3575 backref->found_back_ref = 1;
3580 if (backref->found_forward_ref && backref->found_dir_item)
3581 backref->reachable = 1;
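/*
 * A root backref only becomes "reachable" once both the forward ROOT_REF and
 * the DIR_ITEM naming the subvolume have been seen; check_root_refs() below
 * relies on this flag when deciding whether an fs tree is actually referenced.
 */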
3585 static int merge_root_recs(struct btrfs_root *root,
3586 struct cache_tree *src_cache,
3587 struct cache_tree *dst_cache)
3589 struct cache_extent *cache;
3590 struct ptr_node *node;
3591 struct inode_record *rec;
3592 struct inode_backref *backref;
3595 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3596 free_inode_recs_tree(src_cache);
3601 cache = search_cache_extent(src_cache, 0);
3604 node = container_of(cache, struct ptr_node, cache);
3606 remove_cache_extent(src_cache, &node->cache);
3609 ret = is_child_root(root, root->objectid, rec->ino);
3615 list_for_each_entry(backref, &rec->backrefs, list) {
3616 BUG_ON(backref->found_inode_ref);
3617 if (backref->found_dir_item)
3618 add_root_backref(dst_cache, rec->ino,
3619 root->root_key.objectid, backref->dir,
3620 backref->index, backref->name,
3621 backref->namelen, BTRFS_DIR_ITEM_KEY,
3623 if (backref->found_dir_index)
3624 add_root_backref(dst_cache, rec->ino,
3625 root->root_key.objectid, backref->dir,
3626 backref->index, backref->name,
3627 backref->namelen, BTRFS_DIR_INDEX_KEY,
3631 free_inode_rec(rec);
3638 static int check_root_refs(struct btrfs_root *root,
3639 struct cache_tree *root_cache)
3641 struct root_record *rec;
3642 struct root_record *ref_root;
3643 struct root_backref *backref;
3644 struct cache_extent *cache;
3650 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3651 BUG_ON(IS_ERR(rec));
3654 /* fixme: this cannot detect circular references */
3657 cache = search_cache_extent(root_cache, 0);
3661 rec = container_of(cache, struct root_record, cache);
3662 cache = next_cache_extent(cache);
3664 if (rec->found_ref == 0)
3667 list_for_each_entry(backref, &rec->backrefs, list) {
3668 if (!backref->reachable)
3671 ref_root = get_root_rec(root_cache,
3673 BUG_ON(IS_ERR(ref_root));
3674 if (ref_root->found_ref > 0)
3677 backref->reachable = 0;
3679 if (rec->found_ref == 0)
3685 cache = search_cache_extent(root_cache, 0);
3689 rec = container_of(cache, struct root_record, cache);
3690 cache = next_cache_extent(cache);
3692 if (rec->found_ref == 0 &&
3693 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3694 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3695 ret = check_orphan_item(root->fs_info->tree_root,
3701 * If we don't have a root item then we likely just have
3702 * a dir item in a snapshot for this root but no actual
3703 * ref key or anything so it's meaningless.
3705 if (!rec->found_root_item)
3708 fprintf(stderr, "fs tree %llu not referenced\n",
3709 (unsigned long long)rec->objectid);
3713 if (rec->found_ref > 0 && !rec->found_root_item)
3715 list_for_each_entry(backref, &rec->backrefs, list) {
3716 if (!backref->found_dir_item)
3717 backref->errors |= REF_ERR_NO_DIR_ITEM;
3718 if (!backref->found_dir_index)
3719 backref->errors |= REF_ERR_NO_DIR_INDEX;
3720 if (!backref->found_back_ref)
3721 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3722 if (!backref->found_forward_ref)
3723 backref->errors |= REF_ERR_NO_ROOT_REF;
3724 if (backref->reachable && backref->errors)
3731 fprintf(stderr, "fs tree %llu refs %u %s\n",
3732 (unsigned long long)rec->objectid, rec->found_ref,
3733 rec->found_root_item ? "" : "not found");
3735 list_for_each_entry(backref, &rec->backrefs, list) {
3736 if (!backref->reachable)
3738 if (!backref->errors && rec->found_root_item)
3740 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3741 " index %llu namelen %u name %s errors %x\n",
3742 (unsigned long long)backref->ref_root,
3743 (unsigned long long)backref->dir,
3744 (unsigned long long)backref->index,
3745 backref->namelen, backref->name,
3747 print_ref_error(backref->errors);
3750 return errors > 0 ? 1 : 0;
3753 static int process_root_ref(struct extent_buffer *eb, int slot,
3754 struct btrfs_key *key,
3755 struct cache_tree *root_cache)
3761 struct btrfs_root_ref *ref;
3762 char namebuf[BTRFS_NAME_LEN];
3765 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3767 dirid = btrfs_root_ref_dirid(eb, ref);
3768 index = btrfs_root_ref_sequence(eb, ref);
3769 name_len = btrfs_root_ref_name_len(eb, ref);
3771 if (name_len <= BTRFS_NAME_LEN) {
3775 len = BTRFS_NAME_LEN;
3776 error = REF_ERR_NAME_TOO_LONG;
3778 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3780 if (key->type == BTRFS_ROOT_REF_KEY) {
3781 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3782 index, namebuf, len, key->type, error);
3784 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3785 index, namebuf, len, key->type, error);
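/*
 * Note on the two calls above: a ROOT_REF key is (parent_tree, ROOT_REF,
 * child_tree), so the backref is recorded for key->offset (the child tree),
 * while a ROOT_BACKREF key is (child_tree, ROOT_BACKREF, parent_tree) and is
 * recorded for key->objectid.
 */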
3790 static void free_corrupt_block(struct cache_extent *cache)
3792 struct btrfs_corrupt_block *corrupt;
3794 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3798 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3801 * Repair the btree of the given root.
3803 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3804 * and rebalance the tree.
3805 * After the fix, the btree should be writable.
3807 static int repair_btree(struct btrfs_root *root,
3808 struct cache_tree *corrupt_blocks)
3810 struct btrfs_trans_handle *trans;
3811 struct btrfs_path path;
3812 struct btrfs_corrupt_block *corrupt;
3813 struct cache_extent *cache;
3814 struct btrfs_key key;
3819 if (cache_tree_empty(corrupt_blocks))
3822 trans = btrfs_start_transaction(root, 1);
3823 if (IS_ERR(trans)) {
3824 ret = PTR_ERR(trans);
3825 fprintf(stderr, "Error starting transaction: %s\n",
3829 btrfs_init_path(&path);
3830 cache = first_cache_extent(corrupt_blocks);
3832 corrupt = container_of(cache, struct btrfs_corrupt_block,
3834 level = corrupt->level;
3835 path.lowest_level = level;
3836 key.objectid = corrupt->key.objectid;
3837 key.type = corrupt->key.type;
3838 key.offset = corrupt->key.offset;
3841 * Here we don't want to do any tree balancing, since it may
3842 * trigger a balance against a corrupted sibling leaf/node,
3843 * so ins_len is set to 0 here.
3844 * Balancing will be done after all corrupt nodes/leaves are deleted.
3846 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3849 offset = btrfs_node_blockptr(path.nodes[level],
3852 /* Remove the ptr */
3853 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3857 * Remove the corresponding extent;
3858 * the return value is not checked.
3860 btrfs_release_path(&path);
3861 ret = btrfs_free_extent(trans, root, offset,
3862 root->fs_info->nodesize, 0,
3863 root->root_key.objectid, level - 1, 0);
3864 cache = next_cache_extent(cache);
3867 /* Balance the btree using btrfs_search_slot() */
3868 cache = first_cache_extent(corrupt_blocks);
3870 corrupt = container_of(cache, struct btrfs_corrupt_block,
3872 memcpy(&key, &corrupt->key, sizeof(key));
3873 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3876 /* ret will always be > 0 since it won't find the item */
3878 btrfs_release_path(&path);
3879 cache = next_cache_extent(cache);
3882 btrfs_commit_transaction(trans, root);
3883 btrfs_release_path(&path);
3887 static int check_fs_root(struct btrfs_root *root,
3888 struct cache_tree *root_cache,
3889 struct walk_control *wc)
3895 struct btrfs_path path;
3896 struct shared_node root_node;
3897 struct root_record *rec;
3898 struct btrfs_root_item *root_item = &root->root_item;
3899 struct cache_tree corrupt_blocks;
3900 struct orphan_data_extent *orphan;
3901 struct orphan_data_extent *tmp;
3902 enum btrfs_tree_block_status status;
3903 struct node_refs nrefs;
3906 * Reuse the corrupt_block cache tree to record corrupted tree blocks
3908 * Unlike its usage in the extent tree check, here we do it on a per
3909 * fs/subvol tree basis.
3911 cache_tree_init(&corrupt_blocks);
3912 root->fs_info->corrupt_blocks = &corrupt_blocks;
3914 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3915 rec = get_root_rec(root_cache, root->root_key.objectid);
3916 BUG_ON(IS_ERR(rec));
3917 if (btrfs_root_refs(root_item) > 0)
3918 rec->found_root_item = 1;
3921 btrfs_init_path(&path);
3922 memset(&root_node, 0, sizeof(root_node));
3923 cache_tree_init(&root_node.root_cache);
3924 cache_tree_init(&root_node.inode_cache);
3925 memset(&nrefs, 0, sizeof(nrefs));
3927 /* Move the orphan extent records to the corresponding inode_record */
3928 list_for_each_entry_safe(orphan, tmp,
3929 &root->orphan_data_extents, list) {
3930 struct inode_record *inode;
3932 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3934 BUG_ON(IS_ERR(inode));
3935 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3936 list_move(&orphan->list, &inode->orphan_extents);
3939 level = btrfs_header_level(root->node);
3940 memset(wc->nodes, 0, sizeof(wc->nodes));
3941 wc->nodes[level] = &root_node;
3942 wc->active_node = level;
3943 wc->root_level = level;
3945 /* We may not have checked the root block, let's do that now */
3946 if (btrfs_is_leaf(root->node))
3947 status = btrfs_check_leaf(root, NULL, root->node);
3949 status = btrfs_check_node(root, NULL, root->node);
3950 if (status != BTRFS_TREE_BLOCK_CLEAN)
3953 if (btrfs_root_refs(root_item) > 0 ||
3954 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3955 path.nodes[level] = root->node;
3956 extent_buffer_get(root->node);
3957 path.slots[level] = 0;
3959 struct btrfs_key key;
3960 struct btrfs_disk_key found_key;
3962 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3963 level = root_item->drop_level;
3964 path.lowest_level = level;
3965 if (level > btrfs_header_level(root->node) ||
3966 level >= BTRFS_MAX_LEVEL) {
3967 error("ignoring invalid drop level: %u", level);
3970 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3973 btrfs_node_key(path.nodes[level], &found_key,
3975 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3976 sizeof(found_key)));
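/*
 * Sketch of the resume logic above: a root that is being deleted records how
 * far the drop got in drop_progress/drop_level, so instead of starting from
 * the tree root the walk re-seeds the path at that key and level, then checks
 * that the node key still matches the recorded progress.
 */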
3980 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3986 wret = walk_up_tree(root, &path, wc, &level);
3993 btrfs_release_path(&path);
3995 if (!cache_tree_empty(&corrupt_blocks)) {
3996 struct cache_extent *cache;
3997 struct btrfs_corrupt_block *corrupt;
3999 printf("The following tree block(s) is corrupted in tree %llu:\n",
4000 root->root_key.objectid);
4001 cache = first_cache_extent(&corrupt_blocks);
4003 corrupt = container_of(cache,
4004 struct btrfs_corrupt_block,
4006 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4007 cache->start, corrupt->level,
4008 corrupt->key.objectid, corrupt->key.type,
4009 corrupt->key.offset);
4010 cache = next_cache_extent(cache);
4013 printf("Try to repair the btree for root %llu\n",
4014 root->root_key.objectid);
4015 ret = repair_btree(root, &corrupt_blocks);
4017 fprintf(stderr, "Failed to repair btree: %s\n",
4020 printf("Btree for root %llu is fixed\n",
4021 root->root_key.objectid);
4025 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4029 if (root_node.current) {
4030 root_node.current->checked = 1;
4031 maybe_free_inode_rec(&root_node.inode_cache,
4035 err = check_inode_recs(root, &root_node.inode_cache);
4039 free_corrupt_blocks_tree(&corrupt_blocks);
4040 root->fs_info->corrupt_blocks = NULL;
4041 free_orphan_data_extents(&root->orphan_data_extents);
4045 static int fs_root_objectid(u64 objectid)
4047 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4048 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4050 return is_fstree(objectid);
4053 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4054 struct cache_tree *root_cache)
4056 struct btrfs_path path;
4057 struct btrfs_key key;
4058 struct walk_control wc;
4059 struct extent_buffer *leaf, *tree_node;
4060 struct btrfs_root *tmp_root;
4061 struct btrfs_root *tree_root = fs_info->tree_root;
4065 if (ctx.progress_enabled) {
4066 ctx.tp = TASK_FS_ROOTS;
4067 task_start(ctx.info);
4071 * Just in case we made any changes to the extent tree that weren't
4072 * reflected in the free space cache yet.
4075 reset_cached_block_groups(fs_info);
4076 memset(&wc, 0, sizeof(wc));
4077 cache_tree_init(&wc.shared);
4078 btrfs_init_path(&path);
4083 key.type = BTRFS_ROOT_ITEM_KEY;
4084 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4089 tree_node = tree_root->node;
4091 if (tree_node != tree_root->node) {
4092 free_root_recs_tree(root_cache);
4093 btrfs_release_path(&path);
4096 leaf = path.nodes[0];
4097 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4098 ret = btrfs_next_leaf(tree_root, &path);
4104 leaf = path.nodes[0];
4106 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4107 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4108 fs_root_objectid(key.objectid)) {
4109 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4110 tmp_root = btrfs_read_fs_root_no_cache(
4113 key.offset = (u64)-1;
4114 tmp_root = btrfs_read_fs_root(
4117 if (IS_ERR(tmp_root)) {
4121 ret = check_fs_root(tmp_root, root_cache, &wc);
4122 if (ret == -EAGAIN) {
4123 free_root_recs_tree(root_cache);
4124 btrfs_release_path(&path);
4129 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4130 btrfs_free_fs_root(tmp_root);
4131 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4132 key.type == BTRFS_ROOT_BACKREF_KEY) {
4133 process_root_ref(leaf, path.slots[0], &key,
4140 btrfs_release_path(&path);
4142 free_extent_cache_tree(&wc.shared);
4143 if (!cache_tree_empty(&wc.shared))
4144 fprintf(stderr, "warning line %d\n", __LINE__);
4146 task_stop(ctx.info);
4152 * Find the DIR_ITEM/DIR_INDEX for the given key and check that it matches
4153 * the specified INODE_REF/INODE_EXTREF.
4155 * @root: the root of the fs/file tree
4156 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4157 * @key: the key of the DIR_ITEM/DIR_INDEX
4158 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4159 * distinguish the root_dir from a normal dir/file
4160 * @name: the name in the INODE_REF/INODE_EXTREF
4161 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4162 * @mode: the st_mode of INODE_ITEM
4164 * Return 0 if no error occurred.
4165 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4166 * Return DIR_ITEM_MISSING if the DIR_ITEM/DIR_INDEX couldn't be found for a normal
4168 * Return DIR_ITEM_MISMATCH if the INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4169 * do not match for a normal dir/file.
4171 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4172 struct btrfs_key *key, u64 index, char *name,
4173 u32 namelen, u32 mode)
4175 struct btrfs_path path;
4176 struct extent_buffer *node;
4177 struct btrfs_dir_item *di;
4178 struct btrfs_key location;
4179 char namebuf[BTRFS_NAME_LEN] = {0};
4189 btrfs_init_path(&path);
4190 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4192 ret = DIR_ITEM_MISSING;
4196 /* Process the root dir and goto out */
4199 ret = ROOT_DIR_ERROR;
4201 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4203 ref_key->type == BTRFS_INODE_REF_KEY ?
4205 ref_key->objectid, ref_key->offset,
4206 key->type == BTRFS_DIR_ITEM_KEY ?
4207 "DIR_ITEM" : "DIR_INDEX");
4215 /* Process normal file/dir */
4217 ret = DIR_ITEM_MISSING;
4219 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4221 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4222 ref_key->objectid, ref_key->offset,
4223 key->type == BTRFS_DIR_ITEM_KEY ?
4224 "DIR_ITEM" : "DIR_INDEX",
4225 key->objectid, key->offset, namelen, name,
4226 imode_to_type(mode));
4230 /* Check whether inode_id/filetype/name match */
4231 node = path.nodes[0];
4232 slot = path.slots[0];
4233 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4234 total = btrfs_item_size_nr(node, slot);
4235 while (cur < total) {
4236 ret = DIR_ITEM_MISMATCH;
4237 name_len = btrfs_dir_name_len(node, di);
4238 data_len = btrfs_dir_data_len(node, di);
4240 btrfs_dir_item_key_to_cpu(node, di, &location);
4241 if (location.objectid != ref_key->objectid ||
4242 location.type != BTRFS_INODE_ITEM_KEY ||
4243 location.offset != 0)
4246 filetype = btrfs_dir_type(node, di);
4247 if (imode_to_type(mode) != filetype)
4250 if (cur + sizeof(*di) + name_len > total ||
4251 name_len > BTRFS_NAME_LEN) {
4252 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4254 key->type == BTRFS_DIR_ITEM_KEY ?
4255 "DIR_ITEM" : "DIR_INDEX",
4256 key->objectid, key->offset, name_len);
4258 if (cur + sizeof(*di) > total)
4260 len = min_t(u32, total - cur - sizeof(*di),
4266 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4267 if (len != namelen || strncmp(namebuf, name, len))
4273 len = sizeof(*di) + name_len + data_len;
4274 di = (struct btrfs_dir_item *)((char *)di + len);
4277 if (ret == DIR_ITEM_MISMATCH)
4279 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4281 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4282 ref_key->objectid, ref_key->offset,
4283 key->type == BTRFS_DIR_ITEM_KEY ?
4284 "DIR_ITEM" : "DIR_INDEX",
4285 key->objectid, key->offset, namelen, name,
4286 imode_to_type(mode));
4288 btrfs_release_path(&path);
4293 * Traverse the given INODE_REF and call find_dir_item() to find related
4294 * DIR_ITEM/DIR_INDEX.
4296 * @root: the root of the fs/file tree
4297 * @ref_key: the key of the INODE_REF
4298 * @refs: the count of INODE_REF
4299 * @mode: the st_mode of INODE_ITEM
4301 * Return 0 if no error occurred.
4303 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4304 struct extent_buffer *node, int slot, u64 *refs,
4307 struct btrfs_key key;
4308 struct btrfs_inode_ref *ref;
4309 char namebuf[BTRFS_NAME_LEN] = {0};
4317 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4318 total = btrfs_item_size_nr(node, slot);
4321 /* Update inode ref count */
4324 index = btrfs_inode_ref_index(node, ref);
4325 name_len = btrfs_inode_ref_name_len(node, ref);
4326 if (cur + sizeof(*ref) + name_len > total ||
4327 name_len > BTRFS_NAME_LEN) {
4328 warning("root %llu INODE_REF[%llu %llu] name too long",
4329 root->objectid, ref_key->objectid, ref_key->offset);
4331 if (total < cur + sizeof(*ref))
4333 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4338 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4340 /* Check root dir ref name */
4341 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4342 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4343 root->objectid, ref_key->objectid, ref_key->offset,
4345 err |= ROOT_DIR_ERROR;
4348 /* Find related DIR_INDEX */
4349 key.objectid = ref_key->offset;
4350 key.type = BTRFS_DIR_INDEX_KEY;
4352 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4355 /* Find related dir_item */
4356 key.objectid = ref_key->offset;
4357 key.type = BTRFS_DIR_ITEM_KEY;
4358 key.offset = btrfs_name_hash(namebuf, len);
4359 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4362 len = sizeof(*ref) + name_len;
4363 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4373 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4374 * DIR_ITEM/DIR_INDEX.
4376 * @root: the root of the fs/file tree
4377 * @ref_key: the key of the INODE_EXTREF
4378 * @refs: the count of INODE_EXTREF
4379 * @mode: the st_mode of INODE_ITEM
4381 * Return 0 if no error occurred.
4383 static int check_inode_extref(struct btrfs_root *root,
4384 struct btrfs_key *ref_key,
4385 struct extent_buffer *node, int slot, u64 *refs,
4388 struct btrfs_key key;
4389 struct btrfs_inode_extref *extref;
4390 char namebuf[BTRFS_NAME_LEN] = {0};
4400 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4401 total = btrfs_item_size_nr(node, slot);
4404 /* update inode ref count */
4406 name_len = btrfs_inode_extref_name_len(node, extref);
4407 index = btrfs_inode_extref_index(node, extref);
4408 parent = btrfs_inode_extref_parent(node, extref);
4409 if (name_len <= BTRFS_NAME_LEN) {
4412 len = BTRFS_NAME_LEN;
4413 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4414 root->objectid, ref_key->objectid, ref_key->offset);
4416 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4418 /* Check root dir ref name */
4419 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4420 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4421 root->objectid, ref_key->objectid, ref_key->offset,
4423 err |= ROOT_DIR_ERROR;
4426 /* find related dir_index */
4427 key.objectid = parent;
4428 key.type = BTRFS_DIR_INDEX_KEY;
4430 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4433 /* find related dir_item */
4434 key.objectid = parent;
4435 key.type = BTRFS_DIR_ITEM_KEY;
4436 key.offset = btrfs_name_hash(namebuf, len);
4437 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4440 len = sizeof(*extref) + name_len;
4441 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4451 * Find the INODE_REF/INODE_EXTREF for the given key and check that it matches
4452 * the specified DIR_ITEM/DIR_INDEX.
4454 * @root: the root of the fs/file tree
4455 * @key: the key of the INODE_REF/INODE_EXTREF
4456 * @name: the name in the INODE_REF/INODE_EXTREF
4457 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4458 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4460 * @ext_ref: the EXTENDED_IREF feature
4462 * Return 0 if no error occurred.
4463 * Return >0 for error bitmap
4465 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4466 char *name, int namelen, u64 index,
4467 unsigned int ext_ref)
4469 struct btrfs_path path;
4470 struct btrfs_inode_ref *ref;
4471 struct btrfs_inode_extref *extref;
4472 struct extent_buffer *node;
4473 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4484 btrfs_init_path(&path);
4485 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4487 ret = INODE_REF_MISSING;
4491 node = path.nodes[0];
4492 slot = path.slots[0];
4494 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4495 total = btrfs_item_size_nr(node, slot);
4497 /* Iterate over all entries of the INODE_REF */
4498 while (cur < total) {
4499 ret = INODE_REF_MISSING;
4501 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4502 ref_index = btrfs_inode_ref_index(node, ref);
4503 if (index != (u64)-1 && index != ref_index)
4506 if (cur + sizeof(*ref) + ref_namelen > total ||
4507 ref_namelen > BTRFS_NAME_LEN) {
4508 warning("root %llu INODE %s[%llu %llu] name too long",
4510 key->type == BTRFS_INODE_REF_KEY ?
4512 key->objectid, key->offset);
4514 if (cur + sizeof(*ref) > total)
4516 len = min_t(u32, total - cur - sizeof(*ref),
4522 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4525 if (len != namelen || strncmp(ref_namebuf, name, len))
4531 len = sizeof(*ref) + ref_namelen;
4532 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4537 /* Skip if the EXTENDED_IREF feature is not supported */
4541 btrfs_release_path(&path);
4542 btrfs_init_path(&path);
4544 dir_id = key->offset;
4545 key->type = BTRFS_INODE_EXTREF_KEY;
4546 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4548 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4550 ret = INODE_REF_MISSING;
4554 node = path.nodes[0];
4555 slot = path.slots[0];
4557 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4559 total = btrfs_item_size_nr(node, slot);
4561 /* Iterate over all entries of the INODE_EXTREF */
4562 while (cur < total) {
4563 ret = INODE_REF_MISSING;
4565 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4566 ref_index = btrfs_inode_extref_index(node, extref);
4567 parent = btrfs_inode_extref_parent(node, extref);
4568 if (index != (u64)-1 && index != ref_index)
4571 if (parent != dir_id)
4574 if (ref_namelen <= BTRFS_NAME_LEN) {
4577 len = BTRFS_NAME_LEN;
4578 warning("root %llu INODE %s[%llu %llu] name too long",
4580 key->type == BTRFS_INODE_REF_KEY ?
4582 key->objectid, key->offset);
4584 read_extent_buffer(node, ref_namebuf,
4585 (unsigned long)(extref + 1), len);
4587 if (len != namelen || strncmp(ref_namebuf, name, len))
4594 len = sizeof(*extref) + ref_namelen;
4595 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4600 btrfs_release_path(&path);
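/*
 * Summary of the lookup above: the name is first searched in the packed
 * INODE_REF item keyed (inode, INODE_REF, parent_dir); only if it is not found
 * there, and the filesystem has the EXTENDED_IREF feature, is the INODE_EXTREF
 * item keyed (inode, INODE_EXTREF, btrfs_extref_hash(parent_dir, name,
 * namelen)) scanned as a fallback.
 */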
4605 * Traverse the given DIR_ITEM/DIR_INDEX, check the related INODE_ITEM and
4606 * call find_inode_ref() to check the related INODE_REF/INODE_EXTREF.
4608 * @root: the root of the fs/file tree
4609 * @key: the key of the INODE_REF/INODE_EXTREF
4610 * @size: the st_size of the INODE_ITEM
4611 * @ext_ref: the EXTENDED_IREF feature
4613 * Return 0 if no error occurred.
4615 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4616 struct extent_buffer *node, int slot, u64 *size,
4617 unsigned int ext_ref)
4619 struct btrfs_dir_item *di;
4620 struct btrfs_inode_item *ii;
4621 struct btrfs_path path;
4622 struct btrfs_key location;
4623 char namebuf[BTRFS_NAME_LEN] = {0};
4636 * For DIR_ITEM, set index to (u64)-1 so that find_inode_ref
4637 * ignores the index check.
4639 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4641 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4642 total = btrfs_item_size_nr(node, slot);
4644 while (cur < total) {
4645 data_len = btrfs_dir_data_len(node, di);
4647 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4648 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4649 "DIR_ITEM" : "DIR_INDEX",
4650 key->objectid, key->offset, data_len);
4652 name_len = btrfs_dir_name_len(node, di);
4653 if (cur + sizeof(*di) + name_len > total ||
4654 name_len > BTRFS_NAME_LEN) {
4655 warning("root %llu %s[%llu %llu] name too long",
4657 key->type == BTRFS_DIR_ITEM_KEY ?
4658 "DIR_ITEM" : "DIR_INDEX",
4659 key->objectid, key->offset);
4661 if (cur + sizeof(*di) > total)
4663 len = min_t(u32, total - cur - sizeof(*di),
4668 (*size) += name_len;
4670 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4671 filetype = btrfs_dir_type(node, di);
4673 if (key->type == BTRFS_DIR_ITEM_KEY &&
4674 key->offset != btrfs_name_hash(namebuf, len)) {
4676 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4677 root->objectid, key->objectid, key->offset,
4678 namebuf, len, filetype, key->offset,
4679 btrfs_name_hash(namebuf, len));
4682 btrfs_init_path(&path);
4683 btrfs_dir_item_key_to_cpu(node, di, &location);
4685 /* Ignore related ROOT_ITEM check */
4686 if (location.type == BTRFS_ROOT_ITEM_KEY)
4689 /* Check relative INODE_ITEM(existence/filetype) */
4690 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4692 err |= INODE_ITEM_MISSING;
4693 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4694 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4695 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4696 key->offset, location.objectid, name_len,
4701 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4702 struct btrfs_inode_item);
4703 mode = btrfs_inode_mode(path.nodes[0], ii);
4705 if (imode_to_type(mode) != filetype) {
4706 err |= INODE_ITEM_MISMATCH;
4707 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4708 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4709 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4710 key->offset, name_len, namebuf, filetype);
4713 /* Check relative INODE_REF/INODE_EXTREF */
4714 location.type = BTRFS_INODE_REF_KEY;
4715 location.offset = key->objectid;
4716 ret = find_inode_ref(root, &location, namebuf, len,
4719 if (ret & INODE_REF_MISSING)
4720 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4721 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4722 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4723 key->offset, name_len, namebuf, filetype);
4726 btrfs_release_path(&path);
4727 len = sizeof(*di) + name_len + data_len;
4728 di = (struct btrfs_dir_item *)((char *)di + len);
4731 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4732 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4733 root->objectid, key->objectid, key->offset);
4742 * Check file extent datasum/hole, update the size of the file extents,
4743 * check and update the last offset of the file extent.
4745 * @root: the root of fs/file tree.
4746 * @fkey: the key of the file extent.
4747 * @nodatasum: INODE_NODATASUM feature.
4748 * @size: the sum of all EXTENT_DATA items size for this inode.
4749 * @end: the offset of the last extent.
4751 * Return 0 if no error occurred.
4753 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4754 struct extent_buffer *node, int slot,
4755 unsigned int nodatasum, u64 *size, u64 *end)
4757 struct btrfs_file_extent_item *fi;
4760 u64 extent_num_bytes;
4762 u64 csum_found; /* In byte size, sectorsize aligned */
4763 u64 search_start; /* Logical range start we search for csum */
4764 u64 search_len; /* Logical range len we search for csum */
4765 unsigned int extent_type;
4766 unsigned int is_hole;
4771 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4773 /* Check inline extent */
4774 extent_type = btrfs_file_extent_type(node, fi);
4775 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4776 struct btrfs_item *e = btrfs_item_nr(slot);
4777 u32 item_inline_len;
4779 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4780 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4781 compressed = btrfs_file_extent_compression(node, fi);
4782 if (extent_num_bytes == 0) {
4784 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4785 root->objectid, fkey->objectid, fkey->offset);
4786 err |= FILE_EXTENT_ERROR;
4788 if (!compressed && extent_num_bytes != item_inline_len) {
4790 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4791 root->objectid, fkey->objectid, fkey->offset,
4792 extent_num_bytes, item_inline_len);
4793 err |= FILE_EXTENT_ERROR;
4795 *end += extent_num_bytes;
4796 *size += extent_num_bytes;
4800 /* Check extent type */
4801 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4802 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4803 err |= FILE_EXTENT_ERROR;
4804 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4805 root->objectid, fkey->objectid, fkey->offset);
4809 /* Check REG_EXTENT/PREALLOC_EXTENT */
4810 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4811 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4812 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4813 extent_offset = btrfs_file_extent_offset(node, fi);
4814 compressed = btrfs_file_extent_compression(node, fi);
4815 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4818 * Check EXTENT_DATA csum
4820 * For plain (uncompressed) extent, we should only check the range
4821 * we're referring to, as it's possible that part of prealloc extent
4822 * has been written, and has csum:
4824 * |<--- Original large preallocated extent A ---->|
4825 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4828 * For compressed extent, we should check the whole range.
4831 search_start = disk_bytenr + extent_offset;
4832 search_len = extent_num_bytes;
4834 search_start = disk_bytenr;
4835 search_len = disk_num_bytes;
4837 ret = count_csum_range(root, search_start, search_len, &csum_found);
4838 if (csum_found > 0 && nodatasum) {
4839 err |= ODD_CSUM_ITEM;
4840 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4841 root->objectid, fkey->objectid, fkey->offset);
4842 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4843 !is_hole && (ret < 0 || csum_found < search_len)) {
4844 err |= CSUM_ITEM_MISSING;
4845 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4846 root->objectid, fkey->objectid, fkey->offset,
4847 csum_found, search_len);
4848 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4849 err |= ODD_CSUM_ITEM;
4850 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4851 root->objectid, fkey->objectid, fkey->offset, csum_found);
4854 /* Check EXTENT_DATA hole */
4855 if (!no_holes && *end != fkey->offset) {
4856 err |= FILE_EXTENT_ERROR;
4857 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4858 root->objectid, fkey->objectid, fkey->offset);
4861 *end += extent_num_bytes;
4863 *size += extent_num_bytes;
4869 * Check INODE_ITEM and related ITEMs (the same inode number)
4870 * 1. check link count
4871 * 2. check inode ref/extref
4872 * 3. check dir item/index
4874 * @ext_ref: the EXTENDED_IREF feature
4876 * Return 0 if no error occurred.
4877 * Return >0 if errors were found or the traversal is done (recorded in the error bitmap)
4879 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4880 unsigned int ext_ref)
4882 struct extent_buffer *node;
4883 struct btrfs_inode_item *ii;
4884 struct btrfs_key key;
4893 u64 extent_size = 0;
4895 unsigned int nodatasum;
4900 node = path->nodes[0];
4901 slot = path->slots[0];
4903 btrfs_item_key_to_cpu(node, &key, slot);
4904 inode_id = key.objectid;
4906 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4907 ret = btrfs_next_item(root, path);
4913 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4914 isize = btrfs_inode_size(node, ii);
4915 nbytes = btrfs_inode_nbytes(node, ii);
4916 mode = btrfs_inode_mode(node, ii);
4917 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4918 nlink = btrfs_inode_nlink(node, ii);
4919 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4922 ret = btrfs_next_item(root, path);
4924 /* out will fill 'err' using the current statistics */
4926 } else if (ret > 0) {
4931 node = path->nodes[0];
4932 slot = path->slots[0];
4933 btrfs_item_key_to_cpu(node, &key, slot);
4934 if (key.objectid != inode_id)
4938 case BTRFS_INODE_REF_KEY:
4939 ret = check_inode_ref(root, &key, node, slot, &refs,
4943 case BTRFS_INODE_EXTREF_KEY:
4944 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4945 warning("root %llu EXTREF[%llu %llu] isn't supported",
4946 root->objectid, key.objectid,
4948 ret = check_inode_extref(root, &key, node, slot, &refs,
4952 case BTRFS_DIR_ITEM_KEY:
4953 case BTRFS_DIR_INDEX_KEY:
4955 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4956 root->objectid, inode_id,
4957 imode_to_type(mode), key.objectid,
4960 ret = check_dir_item(root, &key, node, slot, &size,
4964 case BTRFS_EXTENT_DATA_KEY:
4966 warning("root %llu DIR INODE[%llu] shouldn't have EXTENT_DATA[%llu %llu]",
4967 root->objectid, inode_id, key.objectid,
4970 ret = check_file_extent(root, &key, node, slot,
4971 nodatasum, &extent_size,
4975 case BTRFS_XATTR_ITEM_KEY:
4978 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4979 key.objectid, key.type, key.offset);
4984 /* verify INODE_ITEM nlink/isize/nbytes */
4987 err |= LINK_COUNT_ERROR;
4988 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4989 root->objectid, inode_id, nlink);
4993 * Just a warning, as dir inode nbytes is only an
4994 * informational value.
4996 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4997 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4998 root->objectid, inode_id,
4999 root->fs_info->nodesize);
5002 if (isize != size) {
5004 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5005 root->objectid, inode_id, isize, size);
5008 if (nlink != refs) {
5009 err |= LINK_COUNT_ERROR;
5010 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5011 root->objectid, inode_id, nlink, refs);
5012 } else if (!nlink) {
5016 if (!nbytes && !no_holes && extent_end < isize) {
5017 err |= NBYTES_ERROR;
5018 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5019 root->objectid, inode_id, isize);
5022 if (nbytes != extent_size) {
5023 err |= NBYTES_ERROR;
5024 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5025 root->objectid, inode_id, nbytes, extent_size);
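/*
 * Manually check the first inode item (BTRFS_FIRST_FREE_OBJECTID, 256) of a
 * fs/file tree, since the regular traversal starts from whatever inode item
 * it finds first and would silently skip a missing inode 256.
 */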
5032 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5034 struct btrfs_path path;
5035 struct btrfs_key key;
5039 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5040 key.type = BTRFS_INODE_ITEM_KEY;
5043 /* For root being dropped, we don't need to check first inode */
5044 if (btrfs_root_refs(&root->root_item) == 0 &&
5045 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5049 btrfs_init_path(&path);
5051 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5056 err |= INODE_ITEM_MISSING;
5057 error("first inode item of root %llu is missing",
5061 err |= check_inode_item(root, &path, ext_ref);
5066 btrfs_release_path(&path);
5071 * Iterate over all items in the tree and call check_inode_item() to check them.
5073 * @root: the root of the tree to be checked.
5074 * @ext_ref: the EXTENDED_IREF feature
5076 * Return 0 if no error found.
5077 * Return <0 for error.
5079 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5081 struct btrfs_path path;
5082 struct node_refs nrefs;
5083 struct btrfs_root_item *root_item = &root->root_item;
5089 * We need to manually check the first inode item (256).
5090 * The following traversal function only starts from the first inode
5091 * item found in the leaf, so if inode item 256 is missing we would
5092 * simply skip it forever.
5094 ret = check_fs_first_inode(root, ext_ref);
5098 memset(&nrefs, 0, sizeof(nrefs));
5099 level = btrfs_header_level(root->node);
5100 btrfs_init_path(&path);
5102 if (btrfs_root_refs(root_item) > 0 ||
5103 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5104 path.nodes[level] = root->node;
5105 path.slots[level] = 0;
5106 extent_buffer_get(root->node);
5108 struct btrfs_key key;
5110 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5111 level = root_item->drop_level;
5112 path.lowest_level = level;
5113 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5120 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5123 /* if ret is negative, the walk must stop */
5129 ret = walk_up_tree_v2(root, &path, &level);
5131 /* Normal exit, reset ret to err */
5138 btrfs_release_path(&path);
5143 * Find the corresponding ref for a root_ref or root_backref.
5145 * @root: the root of the root tree.
5146 * @ref_key: the key of the root ref.
5148 * Return 0 if no error occurred.
5150 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5151 struct extent_buffer *node, int slot)
5153 struct btrfs_path path;
5154 struct btrfs_key key;
5155 struct btrfs_root_ref *ref;
5156 struct btrfs_root_ref *backref;
5157 char ref_name[BTRFS_NAME_LEN] = {0};
5158 char backref_name[BTRFS_NAME_LEN] = {0};
5164 u32 backref_namelen;
5169 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5170 ref_dirid = btrfs_root_ref_dirid(node, ref);
5171 ref_seq = btrfs_root_ref_sequence(node, ref);
5172 ref_namelen = btrfs_root_ref_name_len(node, ref);
5174 if (ref_namelen <= BTRFS_NAME_LEN) {
5177 len = BTRFS_NAME_LEN;
5178 warning("%s[%llu %llu] ref_name too long",
5179 ref_key->type == BTRFS_ROOT_REF_KEY ?
5180 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5183 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5185 /* Find the corresponding ROOT_REF/ROOT_BACKREF */
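/*
 * The paired item has a mirrored key: objectid and offset are swapped,
 * and because BTRFS_ROOT_REF_KEY + BTRFS_ROOT_BACKREF_KEY is constant,
 * the subtraction below turns one key type into the other.
 */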
5186 key.objectid = ref_key->offset;
5187 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5188 key.offset = ref_key->objectid;
5190 btrfs_init_path(&path);
5191 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5193 err |= ROOT_REF_MISSING;
5194 error("%s[%llu %llu] couldn't find relative ref",
5195 ref_key->type == BTRFS_ROOT_REF_KEY ?
5196 "ROOT_REF" : "ROOT_BACKREF",
5197 ref_key->objectid, ref_key->offset);
5201 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5202 struct btrfs_root_ref);
5203 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5204 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5205 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5207 if (backref_namelen <= BTRFS_NAME_LEN) {
5208 len = backref_namelen;
5210 len = BTRFS_NAME_LEN;
5211 warning("%s[%llu %llu] ref_name too long",
5212 key.type == BTRFS_ROOT_REF_KEY ?
5213 "ROOT_REF" : "ROOT_BACKREF",
5214 key.objectid, key.offset);
5216 read_extent_buffer(path.nodes[0], backref_name,
5217 (unsigned long)(backref + 1), len);
5219 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5220 ref_namelen != backref_namelen ||
5221 strncmp(ref_name, backref_name, len)) {
5222 err |= ROOT_REF_MISMATCH;
5223 error("%s[%llu %llu] mismatch relative ref",
5224 ref_key->type == BTRFS_ROOT_REF_KEY ?
5225 "ROOT_REF" : "ROOT_BACKREF",
5226 ref_key->objectid, ref_key->offset);
5229 btrfs_release_path(&path);
5234 * Check all fs/file trees in low-memory mode.
5236 * 1. for fs tree root item, call check_fs_root_v2()
5237 * 2. for fs tree root ref/backref, call check_root_ref()
5239 * Return 0 if no error occurred.
5241 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5243 struct btrfs_root *tree_root = fs_info->tree_root;
5244 struct btrfs_root *cur_root = NULL;
5245 struct btrfs_path path;
5246 struct btrfs_key key;
5247 struct extent_buffer *node;
5248 unsigned int ext_ref;
5253 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5255 btrfs_init_path(&path);
5256 key.objectid = BTRFS_FS_TREE_OBJECTID;
5258 key.type = BTRFS_ROOT_ITEM_KEY;
5260 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5264 } else if (ret > 0) {
5270 node = path.nodes[0];
5271 slot = path.slots[0];
5272 btrfs_item_key_to_cpu(node, &key, slot);
5273 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5275 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5276 fs_root_objectid(key.objectid)) {
5277 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5278 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5281 key.offset = (u64)-1;
5282 cur_root = btrfs_read_fs_root(fs_info, &key);
5285 if (IS_ERR(cur_root)) {
5286 error("Fail to read fs/subvol tree: %lld",
5292 ret = check_fs_root_v2(cur_root, ext_ref);
5295 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5296 btrfs_free_fs_root(cur_root);
5297 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5298 key.type == BTRFS_ROOT_BACKREF_KEY) {
5299 ret = check_root_ref(tree_root, &key, node, slot);
5303 ret = btrfs_next_item(tree_root, &path);
5313 btrfs_release_path(&path);
5317 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5318 struct cache_tree *root_cache)
5322 if (!ctx.progress_enabled)
5323 fprintf(stderr, "checking fs roots\n");
5324 if (check_mode == CHECK_MODE_LOWMEM)
5325 ret = check_fs_roots_v2(fs_info);
5327 ret = check_fs_roots(fs_info, root_cache);
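/*
 * Return 0 only if every backref recorded for @rec was found in the extent
 * tree, matched by an actual reference and consistent in count and size;
 * otherwise report the problems (when @print_errs is set) and return non-zero.
 */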
5332 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5334 struct list_head *cur = rec->backrefs.next;
5335 struct extent_backref *back;
5336 struct tree_backref *tback;
5337 struct data_backref *dback;
5341 while (cur != &rec->backrefs) {
5342 back = to_extent_backref(cur);
5344 if (!back->found_extent_tree) {
5348 if (back->is_data) {
5349 dback = to_data_backref(back);
5350 fprintf(stderr, "Backref %llu %s %llu"
5351 " owner %llu offset %llu num_refs %lu"
5352 " not found in extent tree\n",
5353 (unsigned long long)rec->start,
5354 back->full_backref ?
5356 back->full_backref ?
5357 (unsigned long long)dback->parent:
5358 (unsigned long long)dback->root,
5359 (unsigned long long)dback->owner,
5360 (unsigned long long)dback->offset,
5361 (unsigned long)dback->num_refs);
5363 tback = to_tree_backref(back);
5364 fprintf(stderr, "Backref %llu parent %llu"
5365 " root %llu not found in extent tree\n",
5366 (unsigned long long)rec->start,
5367 (unsigned long long)tback->parent,
5368 (unsigned long long)tback->root);
5371 if (!back->is_data && !back->found_ref) {
5375 tback = to_tree_backref(back);
5376 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5377 (unsigned long long)rec->start,
5378 back->full_backref ? "parent" : "root",
5379 back->full_backref ?
5380 (unsigned long long)tback->parent :
5381 (unsigned long long)tback->root, back);
5383 if (back->is_data) {
5384 dback = to_data_backref(back);
5385 if (dback->found_ref != dback->num_refs) {
5389 fprintf(stderr, "Incorrect local backref count"
5390 " on %llu %s %llu owner %llu"
5391 " offset %llu found %u wanted %u back %p\n",
5392 (unsigned long long)rec->start,
5393 back->full_backref ?
5395 back->full_backref ?
5396 (unsigned long long)dback->parent:
5397 (unsigned long long)dback->root,
5398 (unsigned long long)dback->owner,
5399 (unsigned long long)dback->offset,
5400 dback->found_ref, dback->num_refs, back);
5402 if (dback->disk_bytenr != rec->start) {
5406 fprintf(stderr, "Backref disk bytenr does not"
5407 " match extent record, bytenr=%llu, "
5408 "ref bytenr=%llu\n",
5409 (unsigned long long)rec->start,
5410 (unsigned long long)dback->disk_bytenr);
5413 if (dback->bytes != rec->nr) {
5417 fprintf(stderr, "Backref bytes do not match "
5418 "extent backref, bytenr=%llu, ref "
5419 "bytes=%llu, backref bytes=%llu\n",
5420 (unsigned long long)rec->start,
5421 (unsigned long long)rec->nr,
5422 (unsigned long long)dback->bytes);
5425 if (!back->is_data) {
5428 dback = to_data_backref(back);
5429 found += dback->found_ref;
5432 if (found != rec->refs) {
5436 fprintf(stderr, "Incorrect global backref count "
5437 "on %llu found %llu wanted %llu\n",
5438 (unsigned long long)rec->start,
5439 (unsigned long long)found,
5440 (unsigned long long)rec->refs);
5446 static int free_all_extent_backrefs(struct extent_record *rec)
5448 struct extent_backref *back;
5449 struct list_head *cur;
5450 while (!list_empty(&rec->backrefs)) {
5451 cur = rec->backrefs.next;
5452 back = to_extent_backref(cur);
5459 static void free_extent_record_cache(struct cache_tree *extent_cache)
5461 struct cache_extent *cache;
5462 struct extent_record *rec;
5465 cache = first_cache_extent(extent_cache);
5468 rec = container_of(cache, struct extent_record, cache);
5469 remove_cache_extent(extent_cache, cache);
5470 free_all_extent_backrefs(rec);
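/*
 * Drop @rec from @extent_cache and release its backrefs once all of its
 * checks have passed and no inconsistency was recorded, so fully verified
 * extents don't keep piling up in memory during the scan.
 */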
5475 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5476 struct extent_record *rec)
5478 if (rec->content_checked && rec->owner_ref_checked &&
5479 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5480 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5481 !rec->bad_full_backref && !rec->crossing_stripes &&
5482 !rec->wrong_chunk_type) {
5483 remove_cache_extent(extent_cache, &rec->cache);
5484 free_all_extent_backrefs(rec);
5485 list_del_init(&rec->list);
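/*
 * Check that the owner stored in the header of @buf is consistent with the
 * backrefs of @rec: either a tree backref from that root exists, or the
 * block is still reachable at the expected slot in the owner's tree.
 * Returns 0 if the owner checks out, 1 otherwise.
 */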
5491 static int check_owner_ref(struct btrfs_root *root,
5492 struct extent_record *rec,
5493 struct extent_buffer *buf)
5495 struct extent_backref *node;
5496 struct tree_backref *back;
5497 struct btrfs_root *ref_root;
5498 struct btrfs_key key;
5499 struct btrfs_path path;
5500 struct extent_buffer *parent;
5505 list_for_each_entry(node, &rec->backrefs, list) {
5508 if (!node->found_ref)
5510 if (node->full_backref)
5512 back = to_tree_backref(node);
5513 if (btrfs_header_owner(buf) == back->root)
5516 BUG_ON(rec->is_root);
5518 /* try to find the block by searching the corresponding fs tree */
5519 key.objectid = btrfs_header_owner(buf);
5520 key.type = BTRFS_ROOT_ITEM_KEY;
5521 key.offset = (u64)-1;
5523 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5524 if (IS_ERR(ref_root))
5527 level = btrfs_header_level(buf);
5529 btrfs_item_key_to_cpu(buf, &key, 0);
5531 btrfs_node_key_to_cpu(buf, &key, 0);
5533 btrfs_init_path(&path);
5534 path.lowest_level = level + 1;
5535 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5539 parent = path.nodes[level + 1];
5540 if (parent && buf->start == btrfs_node_blockptr(parent,
5541 path.slots[level + 1]))
5544 btrfs_release_path(&path);
5545 return found ? 0 : 1;
5548 static int is_extent_tree_record(struct extent_record *rec)
5550 struct list_head *cur = rec->backrefs.next;
5551 struct extent_backref *node;
5552 struct tree_backref *back;
5555 while (cur != &rec->backrefs) {
5556 node = to_extent_backref(cur);
5560 back = to_tree_backref(node);
5561 if (node->full_backref)
5563 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5570 static int record_bad_block_io(struct btrfs_fs_info *info,
5571 struct cache_tree *extent_cache,
5574 struct extent_record *rec;
5575 struct cache_extent *cache;
5576 struct btrfs_key key;
5578 cache = lookup_cache_extent(extent_cache, start, len);
5582 rec = container_of(cache, struct extent_record, cache);
5583 if (!is_extent_tree_record(rec))
5586 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5587 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
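/*
 * Swap the entries at @slot and @slot + 1 of @buf: key pointers for nodes,
 * keys plus item data offsets/sizes for leaves, fixing up the key in the
 * parent when the lowest key changes. Used by fix_key_order() to repair
 * blocks whose keys are out of order.
 */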
5590 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5591 struct extent_buffer *buf, int slot)
5593 if (btrfs_header_level(buf)) {
5594 struct btrfs_key_ptr ptr1, ptr2;
5596 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5597 sizeof(struct btrfs_key_ptr));
5598 read_extent_buffer(buf, &ptr2,
5599 btrfs_node_key_ptr_offset(slot + 1),
5600 sizeof(struct btrfs_key_ptr));
5601 write_extent_buffer(buf, &ptr1,
5602 btrfs_node_key_ptr_offset(slot + 1),
5603 sizeof(struct btrfs_key_ptr));
5604 write_extent_buffer(buf, &ptr2,
5605 btrfs_node_key_ptr_offset(slot),
5606 sizeof(struct btrfs_key_ptr));
5608 struct btrfs_disk_key key;
5609 btrfs_node_key(buf, &key, 0);
5610 btrfs_fixup_low_keys(root, path, &key,
5611 btrfs_header_level(buf) + 1);
5614 struct btrfs_item *item1, *item2;
5615 struct btrfs_key k1, k2;
5616 char *item1_data, *item2_data;
5617 u32 item1_offset, item2_offset, item1_size, item2_size;
5619 item1 = btrfs_item_nr(slot);
5620 item2 = btrfs_item_nr(slot + 1);
5621 btrfs_item_key_to_cpu(buf, &k1, slot);
5622 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5623 item1_offset = btrfs_item_offset(buf, item1);
5624 item2_offset = btrfs_item_offset(buf, item2);
5625 item1_size = btrfs_item_size(buf, item1);
5626 item2_size = btrfs_item_size(buf, item2);
5628 item1_data = malloc(item1_size);
5631 item2_data = malloc(item2_size);
5637 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5638 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5640 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5641 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5645 btrfs_set_item_offset(buf, item1, item2_offset);
5646 btrfs_set_item_offset(buf, item2, item1_offset);
5647 btrfs_set_item_size(buf, item1, item2_size);
5648 btrfs_set_item_size(buf, item2, item1_size);
5650 path->slots[0] = slot;
5651 btrfs_set_item_key_unsafe(root, path, &k2);
5652 path->slots[0] = slot + 1;
5653 btrfs_set_item_key_unsafe(root, path, &k1);
5658 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5660 struct extent_buffer *buf;
5661 struct btrfs_key k1, k2;
5663 int level = path->lowest_level;
5666 buf = path->nodes[level];
5667 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5669 btrfs_node_key_to_cpu(buf, &k1, i);
5670 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5672 btrfs_item_key_to_cpu(buf, &k1, i);
5673 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5675 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5677 ret = swap_values(root, path, buf, i);
5680 btrfs_mark_buffer_dirty(buf);
5686 static int delete_bogus_item(struct btrfs_root *root,
5687 struct btrfs_path *path,
5688 struct extent_buffer *buf, int slot)
5690 struct btrfs_key key;
5691 int nritems = btrfs_header_nritems(buf);
5693 btrfs_item_key_to_cpu(buf, &key, slot);
5695 /* These are all the keys we can deal with missing. */
5696 if (key.type != BTRFS_DIR_INDEX_KEY &&
5697 key.type != BTRFS_EXTENT_ITEM_KEY &&
5698 key.type != BTRFS_METADATA_ITEM_KEY &&
5699 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5700 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5703 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5704 (unsigned long long)key.objectid, key.type,
5705 (unsigned long long)key.offset, slot, buf->start);
5706 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5707 btrfs_item_nr_offset(slot + 1),
5708 sizeof(struct btrfs_item) *
5709 (nritems - slot - 1));
5710 btrfs_set_header_nritems(buf, nritems - 1);
5712 struct btrfs_disk_key disk_key;
5714 btrfs_item_key(buf, &disk_key, 0);
5715 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5717 btrfs_mark_buffer_dirty(buf);
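/*
 * Walk the items of a leaf and make sure each item's data ends exactly where
 * the previous item's data starts (or at the end of the leaf for item 0).
 * Overlapping bogus items are deleted, gaps are closed by shifting the item
 * data and adjusting the stored offset.
 */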
5721 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5723 struct extent_buffer *buf;
5727 /* We should only get this for leaves */
5728 BUG_ON(path->lowest_level);
5729 buf = path->nodes[0];
5731 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5732 unsigned int shift = 0, offset;
5734 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5735 BTRFS_LEAF_DATA_SIZE(root)) {
5736 if (btrfs_item_end_nr(buf, i) >
5737 BTRFS_LEAF_DATA_SIZE(root)) {
5738 ret = delete_bogus_item(root, path, buf, i);
5741 fprintf(stderr, "item is off the end of the "
5742 "leaf, can't fix\n");
5746 shift = BTRFS_LEAF_DATA_SIZE(root) -
5747 btrfs_item_end_nr(buf, i);
5748 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5749 btrfs_item_offset_nr(buf, i - 1)) {
5750 if (btrfs_item_end_nr(buf, i) >
5751 btrfs_item_offset_nr(buf, i - 1)) {
5752 ret = delete_bogus_item(root, path, buf, i);
5755 fprintf(stderr, "items overlap, can't fix\n");
5759 shift = btrfs_item_offset_nr(buf, i - 1) -
5760 btrfs_item_end_nr(buf, i);
5765 printf("Shifting item nr %d by %u bytes in block %llu\n",
5766 i, shift, (unsigned long long)buf->start);
5767 offset = btrfs_item_offset_nr(buf, i);
5768 memmove_extent_buffer(buf,
5769 btrfs_leaf_data(buf) + offset + shift,
5770 btrfs_leaf_data(buf) + offset,
5771 btrfs_item_size_nr(buf, i));
5772 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5774 btrfs_mark_buffer_dirty(buf);
5778 * We may have moved things, in which case we want to exit so we don't
5779 * write those changes out. Once we have proper abort functionality in
5780 * progs this can be changed to something nicer.
5787 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5788 * then just return -EIO.
5790 static int try_to_fix_bad_block(struct btrfs_root *root,
5791 struct extent_buffer *buf,
5792 enum btrfs_tree_block_status status)
5794 struct btrfs_trans_handle *trans;
5795 struct ulist *roots;
5796 struct ulist_node *node;
5797 struct btrfs_root *search_root;
5798 struct btrfs_path path;
5799 struct ulist_iterator iter;
5800 struct btrfs_key root_key, key;
5803 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5804 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5807 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5811 btrfs_init_path(&path);
5812 ULIST_ITER_INIT(&iter);
5813 while ((node = ulist_next(roots, &iter))) {
5814 root_key.objectid = node->val;
5815 root_key.type = BTRFS_ROOT_ITEM_KEY;
5816 root_key.offset = (u64)-1;
5818 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5825 trans = btrfs_start_transaction(search_root, 0);
5826 if (IS_ERR(trans)) {
5827 ret = PTR_ERR(trans);
5831 path.lowest_level = btrfs_header_level(buf);
5832 path.skip_check_block = 1;
5833 if (path.lowest_level)
5834 btrfs_node_key_to_cpu(buf, &key, 0);
5836 btrfs_item_key_to_cpu(buf, &key, 0);
5837 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5840 btrfs_commit_transaction(trans, search_root);
5843 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5844 ret = fix_key_order(search_root, &path);
5845 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5846 ret = fix_item_offset(search_root, &path);
5848 btrfs_commit_transaction(trans, search_root);
5851 btrfs_release_path(&path);
5852 btrfs_commit_transaction(trans, search_root);
5855 btrfs_release_path(&path);
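/*
 * Validate the contents of a tree block against its extent record: run
 * btrfs_check_leaf()/btrfs_check_node(), possibly attempt a repair of bad
 * key order or item offsets, and mark the record's content (and, where
 * possible, its owner) as checked.
 */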
5859 static int check_block(struct btrfs_root *root,
5860 struct cache_tree *extent_cache,
5861 struct extent_buffer *buf, u64 flags)
5863 struct extent_record *rec;
5864 struct cache_extent *cache;
5865 struct btrfs_key key;
5866 enum btrfs_tree_block_status status;
5870 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5873 rec = container_of(cache, struct extent_record, cache);
5874 rec->generation = btrfs_header_generation(buf);
5876 level = btrfs_header_level(buf);
5877 if (btrfs_header_nritems(buf) > 0) {
5880 btrfs_item_key_to_cpu(buf, &key, 0);
5882 btrfs_node_key_to_cpu(buf, &key, 0);
5884 rec->info_objectid = key.objectid;
5886 rec->info_level = level;
5888 if (btrfs_is_leaf(buf))
5889 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5891 status = btrfs_check_node(root, &rec->parent_key, buf);
5893 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5895 status = try_to_fix_bad_block(root, buf, status);
5896 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5898 fprintf(stderr, "bad block %llu\n",
5899 (unsigned long long)buf->start);
5902 * Signal to callers we need to start the scan over
5903 * again since we'll have cowed blocks.
5908 rec->content_checked = 1;
5909 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5910 rec->owner_ref_checked = 1;
5912 ret = check_owner_ref(root, rec, buf);
5914 rec->owner_ref_checked = 1;
5918 maybe_free_extent_rec(extent_cache, rec);
5922 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5923 u64 parent, u64 root)
5925 struct list_head *cur = rec->backrefs.next;
5926 struct extent_backref *node;
5927 struct tree_backref *back;
5929 while (cur != &rec->backrefs) {
5930 node = to_extent_backref(cur);
5934 back = to_tree_backref(node);
5936 if (!node->full_backref)
5938 if (parent == back->parent)
5941 if (node->full_backref)
5943 if (back->root == root)
5950 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5951 u64 parent, u64 root)
5953 struct tree_backref *ref = malloc(sizeof(*ref));
5957 memset(&ref->node, 0, sizeof(ref->node));
5959 ref->parent = parent;
5960 ref->node.full_backref = 1;
5963 ref->node.full_backref = 0;
5965 list_add_tail(&ref->node.list, &rec->backrefs);
5970 static struct data_backref *find_data_backref(struct extent_record *rec,
5971 u64 parent, u64 root,
5972 u64 owner, u64 offset,
5974 u64 disk_bytenr, u64 bytes)
5976 struct list_head *cur = rec->backrefs.next;
5977 struct extent_backref *node;
5978 struct data_backref *back;
5980 while (cur != &rec->backrefs) {
5981 node = to_extent_backref(cur);
5985 back = to_data_backref(node);
5987 if (!node->full_backref)
5989 if (parent == back->parent)
5992 if (node->full_backref)
5994 if (back->root == root && back->owner == owner &&
5995 back->offset == offset) {
5996 if (found_ref && node->found_ref &&
5997 (back->bytes != bytes ||
5998 back->disk_bytenr != disk_bytenr))
6007 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6008 u64 parent, u64 root,
6009 u64 owner, u64 offset,
6012 struct data_backref *ref = malloc(sizeof(*ref));
6016 memset(&ref->node, 0, sizeof(ref->node));
6017 ref->node.is_data = 1;
6020 ref->parent = parent;
6023 ref->node.full_backref = 1;
6027 ref->offset = offset;
6028 ref->node.full_backref = 0;
6030 ref->bytes = max_size;
6033 list_add_tail(&ref->node.list, &rec->backrefs);
6034 if (max_size > rec->max_size)
6035 rec->max_size = max_size;
6039 /* Check if the type of the extent matches its chunk */
6040 static void check_extent_type(struct extent_record *rec)
6042 struct btrfs_block_group_cache *bg_cache;
6044 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6048 /* data extent, check chunk directly */
6049 if (!rec->metadata) {
6050 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6051 rec->wrong_chunk_type = 1;
6055 /* metadata extent, check the obvious case first */
6056 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6057 BTRFS_BLOCK_GROUP_METADATA))) {
6058 rec->wrong_chunk_type = 1;
6063 * Check for a SYSTEM extent: as it's also marked as metadata, we can
6064 * only tell it's a SYSTEM extent from its backref
6066 if (!list_empty(&rec->backrefs)) {
6067 struct extent_backref *node;
6068 struct tree_backref *tback;
6071 node = to_extent_backref(rec->backrefs.next);
6072 if (node->is_data) {
6073 /* tree block shouldn't have data backref */
6074 rec->wrong_chunk_type = 1;
6077 tback = container_of(node, struct tree_backref, node);
6079 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6080 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6082 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6083 if (!(bg_cache->flags & bg_type))
6084 rec->wrong_chunk_type = 1;
6089 * Allocate a new extent record, fill in default values from @tmpl and insert it
6090 * into @extent_cache. The caller is supposed to make sure [start, nr) is not
6091 * already in the cache, otherwise the insertion fails.
6093 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6094 struct extent_record *tmpl)
6096 struct extent_record *rec;
6099 BUG_ON(tmpl->max_size == 0);
6100 rec = malloc(sizeof(*rec));
6103 rec->start = tmpl->start;
6104 rec->max_size = tmpl->max_size;
6105 rec->nr = max(tmpl->nr, tmpl->max_size);
6106 rec->found_rec = tmpl->found_rec;
6107 rec->content_checked = tmpl->content_checked;
6108 rec->owner_ref_checked = tmpl->owner_ref_checked;
6109 rec->num_duplicates = 0;
6110 rec->metadata = tmpl->metadata;
6111 rec->flag_block_full_backref = FLAG_UNSET;
6112 rec->bad_full_backref = 0;
6113 rec->crossing_stripes = 0;
6114 rec->wrong_chunk_type = 0;
6115 rec->is_root = tmpl->is_root;
6116 rec->refs = tmpl->refs;
6117 rec->extent_item_refs = tmpl->extent_item_refs;
6118 rec->parent_generation = tmpl->parent_generation;
6119 INIT_LIST_HEAD(&rec->backrefs);
6120 INIT_LIST_HEAD(&rec->dups);
6121 INIT_LIST_HEAD(&rec->list);
6122 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6123 rec->cache.start = tmpl->start;
6124 rec->cache.size = tmpl->nr;
6125 ret = insert_cache_extent(extent_cache, &rec->cache);
6130 bytes_used += rec->nr;
6133 rec->crossing_stripes = check_crossing_stripes(global_info,
6134 rec->start, global_info->nodesize);
6135 check_extent_type(rec);
6140 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6142 * - refs - if found, increase refs
6143 * - is_root - if found, set
6144 * - content_checked - if found, set
6145 * - owner_ref_checked - if found, set
6147 * If not found, create a new one, initialize and insert.
6149 static int add_extent_rec(struct cache_tree *extent_cache,
6150 struct extent_record *tmpl)
6152 struct extent_record *rec;
6153 struct cache_extent *cache;
6157 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6159 rec = container_of(cache, struct extent_record, cache);
6163 rec->nr = max(tmpl->nr, tmpl->max_size);
6166 * We need to make sure to reset nr to whatever the extent
6167 * record says was the real size, this way we can compare it to
6170 if (tmpl->found_rec) {
6171 if (tmpl->start != rec->start || rec->found_rec) {
6172 struct extent_record *tmp;
6175 if (list_empty(&rec->list))
6176 list_add_tail(&rec->list,
6177 &duplicate_extents);
6180 * We have to do this song and dance in case we
6181 * find an extent record that falls inside of
6182 * our current extent record but does not have
6183 * the same objectid.
6185 tmp = malloc(sizeof(*tmp));
6188 tmp->start = tmpl->start;
6189 tmp->max_size = tmpl->max_size;
6192 tmp->metadata = tmpl->metadata;
6193 tmp->extent_item_refs = tmpl->extent_item_refs;
6194 INIT_LIST_HEAD(&tmp->list);
6195 list_add_tail(&tmp->list, &rec->dups);
6196 rec->num_duplicates++;
6203 if (tmpl->extent_item_refs && !dup) {
6204 if (rec->extent_item_refs) {
6205 fprintf(stderr, "block %llu rec "
6206 "extent_item_refs %llu, passed %llu\n",
6207 (unsigned long long)tmpl->start,
6208 (unsigned long long)
6209 rec->extent_item_refs,
6210 (unsigned long long)tmpl->extent_item_refs);
6212 rec->extent_item_refs = tmpl->extent_item_refs;
6216 if (tmpl->content_checked)
6217 rec->content_checked = 1;
6218 if (tmpl->owner_ref_checked)
6219 rec->owner_ref_checked = 1;
6220 memcpy(&rec->parent_key, &tmpl->parent_key,
6221 sizeof(tmpl->parent_key));
6222 if (tmpl->parent_generation)
6223 rec->parent_generation = tmpl->parent_generation;
6224 if (rec->max_size < tmpl->max_size)
6225 rec->max_size = tmpl->max_size;
6228 * A metadata extent can't cross a stripe_len boundary, otherwise
6229 * kernel scrub won't be able to handle it.
6230 * Since stripe_len is now fixed to BTRFS_STRIPE_LEN, just check
6234 rec->crossing_stripes = check_crossing_stripes(
6235 global_info, rec->start,
6236 global_info->nodesize);
6237 check_extent_type(rec);
6238 maybe_free_extent_rec(extent_cache, rec);
6242 ret = add_extent_rec_nolookup(extent_cache, tmpl);
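/*
 * Record a tree block backref for @bytenr, creating the extent record on
 * demand. @found_ref distinguishes references discovered while walking the
 * trees (found_ref) from backref items found in the extent tree
 * (found_extent_tree); seeing either twice is reported as a duplicate.
 */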
6247 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6248 u64 parent, u64 root, int found_ref)
6250 struct extent_record *rec;
6251 struct tree_backref *back;
6252 struct cache_extent *cache;
6255 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6257 struct extent_record tmpl;
6259 memset(&tmpl, 0, sizeof(tmpl));
6260 tmpl.start = bytenr;
6265 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6269 /* really a bug in the cache_extent implementation */
6270 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6275 rec = container_of(cache, struct extent_record, cache);
6276 if (rec->start != bytenr) {
6278 * Several causes, from an unaligned bytenr to overlapping extents
6283 back = find_tree_backref(rec, parent, root);
6285 back = alloc_tree_backref(rec, parent, root);
6291 if (back->node.found_ref) {
6292 fprintf(stderr, "Extent back ref already exists "
6293 "for %llu parent %llu root %llu \n",
6294 (unsigned long long)bytenr,
6295 (unsigned long long)parent,
6296 (unsigned long long)root);
6298 back->node.found_ref = 1;
6300 if (back->node.found_extent_tree) {
6301 fprintf(stderr, "Extent back ref already exists "
6302 "for %llu parent %llu root %llu \n",
6303 (unsigned long long)bytenr,
6304 (unsigned long long)parent,
6305 (unsigned long long)root);
6307 back->node.found_extent_tree = 1;
6309 check_extent_type(rec);
6310 maybe_free_extent_rec(extent_cache, rec);
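/*
 * Record a data backref for @bytenr, creating the extent record on demand.
 * When @found_ref is set the reference comes from an actual file extent item
 * and @max_size is the real extent size; otherwise it comes from the extent
 * tree and @num_refs is recorded for the later cross check.
 */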
6314 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6315 u64 parent, u64 root, u64 owner, u64 offset,
6316 u32 num_refs, int found_ref, u64 max_size)
6318 struct extent_record *rec;
6319 struct data_backref *back;
6320 struct cache_extent *cache;
6323 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6325 struct extent_record tmpl;
6327 memset(&tmpl, 0, sizeof(tmpl));
6328 tmpl.start = bytenr;
6330 tmpl.max_size = max_size;
6332 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6336 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6341 rec = container_of(cache, struct extent_record, cache);
6342 if (rec->max_size < max_size)
6343 rec->max_size = max_size;
6346 * If found_ref is set then max_size is the real size and must match the
6347 * existing refs. So if we have already found a ref then we need to
6348 * make sure that this ref matches the existing one, otherwise we need
6349 * to add a new backref so we can notice that the backrefs don't match
6350 * and we need to figure out who is telling the truth. This is to
6351 * account for that awful fsync bug I introduced where we'd end up with
6352 * a btrfs_file_extent_item that would have its length include multiple
6353 * prealloc extents or point inside of a prealloc extent.
6355 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6358 back = alloc_data_backref(rec, parent, root, owner, offset,
6364 BUG_ON(num_refs != 1);
6365 if (back->node.found_ref)
6366 BUG_ON(back->bytes != max_size);
6367 back->node.found_ref = 1;
6368 back->found_ref += 1;
6369 back->bytes = max_size;
6370 back->disk_bytenr = bytenr;
6372 rec->content_checked = 1;
6373 rec->owner_ref_checked = 1;
6375 if (back->node.found_extent_tree) {
6376 fprintf(stderr, "Extent back ref already exists "
6377 "for %llu parent %llu root %llu "
6378 "owner %llu offset %llu num_refs %lu\n",
6379 (unsigned long long)bytenr,
6380 (unsigned long long)parent,
6381 (unsigned long long)root,
6382 (unsigned long long)owner,
6383 (unsigned long long)offset,
6384 (unsigned long)num_refs);
6386 back->num_refs = num_refs;
6387 back->node.found_extent_tree = 1;
6389 maybe_free_extent_rec(extent_cache, rec);
6393 static int add_pending(struct cache_tree *pending,
6394 struct cache_tree *seen, u64 bytenr, u32 size)
6397 ret = add_cache_extent(seen, bytenr, size);
6400 add_cache_extent(pending, bytenr, size);
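/*
 * Fill @bits with up to @bits_nr block ranges to read next: readahead
 * requests are served first, then tree nodes near @last (rewound a little so
 * neighbouring nodes get batched), then the plain pending queue, merging in
 * nearby pending extents when there is still room.
 */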
6404 static int pick_next_pending(struct cache_tree *pending,
6405 struct cache_tree *reada,
6406 struct cache_tree *nodes,
6407 u64 last, struct block_info *bits, int bits_nr,
6410 unsigned long node_start = last;
6411 struct cache_extent *cache;
6414 cache = search_cache_extent(reada, 0);
6416 bits[0].start = cache->start;
6417 bits[0].size = cache->size;
6422 if (node_start > 32768)
6423 node_start -= 32768;
6425 cache = search_cache_extent(nodes, node_start);
6427 cache = search_cache_extent(nodes, 0);
6430 cache = search_cache_extent(pending, 0);
6435 bits[ret].start = cache->start;
6436 bits[ret].size = cache->size;
6437 cache = next_cache_extent(cache);
6439 } while (cache && ret < bits_nr);
6445 bits[ret].start = cache->start;
6446 bits[ret].size = cache->size;
6447 cache = next_cache_extent(cache);
6449 } while (cache && ret < bits_nr);
6451 if (bits_nr - ret > 8) {
6452 u64 lookup = bits[0].start + bits[0].size;
6453 struct cache_extent *next;
6454 next = search_cache_extent(pending, lookup);
6456 if (next->start - lookup > 32768)
6458 bits[ret].start = next->start;
6459 bits[ret].size = next->size;
6460 lookup = next->start + next->size;
6464 next = next_cache_extent(next);
6472 static void free_chunk_record(struct cache_extent *cache)
6474 struct chunk_record *rec;
6476 rec = container_of(cache, struct chunk_record, cache);
6477 list_del_init(&rec->list);
6478 list_del_init(&rec->dextents);
6482 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6484 cache_tree_free_extents(chunk_cache, free_chunk_record);
6487 static void free_device_record(struct rb_node *node)
6489 struct device_record *rec;
6491 rec = container_of(node, struct device_record, node);
6495 FREE_RB_BASED_TREE(device_cache, free_device_record);
6497 int insert_block_group_record(struct block_group_tree *tree,
6498 struct block_group_record *bg_rec)
6502 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6506 list_add_tail(&bg_rec->list, &tree->block_groups);
6510 static void free_block_group_record(struct cache_extent *cache)
6512 struct block_group_record *rec;
6514 rec = container_of(cache, struct block_group_record, cache);
6515 list_del_init(&rec->list);
6519 void free_block_group_tree(struct block_group_tree *tree)
6521 cache_tree_free_extents(&tree->tree, free_block_group_record);
6524 int insert_device_extent_record(struct device_extent_tree *tree,
6525 struct device_extent_record *de_rec)
6530 * Device extents are a bit different from the other extents, because
6531 * extents which belong to different devices may have the same start
6532 * and size, so we need to use the special extent cache search/insert
6533 * functions.
6535 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6539 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6540 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6544 static void free_device_extent_record(struct cache_extent *cache)
6546 struct device_extent_record *rec;
6548 rec = container_of(cache, struct device_extent_record, cache);
6549 if (!list_empty(&rec->chunk_list))
6550 list_del_init(&rec->chunk_list);
6551 if (!list_empty(&rec->device_list))
6552 list_del_init(&rec->device_list);
6556 void free_device_extent_tree(struct device_extent_tree *tree)
6558 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6561 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6562 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6563 struct extent_buffer *leaf, int slot)
6565 struct btrfs_extent_ref_v0 *ref0;
6566 struct btrfs_key key;
6569 btrfs_item_key_to_cpu(leaf, &key, slot);
6570 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6571 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6572 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6575 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6576 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
6582 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6583 struct btrfs_key *key,
6586 struct btrfs_chunk *ptr;
6587 struct chunk_record *rec;
6590 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6591 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6593 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6595 fprintf(stderr, "memory allocation failed\n");
6599 INIT_LIST_HEAD(&rec->list);
6600 INIT_LIST_HEAD(&rec->dextents);
6603 rec->cache.start = key->offset;
6604 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6606 rec->generation = btrfs_header_generation(leaf);
6608 rec->objectid = key->objectid;
6609 rec->type = key->type;
6610 rec->offset = key->offset;
6612 rec->length = rec->cache.size;
6613 rec->owner = btrfs_chunk_owner(leaf, ptr);
6614 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6615 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6616 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6617 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6618 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6619 rec->num_stripes = num_stripes;
6620 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6622 for (i = 0; i < rec->num_stripes; ++i) {
6623 rec->stripes[i].devid =
6624 btrfs_stripe_devid_nr(leaf, ptr, i);
6625 rec->stripes[i].offset =
6626 btrfs_stripe_offset_nr(leaf, ptr, i);
6627 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6628 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6635 static int process_chunk_item(struct cache_tree *chunk_cache,
6636 struct btrfs_key *key, struct extent_buffer *eb,
6639 struct chunk_record *rec;
6640 struct btrfs_chunk *chunk;
6643 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6645 * Do an extra check for this chunk item.
6647 * It's still possible to craft a leaf with a CHUNK_ITEM that has the
6648 * wrong owner (3) outside of the chunk tree and still passes both the
6649 * chunk tree check and the owner<->key_type check.
6651 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6654 error("chunk(%llu, %llu) is not valid, ignore it",
6655 key->offset, btrfs_chunk_length(eb, chunk));
6658 rec = btrfs_new_chunk_record(eb, key, slot);
6659 ret = insert_cache_extent(chunk_cache, &rec->cache);
6661 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6662 rec->offset, rec->length);
6669 static int process_device_item(struct rb_root *dev_cache,
6670 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6672 struct btrfs_dev_item *ptr;
6673 struct device_record *rec;
6676 ptr = btrfs_item_ptr(eb,
6677 slot, struct btrfs_dev_item);
6679 rec = malloc(sizeof(*rec));
6681 fprintf(stderr, "memory allocation failed\n");
6685 rec->devid = key->offset;
6686 rec->generation = btrfs_header_generation(eb);
6688 rec->objectid = key->objectid;
6689 rec->type = key->type;
6690 rec->offset = key->offset;
6692 rec->devid = btrfs_device_id(eb, ptr);
6693 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6694 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6696 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6698 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6705 struct block_group_record *
6706 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6709 struct btrfs_block_group_item *ptr;
6710 struct block_group_record *rec;
6712 rec = calloc(1, sizeof(*rec));
6714 fprintf(stderr, "memory allocation failed\n");
6718 rec->cache.start = key->objectid;
6719 rec->cache.size = key->offset;
6721 rec->generation = btrfs_header_generation(leaf);
6723 rec->objectid = key->objectid;
6724 rec->type = key->type;
6725 rec->offset = key->offset;
6727 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6728 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6730 INIT_LIST_HEAD(&rec->list);
6735 static int process_block_group_item(struct block_group_tree *block_group_cache,
6736 struct btrfs_key *key,
6737 struct extent_buffer *eb, int slot)
6739 struct block_group_record *rec;
6742 rec = btrfs_new_block_group_record(eb, key, slot);
6743 ret = insert_block_group_record(block_group_cache, rec);
6745 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6746 rec->objectid, rec->offset);
6753 struct device_extent_record *
6754 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6755 struct btrfs_key *key, int slot)
6757 struct device_extent_record *rec;
6758 struct btrfs_dev_extent *ptr;
6760 rec = calloc(1, sizeof(*rec));
6762 fprintf(stderr, "memory allocation failed\n");
6766 rec->cache.objectid = key->objectid;
6767 rec->cache.start = key->offset;
6769 rec->generation = btrfs_header_generation(leaf);
6771 rec->objectid = key->objectid;
6772 rec->type = key->type;
6773 rec->offset = key->offset;
6775 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6776 rec->chunk_objecteid =
6777 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6779 btrfs_dev_extent_chunk_offset(leaf, ptr);
6780 rec->length = btrfs_dev_extent_length(leaf, ptr);
6781 rec->cache.size = rec->length;
6783 INIT_LIST_HEAD(&rec->chunk_list);
6784 INIT_LIST_HEAD(&rec->device_list);
6790 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6791 struct btrfs_key *key, struct extent_buffer *eb,
6794 struct device_extent_record *rec;
6797 rec = btrfs_new_device_extent_record(eb, key, slot);
6798 ret = insert_device_extent_record(dev_extent_cache, rec);
6801 "Device extent[%llu, %llu, %llu] existed.\n",
6802 rec->objectid, rec->offset, rec->length);
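/*
 * Parse an EXTENT_ITEM/METADATA_ITEM at @slot, add or update the extent
 * record for it and walk its inline references, recording a tree or data
 * backref for each one.
 */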
6809 static int process_extent_item(struct btrfs_root *root,
6810 struct cache_tree *extent_cache,
6811 struct extent_buffer *eb, int slot)
6813 struct btrfs_extent_item *ei;
6814 struct btrfs_extent_inline_ref *iref;
6815 struct btrfs_extent_data_ref *dref;
6816 struct btrfs_shared_data_ref *sref;
6817 struct btrfs_key key;
6818 struct extent_record tmpl;
6823 u32 item_size = btrfs_item_size_nr(eb, slot);
6829 btrfs_item_key_to_cpu(eb, &key, slot);
6831 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6833 num_bytes = root->fs_info->nodesize;
6835 num_bytes = key.offset;
6838 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6839 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6840 key.objectid, root->fs_info->sectorsize);
6843 if (item_size < sizeof(*ei)) {
6844 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6845 struct btrfs_extent_item_v0 *ei0;
6846 BUG_ON(item_size != sizeof(*ei0));
6847 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6848 refs = btrfs_extent_refs_v0(eb, ei0);
6852 memset(&tmpl, 0, sizeof(tmpl));
6853 tmpl.start = key.objectid;
6854 tmpl.nr = num_bytes;
6855 tmpl.extent_item_refs = refs;
6856 tmpl.metadata = metadata;
6858 tmpl.max_size = num_bytes;
6860 return add_extent_rec(extent_cache, &tmpl);
6863 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6864 refs = btrfs_extent_refs(eb, ei);
6865 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6869 if (metadata && num_bytes != root->fs_info->nodesize) {
6870 error("ignore invalid metadata extent, length %llu does not equal to %u",
6871 num_bytes, root->fs_info->nodesize);
6874 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6875 error("ignore invalid data extent, length %llu is not aligned to %u",
6876 num_bytes, root->fs_info->sectorsize);
6880 memset(&tmpl, 0, sizeof(tmpl));
6881 tmpl.start = key.objectid;
6882 tmpl.nr = num_bytes;
6883 tmpl.extent_item_refs = refs;
6884 tmpl.metadata = metadata;
6886 tmpl.max_size = num_bytes;
6887 add_extent_rec(extent_cache, &tmpl);
6889 ptr = (unsigned long)(ei + 1);
6890 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6891 key.type == BTRFS_EXTENT_ITEM_KEY)
6892 ptr += sizeof(struct btrfs_tree_block_info);
6894 end = (unsigned long)ei + item_size;
6896 iref = (struct btrfs_extent_inline_ref *)ptr;
6897 type = btrfs_extent_inline_ref_type(eb, iref);
6898 offset = btrfs_extent_inline_ref_offset(eb, iref);
6900 case BTRFS_TREE_BLOCK_REF_KEY:
6901 ret = add_tree_backref(extent_cache, key.objectid,
6905 "add_tree_backref failed (extent items tree block): %s",
6908 case BTRFS_SHARED_BLOCK_REF_KEY:
6909 ret = add_tree_backref(extent_cache, key.objectid,
6913 "add_tree_backref failed (extent items shared block): %s",
6916 case BTRFS_EXTENT_DATA_REF_KEY:
6917 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6918 add_data_backref(extent_cache, key.objectid, 0,
6919 btrfs_extent_data_ref_root(eb, dref),
6920 btrfs_extent_data_ref_objectid(eb,
6922 btrfs_extent_data_ref_offset(eb, dref),
6923 btrfs_extent_data_ref_count(eb, dref),
6926 case BTRFS_SHARED_DATA_REF_KEY:
6927 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6928 add_data_backref(extent_cache, key.objectid, offset,
6930 btrfs_shared_data_ref_count(eb, sref),
6934 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6935 key.objectid, key.type, num_bytes);
6938 ptr += btrfs_extent_inline_ref_size(type);
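/*
 * Verify that the free space cache of @cache has an entry exactly covering
 * [offset, offset + bytes), after carving out any superblock mirror copies
 * that fall inside the range.
 */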
6945 static int check_cache_range(struct btrfs_root *root,
6946 struct btrfs_block_group_cache *cache,
6947 u64 offset, u64 bytes)
6949 struct btrfs_free_space *entry;
6955 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6956 bytenr = btrfs_sb_offset(i);
6957 ret = btrfs_rmap_block(root->fs_info,
6958 cache->key.objectid, bytenr, 0,
6959 &logical, &nr, &stripe_len);
6964 if (logical[nr] + stripe_len <= offset)
6966 if (offset + bytes <= logical[nr])
6968 if (logical[nr] == offset) {
6969 if (stripe_len >= bytes) {
6973 bytes -= stripe_len;
6974 offset += stripe_len;
6975 } else if (logical[nr] < offset) {
6976 if (logical[nr] + stripe_len >=
6981 bytes = (offset + bytes) -
6982 (logical[nr] + stripe_len);
6983 offset = logical[nr] + stripe_len;
6986 * Could be tricky, the super may land in the
6987 * middle of the area we're checking. First
6988 * check the easiest case, it's at the end.
6990 if (logical[nr] + stripe_len >=
6992 bytes = logical[nr] - offset;
6996 /* Check the left side */
6997 ret = check_cache_range(root, cache,
6999 logical[nr] - offset);
7005 /* Now we continue with the right side */
7006 bytes = (offset + bytes) -
7007 (logical[nr] + stripe_len);
7008 offset = logical[nr] + stripe_len;
7015 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7017 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7018 offset, offset+bytes);
7022 if (entry->offset != offset) {
7023 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7028 if (entry->bytes != bytes) {
7029 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7030 bytes, entry->bytes, offset);
7034 unlink_free_space(cache->free_space_ctl, entry);
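/*
 * Walk the extent tree across the range of @cache and check that every gap
 * between allocated extents has a matching entry in the loaded free space
 * cache; matched entries are removed, so anything left over afterwards
 * indicates stale cache contents.
 */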
7039 static int verify_space_cache(struct btrfs_root *root,
7040 struct btrfs_block_group_cache *cache)
7042 struct btrfs_path path;
7043 struct extent_buffer *leaf;
7044 struct btrfs_key key;
7048 root = root->fs_info->extent_root;
7050 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7052 btrfs_init_path(&path);
7053 key.objectid = last;
7055 key.type = BTRFS_EXTENT_ITEM_KEY;
7056 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7061 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7062 ret = btrfs_next_leaf(root, &path);
7070 leaf = path.nodes[0];
7071 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7072 if (key.objectid >= cache->key.offset + cache->key.objectid)
7074 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7075 key.type != BTRFS_METADATA_ITEM_KEY) {
7080 if (last == key.objectid) {
7081 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7082 last = key.objectid + key.offset;
7084 last = key.objectid + root->fs_info->nodesize;
7089 ret = check_cache_range(root, cache, last,
7090 key.objectid - last);
7093 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7094 last = key.objectid + key.offset;
7096 last = key.objectid + root->fs_info->nodesize;
7100 if (last < cache->key.objectid + cache->key.offset)
7101 ret = check_cache_range(root, cache, last,
7102 cache->key.objectid +
7103 cache->key.offset - last);
7106 btrfs_release_path(&path);
7109 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7110 fprintf(stderr, "There are still entries left in the space "
7118 static int check_space_cache(struct btrfs_root *root)
7120 struct btrfs_block_group_cache *cache;
7121 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7125 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7126 btrfs_super_generation(root->fs_info->super_copy) !=
7127 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7128 printf("cache and super generation don't match, space cache "
7129 "will be invalidated\n");
7133 if (ctx.progress_enabled) {
7134 ctx.tp = TASK_FREE_SPACE;
7135 task_start(ctx.info);
7139 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7143 start = cache->key.objectid + cache->key.offset;
7144 if (!cache->free_space_ctl) {
7145 if (btrfs_init_free_space_ctl(cache,
7146 root->fs_info->sectorsize)) {
7151 btrfs_remove_free_space_cache(cache);
7154 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7155 ret = exclude_super_stripes(root, cache);
7157 fprintf(stderr, "could not exclude super stripes: %s\n",
7162 ret = load_free_space_tree(root->fs_info, cache);
7163 free_excluded_extents(root, cache);
7165 fprintf(stderr, "could not load free space tree: %s\n",
7172 ret = load_free_space_cache(root->fs_info, cache);
7177 ret = verify_space_cache(root, cache);
7179 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7180 cache->key.objectid);
7185 task_stop(ctx.info);
7187 return error ? -EINVAL : 0;
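/*
 * Read the data extent [@bytenr, @bytenr + @num_bytes) back from disk and
 * verify each sectorsize block against the checksums stored at @leaf_offset
 * in @eb, falling back to the other mirrors on a mismatch.
 */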
7190 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7191 u64 num_bytes, unsigned long leaf_offset,
7192 struct extent_buffer *eb) {
7194 struct btrfs_fs_info *fs_info = root->fs_info;
7196 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7198 unsigned long csum_offset;
7202 u64 data_checked = 0;
7208 if (num_bytes % fs_info->sectorsize)
7211 data = malloc(num_bytes);
7215 while (offset < num_bytes) {
7218 read_len = num_bytes - offset;
7219 /* read as much data as possible at a time */
7220 ret = read_extent_data(fs_info, data + offset,
7221 bytenr + offset, &read_len, mirror);
7225 /* verify the checksum of each sectorsize block of data */
7226 while (data_checked < read_len) {
7228 tmp = offset + data_checked;
7230 csum = btrfs_csum_data((char *)data + tmp,
7231 csum, fs_info->sectorsize);
7232 btrfs_csum_final(csum, (u8 *)&csum);
7234 csum_offset = leaf_offset +
7235 tmp / fs_info->sectorsize * csum_size;
7236 read_extent_buffer(eb, (char *)&csum_expected,
7237 csum_offset, csum_size);
7238 /* try another mirror */
7239 if (csum != csum_expected) {
7240 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7241 mirror, bytenr + tmp,
7242 csum, csum_expected);
7243 num_copies = btrfs_num_copies(root->fs_info,
7245 if (mirror < num_copies - 1) {
7250 data_checked += fs_info->sectorsize;
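/*
 * Make sure the whole range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree, complaining about any csum range that has
 * no backing extent.
 */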
7259 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7262 struct btrfs_path path;
7263 struct extent_buffer *leaf;
7264 struct btrfs_key key;
7267 btrfs_init_path(&path);
7268 key.objectid = bytenr;
7269 key.type = BTRFS_EXTENT_ITEM_KEY;
7270 key.offset = (u64)-1;
7273 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7276 fprintf(stderr, "Error looking up extent record %d\n", ret);
7277 btrfs_release_path(&path);
7280 if (path.slots[0] > 0) {
7283 ret = btrfs_prev_leaf(root, &path);
7286 } else if (ret > 0) {
7293 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7296 * Block group items come before extent items if they have the same
7297 * bytenr, so walk back one more just in case. Dear future traveller,
7298 * first congrats on mastering time travel. Now if it's not too much
7299 * trouble could you go back to 2006 and tell Chris to make the
7300 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7301 * EXTENT_ITEM_KEY please?
7303 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7304 if (path.slots[0] > 0) {
7307 ret = btrfs_prev_leaf(root, &path);
7310 } else if (ret > 0) {
7315 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7319 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7320 ret = btrfs_next_leaf(root, &path);
7322 fprintf(stderr, "Error going to next leaf "
7324 btrfs_release_path(&path);
7330 leaf = path.nodes[0];
7331 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7332 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7336 if (key.objectid + key.offset < bytenr) {
7340 if (key.objectid > bytenr + num_bytes)
7343 if (key.objectid == bytenr) {
7344 if (key.offset >= num_bytes) {
7348 num_bytes -= key.offset;
7349 bytenr += key.offset;
7350 } else if (key.objectid < bytenr) {
7351 if (key.objectid + key.offset >= bytenr + num_bytes) {
7355 num_bytes = (bytenr + num_bytes) -
7356 (key.objectid + key.offset);
7357 bytenr = key.objectid + key.offset;
7359 if (key.objectid + key.offset < bytenr + num_bytes) {
7360 u64 new_start = key.objectid + key.offset;
7361 u64 new_bytes = bytenr + num_bytes - new_start;
7364 * Weird case, the extent is in the middle of
7365 * our range, we'll have to search one side
7366 * and then the other. Not sure if this happens
7367 * in real life, but no harm in coding it up
7368 * anyway just in case.
7370 btrfs_release_path(&path);
7371 ret = check_extent_exists(root, new_start,
7374 fprintf(stderr, "Right section didn't "
7378 num_bytes = key.objectid - bytenr;
7381 num_bytes = key.objectid - bytenr;
7388 if (num_bytes && !ret) {
7389 fprintf(stderr, "There are no extents for csum range "
7390 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7394 btrfs_release_path(&path);
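/*
 * Illustrative example, not part of the original source, of the range
 * trimming done above.  Checking bytenr == 1M, num_bytes == 1M against
 * extent items (0.5M, len 1M) and (1.5M, len 0.75M):
 *
 *	- the first item ends at 1.5M, so the range shrinks to [1.5M, 2M);
 *	- the second item starts at 1.5M and is at least as long as what is
 *	  left, so num_bytes drops to zero and the check succeeds.
 *
 * Only when part of the range stays uncovered is the "no extents for csum
 * range" message printed.
 */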
7398 static int check_csums(struct btrfs_root *root)
7400 struct btrfs_path path;
7401 struct extent_buffer *leaf;
7402 struct btrfs_key key;
7403 u64 offset = 0, num_bytes = 0;
7404 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7408 unsigned long leaf_offset;
7410 root = root->fs_info->csum_root;
7411 if (!extent_buffer_uptodate(root->node)) {
7412 fprintf(stderr, "No valid csum tree found\n");
7416 btrfs_init_path(&path);
7417 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7418 key.type = BTRFS_EXTENT_CSUM_KEY;
7420 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7422 fprintf(stderr, "Error searching csum tree %d\n", ret);
7423 btrfs_release_path(&path);
7427 if (ret > 0 && path.slots[0])
7432 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7433 ret = btrfs_next_leaf(root, &path);
7435 fprintf(stderr, "Error going to next leaf "
7442 leaf = path.nodes[0];
7444 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7445 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7450 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7451 csum_size) * root->fs_info->sectorsize;
7452 if (!check_data_csum)
7453 goto skip_csum_check;
7454 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7455 ret = check_extent_csums(root, key.offset, data_len,
7461 offset = key.offset;
7462 } else if (key.offset != offset + num_bytes) {
7463 ret = check_extent_exists(root, offset, num_bytes);
7465 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7466 "there is no extent record\n",
7467 offset, offset+num_bytes);
7470 offset = key.offset;
7473 num_bytes += data_len;
7477 btrfs_release_path(&path);
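/*
 * Illustrative example, not part of the original source: check_csums()
 * glues contiguous csum items into one range before looking for a backing
 * extent.  A csum item at key.offset 1M covering 256K followed by one at
 * 1M + 256K covering 128K accumulates into offset == 1M, num_bytes == 384K;
 * only when the next item is not contiguous is check_extent_exists() called
 * for the accumulated range.
 */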
7481 static int is_dropped_key(struct btrfs_key *key,
7482 struct btrfs_key *drop_key)
{
7483 if (key->objectid < drop_key->objectid)
7485 else if (key->objectid == drop_key->objectid) {
7486 if (key->type < drop_key->type)
7488 else if (key->type == drop_key->type) {
7489 if (key->offset < drop_key->offset)
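/*
 * Usage sketch, not part of the original source: with a drop_key of
 * (256 EXTENT_DATA 8192), keys that sort strictly before it, for example
 * (256 EXTENT_DATA 4096) or (255 INODE_ITEM 0), make is_dropped_key()
 * return 1.  The caller then skips those node pointers, since an
 * interrupted snapshot deletion has already removed everything up to the
 * recorded drop key.
 */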
7497 * Here are the rules for FULL_BACKREF.
7499 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7500 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7502 * 3) We cowed the block walking down a reloc tree.  It is impossible to tell
7503 * whether this happened after the relocation occurred, since we'll have dropped
7504 * the reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7505 * have no real way to know for sure.
7507 * We process the blocks one root at a time, and we start from the lowest root
7508 * objectid and go to the highest.  So we can just look up the owner backref for
7509 * the record, and if we don't find it then we know it doesn't exist and we have
7512 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7513 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7514 * be set or not and then we can check later once we've gathered all the refs.
7516 static int calc_extent_flag(struct cache_tree *extent_cache,
7517 struct extent_buffer *buf,
7518 struct root_item_record *ri,
7521 struct extent_record *rec;
7522 struct cache_extent *cache;
7523 struct tree_backref *tback;
7526 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7527 /* we have added this extent before */
7531 rec = container_of(cache, struct extent_record, cache);
7534 * Except for file/reloc trees, we cannot have
7537 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7542 if (buf->start == ri->bytenr)
7545 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7548 owner = btrfs_header_owner(buf);
7549 if (owner == ri->objectid)
7552 tback = find_tree_backref(rec, 0, owner);
7557 if (rec->flag_block_full_backref != FLAG_UNSET &&
7558 rec->flag_block_full_backref != 0)
7559 rec->bad_full_backref = 1;
7562 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7563 if (rec->flag_block_full_backref != FLAG_UNSET &&
7564 rec->flag_block_full_backref != 1)
7565 rec->bad_full_backref = 1;
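/*
 * Condensed sketch, not part of the original source, of the decision above
 * for a block reached from root 'ri' whose record 'rec' was collected
 * earlier:
 *
 *	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
 *		-> needs BTRFS_BLOCK_FLAG_FULL_BACKREF
 *	else if (owner == ri->objectid || find_tree_backref(rec, 0, owner))
 *		-> keyed backrefs cover it, no FULL_BACKREF needed
 *	else
 *		-> needs BTRFS_BLOCK_FLAG_FULL_BACKREF
 *
 * Any disagreement with the flag recorded earlier sets
 * rec->bad_full_backref so it can be repaired later.
 */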
7569 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7571 fprintf(stderr, "Invalid key type(");
7572 print_key_type(stderr, 0, key_type);
7573 fprintf(stderr, ") found in root(");
7574 print_objectid(stderr, rootid, 0);
7575 fprintf(stderr, ")\n");
7579 * Check if the key is valid with its extent buffer.
7581 * This is an early check in case an invalid key exists in an extent buffer.
7582 * It is not comprehensive yet, but should prevent a wrong key/item from being passed
7585 static int check_type_with_root(u64 rootid, u8 key_type)
7588 /* Only valid in chunk tree */
7589 case BTRFS_DEV_ITEM_KEY:
7590 case BTRFS_CHUNK_ITEM_KEY:
7591 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7594 /* valid in csum and log tree */
7595 case BTRFS_EXTENT_CSUM_KEY:
7596 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7600 case BTRFS_EXTENT_ITEM_KEY:
7601 case BTRFS_METADATA_ITEM_KEY:
7602 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7603 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7606 case BTRFS_ROOT_ITEM_KEY:
7607 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7610 case BTRFS_DEV_EXTENT_KEY:
7611 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7617 report_mismatch_key_root(key_type, rootid);
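/*
 * Usage example, not part of the original source:
 * check_type_with_root(BTRFS_CHUNK_TREE_OBJECTID, BTRFS_DEV_ITEM_KEY)
 * returns 0, while
 * check_type_with_root(BTRFS_FS_TREE_OBJECTID, BTRFS_DEV_ITEM_KEY)
 * prints an "Invalid key type(...) found in root(...)" message and returns
 * an error, which is how obviously misplaced items are filtered out before
 * run_next_block() tries to interpret them.
 */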
7621 static int run_next_block(struct btrfs_root *root,
7622 struct block_info *bits,
7625 struct cache_tree *pending,
7626 struct cache_tree *seen,
7627 struct cache_tree *reada,
7628 struct cache_tree *nodes,
7629 struct cache_tree *extent_cache,
7630 struct cache_tree *chunk_cache,
7631 struct rb_root *dev_cache,
7632 struct block_group_tree *block_group_cache,
7633 struct device_extent_tree *dev_extent_cache,
7634 struct root_item_record *ri)
7636 struct btrfs_fs_info *fs_info = root->fs_info;
7637 struct extent_buffer *buf;
7638 struct extent_record *rec = NULL;
7649 struct btrfs_key key;
7650 struct cache_extent *cache;
7653 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7654 bits_nr, &reada_bits);
7659 for (i = 0; i < nritems; i++) {
7660 ret = add_cache_extent(reada, bits[i].start,
7665 /* fixme, get the parent transid */
7666 readahead_tree_block(fs_info, bits[i].start, 0);
7669 *last = bits[0].start;
7670 bytenr = bits[0].start;
7671 size = bits[0].size;
7673 cache = lookup_cache_extent(pending, bytenr, size);
7675 remove_cache_extent(pending, cache);
7678 cache = lookup_cache_extent(reada, bytenr, size);
7680 remove_cache_extent(reada, cache);
7683 cache = lookup_cache_extent(nodes, bytenr, size);
7685 remove_cache_extent(nodes, cache);
7688 cache = lookup_cache_extent(extent_cache, bytenr, size);
7690 rec = container_of(cache, struct extent_record, cache);
7691 gen = rec->parent_generation;
7694 /* fixme, get the real parent transid */
7695 buf = read_tree_block(root->fs_info, bytenr, gen);
7696 if (!extent_buffer_uptodate(buf)) {
7697 record_bad_block_io(root->fs_info,
7698 extent_cache, bytenr, size);
7702 nritems = btrfs_header_nritems(buf);
7705 if (!init_extent_tree) {
7706 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7707 btrfs_header_level(buf), 1, NULL,
7710 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7712 fprintf(stderr, "Couldn't calc extent flags\n");
7713 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7718 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7720 fprintf(stderr, "Couldn't calc extent flags\n");
7721 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7725 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7727 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7728 ri->objectid == btrfs_header_owner(buf)) {
7730 * Ok we got to this block from its original owner and
7731 * we have FULL_BACKREF set. Relocation can leave
7732 * converted blocks over so this is altogether possible,
7733 * however it's not possible if the generation > the
7734 * last snapshot, so check for this case.
7736 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7737 btrfs_header_generation(buf) > ri->last_snapshot) {
7738 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7739 rec->bad_full_backref = 1;
7744 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7745 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7746 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7747 rec->bad_full_backref = 1;
7751 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7752 rec->flag_block_full_backref = 1;
7756 rec->flag_block_full_backref = 0;
7758 owner = btrfs_header_owner(buf);
7761 ret = check_block(root, extent_cache, buf, flags);
7765 if (btrfs_is_leaf(buf)) {
7766 btree_space_waste += btrfs_leaf_free_space(root, buf);
7767 for (i = 0; i < nritems; i++) {
7768 struct btrfs_file_extent_item *fi;
7769 btrfs_item_key_to_cpu(buf, &key, i);
7771 * Check the key type against the leaf owner.
7772 * This can filter out quite a lot of early errors if the
7775 if (check_type_with_root(btrfs_header_owner(buf),
7777 fprintf(stderr, "ignoring invalid key\n");
7780 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7781 process_extent_item(root, extent_cache, buf,
7785 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7786 process_extent_item(root, extent_cache, buf,
7790 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7792 btrfs_item_size_nr(buf, i);
7795 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7796 process_chunk_item(chunk_cache, &key, buf, i);
7799 if (key.type == BTRFS_DEV_ITEM_KEY) {
7800 process_device_item(dev_cache, &key, buf, i);
7803 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7804 process_block_group_item(block_group_cache,
7808 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7809 process_device_extent_item(dev_extent_cache,
7814 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7815 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7816 process_extent_ref_v0(extent_cache, buf, i);
7823 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7824 ret = add_tree_backref(extent_cache,
7825 key.objectid, 0, key.offset, 0);
7828 "add_tree_backref failed (leaf tree block): %s",
7832 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7833 ret = add_tree_backref(extent_cache,
7834 key.objectid, key.offset, 0, 0);
7837 "add_tree_backref failed (leaf shared block): %s",
7841 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7842 struct btrfs_extent_data_ref *ref;
7843 ref = btrfs_item_ptr(buf, i,
7844 struct btrfs_extent_data_ref);
7845 add_data_backref(extent_cache,
7847 btrfs_extent_data_ref_root(buf, ref),
7848 btrfs_extent_data_ref_objectid(buf,
7850 btrfs_extent_data_ref_offset(buf, ref),
7851 btrfs_extent_data_ref_count(buf, ref),
7852 0, root->fs_info->sectorsize);
7855 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7856 struct btrfs_shared_data_ref *ref;
7857 ref = btrfs_item_ptr(buf, i,
7858 struct btrfs_shared_data_ref);
7859 add_data_backref(extent_cache,
7860 key.objectid, key.offset, 0, 0, 0,
7861 btrfs_shared_data_ref_count(buf, ref),
7862 0, root->fs_info->sectorsize);
7865 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7866 struct bad_item *bad;
7868 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7872 bad = malloc(sizeof(struct bad_item));
7875 INIT_LIST_HEAD(&bad->list);
7876 memcpy(&bad->key, &key,
7877 sizeof(struct btrfs_key));
7878 bad->root_id = owner;
7879 list_add_tail(&bad->list, &delete_items);
7882 if (key.type != BTRFS_EXTENT_DATA_KEY)
7884 fi = btrfs_item_ptr(buf, i,
7885 struct btrfs_file_extent_item);
7886 if (btrfs_file_extent_type(buf, fi) ==
7887 BTRFS_FILE_EXTENT_INLINE)
7889 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7892 data_bytes_allocated +=
7893 btrfs_file_extent_disk_num_bytes(buf, fi);
7894 if (data_bytes_allocated < root->fs_info->sectorsize) {
7897 data_bytes_referenced +=
7898 btrfs_file_extent_num_bytes(buf, fi);
7899 add_data_backref(extent_cache,
7900 btrfs_file_extent_disk_bytenr(buf, fi),
7901 parent, owner, key.objectid, key.offset -
7902 btrfs_file_extent_offset(buf, fi), 1, 1,
7903 btrfs_file_extent_disk_num_bytes(buf, fi));
7907 struct btrfs_key first_key;
7909 first_key.objectid = 0;
7912 btrfs_item_key_to_cpu(buf, &first_key, 0);
7913 level = btrfs_header_level(buf);
7914 for (i = 0; i < nritems; i++) {
7915 struct extent_record tmpl;
7917 ptr = btrfs_node_blockptr(buf, i);
7918 size = root->fs_info->nodesize;
7919 btrfs_node_key_to_cpu(buf, &key, i);
7921 if ((level == ri->drop_level)
7922 && is_dropped_key(&key, &ri->drop_key)) {
7927 memset(&tmpl, 0, sizeof(tmpl));
7928 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7929 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7934 tmpl.max_size = size;
7935 ret = add_extent_rec(extent_cache, &tmpl);
7939 ret = add_tree_backref(extent_cache, ptr, parent,
7943 "add_tree_backref failed (non-leaf block): %s",
7949 add_pending(nodes, seen, ptr, size);
7951 add_pending(pending, seen, ptr, size);
7954 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7955 nritems) * sizeof(struct btrfs_key_ptr);
7957 total_btree_bytes += buf->len;
7958 if (fs_root_objectid(btrfs_header_owner(buf)))
7959 total_fs_tree_bytes += buf->len;
7960 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7961 total_extent_tree_bytes += buf->len;
7963 free_extent_buffer(buf);
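/*
 * Traversal note, not part of the original source: run_next_block() is
 * driven by a few cache trees.  'nodes' queues interior blocks, 'pending'
 * queues leaves, 'seen' stops blocks from being queued twice, and the child
 * pointers found here are pushed back with add_pending(); calling
 * run_next_block() until pick_next_pending() returns nothing therefore
 * visits every reachable block, with readahead batched through 'reada'.
 */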
7967 static int add_root_to_pending(struct extent_buffer *buf,
7968 struct cache_tree *extent_cache,
7969 struct cache_tree *pending,
7970 struct cache_tree *seen,
7971 struct cache_tree *nodes,
7974 struct extent_record tmpl;
7977 if (btrfs_header_level(buf) > 0)
7978 add_pending(nodes, seen, buf->start, buf->len);
7980 add_pending(pending, seen, buf->start, buf->len);
7982 memset(&tmpl, 0, sizeof(tmpl));
7983 tmpl.start = buf->start;
7988 tmpl.max_size = buf->len;
7989 add_extent_rec(extent_cache, &tmpl);
7991 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7992 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7993 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7996 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8001 /* as we fix the tree, we might be deleting blocks that
8002 * we're tracking for repair. This hook makes sure we
8003 * remove any backrefs for blocks as we are fixing them.
8005 static int free_extent_hook(struct btrfs_trans_handle *trans,
8006 struct btrfs_root *root,
8007 u64 bytenr, u64 num_bytes, u64 parent,
8008 u64 root_objectid, u64 owner, u64 offset,
8011 struct extent_record *rec;
8012 struct cache_extent *cache;
8014 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8016 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8017 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8021 rec = container_of(cache, struct extent_record, cache);
8023 struct data_backref *back;
8024 back = find_data_backref(rec, parent, root_objectid, owner,
8025 offset, 1, bytenr, num_bytes);
8028 if (back->node.found_ref) {
8029 back->found_ref -= refs_to_drop;
8031 rec->refs -= refs_to_drop;
8033 if (back->node.found_extent_tree) {
8034 back->num_refs -= refs_to_drop;
8035 if (rec->extent_item_refs)
8036 rec->extent_item_refs -= refs_to_drop;
8038 if (back->found_ref == 0)
8039 back->node.found_ref = 0;
8040 if (back->num_refs == 0)
8041 back->node.found_extent_tree = 0;
8043 if (!back->node.found_extent_tree && back->node.found_ref) {
8044 list_del(&back->node.list);
8048 struct tree_backref *back;
8049 back = find_tree_backref(rec, parent, root_objectid);
8052 if (back->node.found_ref) {
8055 back->node.found_ref = 0;
8057 if (back->node.found_extent_tree) {
8058 if (rec->extent_item_refs)
8059 rec->extent_item_refs--;
8060 back->node.found_extent_tree = 0;
8062 if (!back->node.found_extent_tree && back->node.found_ref) {
8063 list_del(&back->node.list);
8067 maybe_free_extent_rec(extent_cache, rec);
8072 static int delete_extent_records(struct btrfs_trans_handle *trans,
8073 struct btrfs_root *root,
8074 struct btrfs_path *path,
8077 struct btrfs_key key;
8078 struct btrfs_key found_key;
8079 struct extent_buffer *leaf;
8084 key.objectid = bytenr;
8086 key.offset = (u64)-1;
8089 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8096 if (path->slots[0] == 0)
8102 leaf = path->nodes[0];
8103 slot = path->slots[0];
8105 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8106 if (found_key.objectid != bytenr)
8109 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8110 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8111 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8112 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8113 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8114 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8115 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8116 btrfs_release_path(path);
8117 if (found_key.type == 0) {
8118 if (found_key.offset == 0)
8120 key.offset = found_key.offset - 1;
8121 key.type = found_key.type;
8123 key.type = found_key.type - 1;
8124 key.offset = (u64)-1;
8128 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8129 found_key.objectid, found_key.type, found_key.offset);
8131 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8134 btrfs_release_path(path);
8136 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8137 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8138 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8139 found_key.offset : root->fs_info->nodesize;
8141 ret = btrfs_update_block_group(trans, root, bytenr,
8148 btrfs_release_path(path);
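/*
 * Illustrative example, not part of the original source, of the backwards
 * key walk above: to wipe every reference to bytenr B the search starts at
 * (B, highest type, -1ULL) and, when the item under the cursor is not one
 * of the reference types listed, steps the key down, e.g.
 *
 *	found (B, BLOCK_GROUP_ITEM, len)  ->  retry (B, BLOCK_GROUP_ITEM - 1, -1ULL)
 *
 * deleting every matching item and updating the block group accounting for
 * EXTENT_ITEM/METADATA_ITEM entries, until the objectid no longer equals B.
 */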
8153 * for a single backref, this will allocate a new extent
8154 * and add the backref to it.
8156 static int record_extent(struct btrfs_trans_handle *trans,
8157 struct btrfs_fs_info *info,
8158 struct btrfs_path *path,
8159 struct extent_record *rec,
8160 struct extent_backref *back,
8161 int allocated, u64 flags)
8164 struct btrfs_root *extent_root = info->extent_root;
8165 struct extent_buffer *leaf;
8166 struct btrfs_key ins_key;
8167 struct btrfs_extent_item *ei;
8168 struct data_backref *dback;
8169 struct btrfs_tree_block_info *bi;
8172 rec->max_size = max_t(u64, rec->max_size,
8176 u32 item_size = sizeof(*ei);
8179 item_size += sizeof(*bi);
8181 ins_key.objectid = rec->start;
8182 ins_key.offset = rec->max_size;
8183 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8185 ret = btrfs_insert_empty_item(trans, extent_root, path,
8186 &ins_key, item_size);
8190 leaf = path->nodes[0];
8191 ei = btrfs_item_ptr(leaf, path->slots[0],
8192 struct btrfs_extent_item);
8194 btrfs_set_extent_refs(leaf, ei, 0);
8195 btrfs_set_extent_generation(leaf, ei, rec->generation);
8197 if (back->is_data) {
8198 btrfs_set_extent_flags(leaf, ei,
8199 BTRFS_EXTENT_FLAG_DATA);
8201 struct btrfs_disk_key copy_key;
8203 bi = (struct btrfs_tree_block_info *)(ei + 1);
8204 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8207 btrfs_set_disk_key_objectid(&copy_key,
8208 rec->info_objectid);
8209 btrfs_set_disk_key_type(&copy_key, 0);
8210 btrfs_set_disk_key_offset(&copy_key, 0);
8212 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8213 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8215 btrfs_set_extent_flags(leaf, ei,
8216 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8219 btrfs_mark_buffer_dirty(leaf);
8220 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8221 rec->max_size, 1, 0);
8224 btrfs_release_path(path);
8227 if (back->is_data) {
8231 dback = to_data_backref(back);
8232 if (back->full_backref)
8233 parent = dback->parent;
8237 for (i = 0; i < dback->found_ref; i++) {
8238 /* if parent != 0, we're doing a full backref
8239 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8240 * just makes the backref allocator create a data
8243 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8244 rec->start, rec->max_size,
8248 BTRFS_FIRST_FREE_OBJECTID :
8254 fprintf(stderr, "adding new data backref"
8255 " on %llu %s %llu owner %llu"
8256 " offset %llu found %d\n",
8257 (unsigned long long)rec->start,
8258 back->full_backref ?
8260 back->full_backref ?
8261 (unsigned long long)parent :
8262 (unsigned long long)dback->root,
8263 (unsigned long long)dback->owner,
8264 (unsigned long long)dback->offset,
8268 struct tree_backref *tback;
8270 tback = to_tree_backref(back);
8271 if (back->full_backref)
8272 parent = tback->parent;
8276 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8277 rec->start, rec->max_size,
8278 parent, tback->root, 0, 0);
8279 fprintf(stderr, "adding new tree backref on "
8280 "start %llu len %llu parent %llu root %llu\n",
8281 rec->start, rec->max_size, parent, tback->root);
8284 btrfs_release_path(path);
8288 static struct extent_entry *find_entry(struct list_head *entries,
8289 u64 bytenr, u64 bytes)
8291 struct extent_entry *entry = NULL;
8293 list_for_each_entry(entry, entries, list) {
8294 if (entry->bytenr == bytenr && entry->bytes == bytes)
8301 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8303 struct extent_entry *entry, *best = NULL, *prev = NULL;
8305 list_for_each_entry(entry, entries, list) {
8307 * If there are as many broken entries as entries then we know
8308 * not to trust this particular entry.
8310 if (entry->broken == entry->count)
8314 * Special case, when there are only two entries and 'best' is
8324 * If our current entry == best then we can't be sure our best
8325 * is really the best, so we need to keep searching.
8327 if (best && best->count == entry->count) {
8333 /* Prev == entry, not good enough, have to keep searching */
8334 if (!prev->broken && prev->count == entry->count)
8338 best = (prev->count > entry->count) ? prev : entry;
8339 else if (best->count < entry->count)
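/*
 * Worked example, not part of the original source, of the voting above.
 * Suppose the data backrefs of one record produced:
 *
 *	entry A: bytenr 1M, bytes 128K, count 3, broken 0
 *	entry B: bytenr 1M, bytes 64K,  count 3, broken 3
 *	entry C: bytenr 2M, bytes 128K, count 1, broken 0
 *
 * B is discarded because every backref that voted for it is broken, and A
 * beats C on count, so A is the value the file extents get repaired toward.
 * An unresolved tie between healthy entries returns NULL and the caller
 * falls back to what the extent item itself says.
 */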
8347 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8348 struct data_backref *dback, struct extent_entry *entry)
8350 struct btrfs_trans_handle *trans;
8351 struct btrfs_root *root;
8352 struct btrfs_file_extent_item *fi;
8353 struct extent_buffer *leaf;
8354 struct btrfs_key key;
8358 key.objectid = dback->root;
8359 key.type = BTRFS_ROOT_ITEM_KEY;
8360 key.offset = (u64)-1;
8361 root = btrfs_read_fs_root(info, &key);
8363 fprintf(stderr, "Couldn't find root for our ref\n");
8368 * The backref points to the original offset of the extent if it was
8369 * split, so we need to search down to the offset we have and then walk
8370 * forward until we find the backref we're looking for.
8372 key.objectid = dback->owner;
8373 key.type = BTRFS_EXTENT_DATA_KEY;
8374 key.offset = dback->offset;
8375 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8377 fprintf(stderr, "Error looking up ref %d\n", ret);
8382 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8383 ret = btrfs_next_leaf(root, path);
8385 fprintf(stderr, "Couldn't find our ref, next\n");
8389 leaf = path->nodes[0];
8390 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8391 if (key.objectid != dback->owner ||
8392 key.type != BTRFS_EXTENT_DATA_KEY) {
8393 fprintf(stderr, "Couldn't find our ref, search\n");
8396 fi = btrfs_item_ptr(leaf, path->slots[0],
8397 struct btrfs_file_extent_item);
8398 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8399 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8401 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8406 btrfs_release_path(path);
8408 trans = btrfs_start_transaction(root, 1);
8410 return PTR_ERR(trans);
8413 * Ok we have the key of the file extent we want to fix, now we can cow
8414 * down to the thing and fix it.
8416 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8418 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8419 key.objectid, key.type, key.offset, ret);
8423 fprintf(stderr, "Well that's odd, we just found this key "
8424 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8429 leaf = path->nodes[0];
8430 fi = btrfs_item_ptr(leaf, path->slots[0],
8431 struct btrfs_file_extent_item);
8433 if (btrfs_file_extent_compression(leaf, fi) &&
8434 dback->disk_bytenr != entry->bytenr) {
8435 fprintf(stderr, "Ref doesn't match the record start and is "
8436 "compressed, please take a btrfs-image of this file "
8437 "system and send it to a btrfs developer so they can "
8438 "complete this functionality for bytenr %Lu\n",
8439 dback->disk_bytenr);
8444 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8445 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8446 } else if (dback->disk_bytenr > entry->bytenr) {
8447 u64 off_diff, offset;
8449 off_diff = dback->disk_bytenr - entry->bytenr;
8450 offset = btrfs_file_extent_offset(leaf, fi);
8451 if (dback->disk_bytenr + offset +
8452 btrfs_file_extent_num_bytes(leaf, fi) >
8453 entry->bytenr + entry->bytes) {
8454 fprintf(stderr, "Ref is past the entry end, please "
8455 "take a btrfs-image of this file system and "
8456 "send it to a btrfs developer, ref %Lu\n",
8457 dback->disk_bytenr);
8462 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8463 btrfs_set_file_extent_offset(leaf, fi, offset);
8464 } else if (dback->disk_bytenr < entry->bytenr) {
8467 offset = btrfs_file_extent_offset(leaf, fi);
8468 if (dback->disk_bytenr + offset < entry->bytenr) {
8469 fprintf(stderr, "Ref is before the entry start, please"
8470 " take a btrfs-image of this file system and "
8471 "send it to a btrfs developer, ref %Lu\n",
8472 dback->disk_bytenr);
8477 offset += dback->disk_bytenr;
8478 offset -= entry->bytenr;
8479 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8480 btrfs_set_file_extent_offset(leaf, fi, offset);
8483 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8486 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8487 * only do this if we aren't using compression, otherwise it's a
8490 if (!btrfs_file_extent_compression(leaf, fi))
8491 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8493 printf("ram bytes may be wrong?\n");
8494 btrfs_mark_buffer_dirty(leaf);
8496 err = btrfs_commit_transaction(trans, root);
8497 btrfs_release_path(path);
8498 return ret ? ret : err;
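/*
 * Worked example, not part of the original source: say a file extent
 * records disk_bytenr 1M + 64K with offset 0, but the agreed-upon extent
 * entry is [1M, 1M).  repair_ref() rewrites disk_bytenr to 1M and bumps the
 * file extent offset by 64K, so the inode still reads exactly the same
 * bytes while the ref now points at the real start of the extent;
 * disk_num_bytes (and ram_bytes for uncompressed extents) are set to the
 * entry's length as well.
 */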
8501 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8502 struct extent_record *rec)
8504 struct extent_backref *back;
8505 struct data_backref *dback;
8506 struct extent_entry *entry, *best = NULL;
8509 int broken_entries = 0;
8514 * Metadata is easy and the backrefs should always agree on bytenr and
8515 * size; if not, we've got bigger issues.
8520 list_for_each_entry(back, &rec->backrefs, list) {
8521 if (back->full_backref || !back->is_data)
8524 dback = to_data_backref(back);
8527 * We only pay attention to backrefs that we found a real
8530 if (dback->found_ref == 0)
8534 * For now we only catch when the bytes don't match, not the
8535 * bytenr. We can easily do this at the same time, but I want
8536 * to have a fs image to test on before we just add repair
8537 * functionality willy-nilly so we know we won't screw up the
8541 entry = find_entry(&entries, dback->disk_bytenr,
8544 entry = malloc(sizeof(struct extent_entry));
8549 memset(entry, 0, sizeof(*entry));
8550 entry->bytenr = dback->disk_bytenr;
8551 entry->bytes = dback->bytes;
8552 list_add_tail(&entry->list, &entries);
8557 * If we only have one entry we may think the entries agree when
8558 * in reality they don't, so we have to do some extra checking.
8560 if (dback->disk_bytenr != rec->start ||
8561 dback->bytes != rec->nr || back->broken)
8572 /* Yay all the backrefs agree, carry on good sir */
8573 if (nr_entries <= 1 && !mismatch)
8576 fprintf(stderr, "attempting to repair backref discrepancy for bytenr "
8577 "%Lu\n", rec->start);
8580 * First we want to see if the backrefs can agree amongst themselves who
8581 * is right, so figure out which one of the entries has the highest
8584 best = find_most_right_entry(&entries);
8587 * Ok so we may have an even split between what the backrefs think, so
8588 * this is where we use the extent ref to see what it thinks.
8591 entry = find_entry(&entries, rec->start, rec->nr);
8592 if (!entry && (!broken_entries || !rec->found_rec)) {
8593 fprintf(stderr, "Backrefs don't agree with each other "
8594 "and extent record doesn't agree with anybody,"
8595 " so we can't fix bytenr %Lu bytes %Lu\n",
8596 rec->start, rec->nr);
8599 } else if (!entry) {
8601 * Ok our backrefs were broken, we'll assume this is the
8602 * correct value and add an entry for this range.
8604 entry = malloc(sizeof(struct extent_entry));
8609 memset(entry, 0, sizeof(*entry));
8610 entry->bytenr = rec->start;
8611 entry->bytes = rec->nr;
8612 list_add_tail(&entry->list, &entries);
8616 best = find_most_right_entry(&entries);
8618 fprintf(stderr, "Backrefs and extent record evenly "
8619 "split on who is right, this is going to "
8620 "require user input to fix bytenr %Lu bytes "
8621 "%Lu\n", rec->start, rec->nr);
8628 * I don't think this can happen currently as we'll abort() if we catch
8629 * this case higher up, but in case somebody removes that we still can't
8630 * deal with it properly here yet, so just bail out if that's the case.
8632 if (best->bytenr != rec->start) {
8633 fprintf(stderr, "Extent start and backref starts don't match, "
8634 "please use btrfs-image on this file system and send "
8635 "it to a btrfs developer so they can make fsck fix "
8636 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8637 rec->start, rec->nr);
8643 * Ok great we all agreed on an extent record, let's go find the real
8644 * references and fix up the ones that don't match.
8646 list_for_each_entry(back, &rec->backrefs, list) {
8647 if (back->full_backref || !back->is_data)
8650 dback = to_data_backref(back);
8653 * Still ignoring backrefs that don't have a real ref attached
8656 if (dback->found_ref == 0)
8659 if (dback->bytes == best->bytes &&
8660 dback->disk_bytenr == best->bytenr)
8663 ret = repair_ref(info, path, dback, best);
8669 * Ok we messed with the actual refs, which means we need to drop our
8670 * entire cache and go back and rescan. I know this is a huge pain and
8671 * adds a lot of extra work, but it's the only way to be safe. Once all
8672 * the backrefs agree we may not need to do anything to the extent
8677 while (!list_empty(&entries)) {
8678 entry = list_entry(entries.next, struct extent_entry, list);
8679 list_del_init(&entry->list);
8685 static int process_duplicates(struct cache_tree *extent_cache,
8686 struct extent_record *rec)
8688 struct extent_record *good, *tmp;
8689 struct cache_extent *cache;
8693 * If we found an extent record for this extent then return, or if we
8694 * have more than one duplicate we are likely going to need to delete
8697 if (rec->found_rec || rec->num_duplicates > 1)
8700 /* Shouldn't happen but just in case */
8701 BUG_ON(!rec->num_duplicates);
8704 * So this happens if we end up with a backref that doesn't match the
8705 * actual extent entry. So either the backref is bad or the extent
8706 * entry is bad. Either way we want to have the extent_record actually
8707 * reflect what we found in the extent_tree, so we need to take the
8708 * duplicate out and use that as the extent_record since the only way we
8709 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8711 remove_cache_extent(extent_cache, &rec->cache);
8713 good = to_extent_record(rec->dups.next);
8714 list_del_init(&good->list);
8715 INIT_LIST_HEAD(&good->backrefs);
8716 INIT_LIST_HEAD(&good->dups);
8717 good->cache.start = good->start;
8718 good->cache.size = good->nr;
8719 good->content_checked = 0;
8720 good->owner_ref_checked = 0;
8721 good->num_duplicates = 0;
8722 good->refs = rec->refs;
8723 list_splice_init(&rec->backrefs, &good->backrefs);
8725 cache = lookup_cache_extent(extent_cache, good->start,
8729 tmp = container_of(cache, struct extent_record, cache);
8732 * If we find another overlapping extent and its found_rec is
8733 * set then it's a duplicate and we need to try and delete
8736 if (tmp->found_rec || tmp->num_duplicates > 0) {
8737 if (list_empty(&good->list))
8738 list_add_tail(&good->list,
8739 &duplicate_extents);
8740 good->num_duplicates += tmp->num_duplicates + 1;
8741 list_splice_init(&tmp->dups, &good->dups);
8742 list_del_init(&tmp->list);
8743 list_add_tail(&tmp->list, &good->dups);
8744 remove_cache_extent(extent_cache, &tmp->cache);
8749 * Ok we have another extent rec that isn't backed by an extent item, so
8750 * let's just add it to this extent and carry on like we did above.
8752 good->refs += tmp->refs;
8753 list_splice_init(&tmp->backrefs, &good->backrefs);
8754 remove_cache_extent(extent_cache, &tmp->cache);
8757 ret = insert_cache_extent(extent_cache, &good->cache);
8760 return good->num_duplicates ? 0 : 1;
8763 static int delete_duplicate_records(struct btrfs_root *root,
8764 struct extent_record *rec)
8766 struct btrfs_trans_handle *trans;
8767 LIST_HEAD(delete_list);
8768 struct btrfs_path path;
8769 struct extent_record *tmp, *good, *n;
8772 struct btrfs_key key;
8774 btrfs_init_path(&path);
8777 /* Find the record that covers all of the duplicates. */
8778 list_for_each_entry(tmp, &rec->dups, list) {
8779 if (good->start < tmp->start)
8781 if (good->nr > tmp->nr)
8784 if (tmp->start + tmp->nr < good->start + good->nr) {
8785 fprintf(stderr, "Ok we have overlapping extents that "
8786 "aren't completely covered by each other, this "
8787 "is going to require more careful thought. "
8788 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8789 tmp->start, tmp->nr, good->start, good->nr);
8796 list_add_tail(&rec->list, &delete_list);
8798 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8801 list_move_tail(&tmp->list, &delete_list);
8804 root = root->fs_info->extent_root;
8805 trans = btrfs_start_transaction(root, 1);
8806 if (IS_ERR(trans)) {
8807 ret = PTR_ERR(trans);
8811 list_for_each_entry(tmp, &delete_list, list) {
8812 if (tmp->found_rec == 0)
8814 key.objectid = tmp->start;
8815 key.type = BTRFS_EXTENT_ITEM_KEY;
8816 key.offset = tmp->nr;
8818 /* Shouldn't happen but just in case */
8819 if (tmp->metadata) {
8820 fprintf(stderr, "Well this shouldn't happen, extent "
8821 "record overlaps but is metadata? "
8822 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8826 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8832 ret = btrfs_del_item(trans, root, &path);
8835 btrfs_release_path(&path);
8838 err = btrfs_commit_transaction(trans, root);
8842 while (!list_empty(&delete_list)) {
8843 tmp = to_extent_record(delete_list.next);
8844 list_del_init(&tmp->list);
8850 while (!list_empty(&rec->dups)) {
8851 tmp = to_extent_record(rec->dups.next);
8852 list_del_init(&tmp->list);
8856 btrfs_release_path(&path);
8858 if (!ret && !nr_del)
8859 rec->num_duplicates = 0;
8861 return ret ? ret : nr_del;
8864 static int find_possible_backrefs(struct btrfs_fs_info *info,
8865 struct btrfs_path *path,
8866 struct cache_tree *extent_cache,
8867 struct extent_record *rec)
8869 struct btrfs_root *root;
8870 struct extent_backref *back;
8871 struct data_backref *dback;
8872 struct cache_extent *cache;
8873 struct btrfs_file_extent_item *fi;
8874 struct btrfs_key key;
8878 list_for_each_entry(back, &rec->backrefs, list) {
8879 /* Don't care about full backrefs (poor unloved backrefs) */
8880 if (back->full_backref || !back->is_data)
8883 dback = to_data_backref(back);
8885 /* We found this one, we don't need to do a lookup */
8886 if (dback->found_ref)
8889 key.objectid = dback->root;
8890 key.type = BTRFS_ROOT_ITEM_KEY;
8891 key.offset = (u64)-1;
8893 root = btrfs_read_fs_root(info, &key);
8895 /* No root, definitely a bad ref, skip */
8896 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8898 /* Other err, exit */
8900 return PTR_ERR(root);
8902 key.objectid = dback->owner;
8903 key.type = BTRFS_EXTENT_DATA_KEY;
8904 key.offset = dback->offset;
8905 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8907 btrfs_release_path(path);
8910 /* Didn't find it, we can carry on */
8915 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8916 struct btrfs_file_extent_item);
8917 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8918 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8919 btrfs_release_path(path);
8920 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8922 struct extent_record *tmp;
8923 tmp = container_of(cache, struct extent_record, cache);
8926 * If we found an extent record for the bytenr for this
8927 * particular backref then we can't add it to our
8928 * current extent record. We only want to add backrefs
8929 * that don't have a corresponding extent item in the
8930 * extent tree since they likely belong to this record
8931 * and we need to fix it if it doesn't match bytenrs.
8937 dback->found_ref += 1;
8938 dback->disk_bytenr = bytenr;
8939 dback->bytes = bytes;
8942 * Set this so the verify backref code knows not to trust the
8943 * values in this backref.
8952 * Record orphan data ref into corresponding root.
8954 * Return 0 if the extent item contains a data ref and it was recorded.
8955 * Return 1 if the extent item contains no useful data ref.
8956 * In that case it may contain only a shared_dataref or metadata backref,
8957 * or the file extent already exists (this should be handled by the extent bytenr
8959 * Return <0 if something goes wrong.
8961 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8962 struct extent_record *rec)
8964 struct btrfs_key key;
8965 struct btrfs_root *dest_root;
8966 struct extent_backref *back;
8967 struct data_backref *dback;
8968 struct orphan_data_extent *orphan;
8969 struct btrfs_path path;
8970 int recorded_data_ref = 0;
8975 btrfs_init_path(&path);
8976 list_for_each_entry(back, &rec->backrefs, list) {
8977 if (back->full_backref || !back->is_data ||
8978 !back->found_extent_tree)
8980 dback = to_data_backref(back);
8981 if (dback->found_ref)
8983 key.objectid = dback->root;
8984 key.type = BTRFS_ROOT_ITEM_KEY;
8985 key.offset = (u64)-1;
8987 dest_root = btrfs_read_fs_root(fs_info, &key);
8989 /* For a non-existent root we just skip it */
8990 if (IS_ERR(dest_root) || !dest_root)
8993 key.objectid = dback->owner;
8994 key.type = BTRFS_EXTENT_DATA_KEY;
8995 key.offset = dback->offset;
8997 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8998 btrfs_release_path(&path);
9000 * For ret < 0, it's OK since the fs-tree may be corrupted,
9001 * we need to record it for inode/file extent rebuild.
9002 * For ret > 0, we record it only for file extent rebuild.
9003 * For ret == 0, the file extent exists but only the bytenr
9004 * mismatches; let the original bytenr fix routine handle it,
9010 orphan = malloc(sizeof(*orphan));
9015 INIT_LIST_HEAD(&orphan->list);
9016 orphan->root = dback->root;
9017 orphan->objectid = dback->owner;
9018 orphan->offset = dback->offset;
9019 orphan->disk_bytenr = rec->cache.start;
9020 orphan->disk_len = rec->cache.size;
9021 list_add(&dest_root->orphan_data_extents, &orphan->list);
9022 recorded_data_ref = 1;
9025 btrfs_release_path(&path);
9027 return !recorded_data_ref;
9033 * when an incorrect extent item is found, this will delete
9034 * all of the existing entries for it and recreate them
9035 * based on what the tree scan found.
9037 static int fixup_extent_refs(struct btrfs_fs_info *info,
9038 struct cache_tree *extent_cache,
9039 struct extent_record *rec)
9041 struct btrfs_trans_handle *trans = NULL;
9043 struct btrfs_path path;
9044 struct list_head *cur = rec->backrefs.next;
9045 struct cache_extent *cache;
9046 struct extent_backref *back;
9050 if (rec->flag_block_full_backref)
9051 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9053 btrfs_init_path(&path);
9054 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9056 * Sometimes the backrefs themselves are so broken they don't
9057 * get attached to any meaningful rec, so first go back and
9058 * check any of our backrefs that we couldn't find and throw
9059 * them into the list if we find the backref so that
9060 * verify_backrefs can figure out what to do.
9062 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9067 /* step one, make sure all of the backrefs agree */
9068 ret = verify_backrefs(info, &path, rec);
9072 trans = btrfs_start_transaction(info->extent_root, 1);
9073 if (IS_ERR(trans)) {
9074 ret = PTR_ERR(trans);
9078 /* step two, delete all the existing records */
9079 ret = delete_extent_records(trans, info->extent_root, &path,
9085 /* was this block corrupt? If so, don't add references to it */
9086 cache = lookup_cache_extent(info->corrupt_blocks,
9087 rec->start, rec->max_size);
9093 /* step three, recreate all the refs we did find */
9094 while(cur != &rec->backrefs) {
9095 back = to_extent_backref(cur);
9099 * if we didn't find any references, don't create a
9102 if (!back->found_ref)
9105 rec->bad_full_backref = 0;
9106 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9114 int err = btrfs_commit_transaction(trans, info->extent_root);
9120 fprintf(stderr, "Repaired extent references for %llu\n",
9121 (unsigned long long)rec->start);
9123 btrfs_release_path(&path);
9127 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9128 struct extent_record *rec)
9130 struct btrfs_trans_handle *trans;
9131 struct btrfs_root *root = fs_info->extent_root;
9132 struct btrfs_path path;
9133 struct btrfs_extent_item *ei;
9134 struct btrfs_key key;
9138 key.objectid = rec->start;
9139 if (rec->metadata) {
9140 key.type = BTRFS_METADATA_ITEM_KEY;
9141 key.offset = rec->info_level;
9143 key.type = BTRFS_EXTENT_ITEM_KEY;
9144 key.offset = rec->max_size;
9147 trans = btrfs_start_transaction(root, 0);
9149 return PTR_ERR(trans);
9151 btrfs_init_path(&path);
9152 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9154 btrfs_release_path(&path);
9155 btrfs_commit_transaction(trans, root);
9158 fprintf(stderr, "Didn't find extent for %llu\n",
9159 (unsigned long long)rec->start);
9160 btrfs_release_path(&path);
9161 btrfs_commit_transaction(trans, root);
9165 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9166 struct btrfs_extent_item);
9167 flags = btrfs_extent_flags(path.nodes[0], ei);
9168 if (rec->flag_block_full_backref) {
9169 fprintf(stderr, "setting full backref on %llu\n",
9170 (unsigned long long)key.objectid);
9171 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9173 fprintf(stderr, "clearing full backref on %llu\n",
9174 (unsigned long long)key.objectid);
9175 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9177 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9178 btrfs_mark_buffer_dirty(path.nodes[0]);
9179 btrfs_release_path(&path);
9180 ret = btrfs_commit_transaction(trans, root);
9182 fprintf(stderr, "Repaired extent flags for %llu\n",
9183 (unsigned long long)rec->start);
9188 /* right now we only prune from the extent allocation tree */
9189 static int prune_one_block(struct btrfs_trans_handle *trans,
9190 struct btrfs_fs_info *info,
9191 struct btrfs_corrupt_block *corrupt)
9194 struct btrfs_path path;
9195 struct extent_buffer *eb;
9199 int level = corrupt->level + 1;
9201 btrfs_init_path(&path);
9203 /* we want to stop at the parent to our busted block */
9204 path.lowest_level = level;
9206 ret = btrfs_search_slot(trans, info->extent_root,
9207 &corrupt->key, &path, -1, 1);
9212 eb = path.nodes[level];
9219 * hopefully the search gave us the block we want to prune,
9220 * let's try that first
9222 slot = path.slots[level];
9223 found = btrfs_node_blockptr(eb, slot);
9224 if (found == corrupt->cache.start)
9227 nritems = btrfs_header_nritems(eb);
9229 /* the search failed, let's scan this node and hope we find it */
9230 for (slot = 0; slot < nritems; slot++) {
9231 found = btrfs_node_blockptr(eb, slot);
9232 if (found == corrupt->cache.start)
9236 * we couldn't find the bad block. TODO, search all the nodes for pointers
9239 if (eb == info->extent_root->node) {
9244 btrfs_release_path(&path);
9249 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9250 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9253 btrfs_release_path(&path);
9257 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9259 struct btrfs_trans_handle *trans = NULL;
9260 struct cache_extent *cache;
9261 struct btrfs_corrupt_block *corrupt;
9264 cache = search_cache_extent(info->corrupt_blocks, 0);
9268 trans = btrfs_start_transaction(info->extent_root, 1);
9270 return PTR_ERR(trans);
9272 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9273 prune_one_block(trans, info, corrupt);
9274 remove_cache_extent(info->corrupt_blocks, cache);
9277 return btrfs_commit_transaction(trans, info->extent_root);
9281 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9283 struct btrfs_block_group_cache *cache;
9288 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9289 &start, &end, EXTENT_DIRTY);
9292 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9297 cache = btrfs_lookup_first_block_group(fs_info, start);
9302 start = cache->key.objectid + cache->key.offset;
9306 static int check_extent_refs(struct btrfs_root *root,
9307 struct cache_tree *extent_cache)
9309 struct extent_record *rec;
9310 struct cache_extent *cache;
9316 * if we're doing a repair, we have to make sure
9317 * we don't allocate from the problem extents.
9318 * In the worst case, this will be all the
9321 cache = search_cache_extent(extent_cache, 0);
9323 rec = container_of(cache, struct extent_record, cache);
9324 set_extent_dirty(root->fs_info->excluded_extents,
9326 rec->start + rec->max_size - 1);
9327 cache = next_cache_extent(cache);
9330 /* pin down all the corrupted blocks too */
9331 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9333 set_extent_dirty(root->fs_info->excluded_extents,
9335 cache->start + cache->size - 1);
9336 cache = next_cache_extent(cache);
9338 prune_corrupt_blocks(root->fs_info);
9339 reset_cached_block_groups(root->fs_info);
9342 reset_cached_block_groups(root->fs_info);
9345 * We need to delete any duplicate entries we find first otherwise we
9346 * could mess up the extent tree when we have backrefs that actually
9347 * belong to a different extent item and not the weird duplicate one.
9349 while (repair && !list_empty(&duplicate_extents)) {
9350 rec = to_extent_record(duplicate_extents.next);
9351 list_del_init(&rec->list);
9353 /* Sometimes we can find a backref before we find an actual
9354 * extent, so we need to process it a little bit to see if there
9355 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9356 * if this is a backref screwup. If we need to delete stuff
9357 * process_duplicates() will return 0, otherwise it will return
9360 if (process_duplicates(extent_cache, rec))
9362 ret = delete_duplicate_records(root, rec);
9366 * delete_duplicate_records will return the number of entries
9367 * deleted, so if it's greater than 0 then we know we actually
9368 * did something and we need to remove.
9381 cache = search_cache_extent(extent_cache, 0);
9384 rec = container_of(cache, struct extent_record, cache);
9385 if (rec->num_duplicates) {
9386 fprintf(stderr, "extent item %llu has multiple extent "
9387 "items\n", (unsigned long long)rec->start);
9391 if (rec->refs != rec->extent_item_refs) {
9392 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9393 (unsigned long long)rec->start,
9394 (unsigned long long)rec->nr);
9395 fprintf(stderr, "extent item %llu, found %llu\n",
9396 (unsigned long long)rec->extent_item_refs,
9397 (unsigned long long)rec->refs);
9398 ret = record_orphan_data_extents(root->fs_info, rec);
9404 if (all_backpointers_checked(rec, 1)) {
9405 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9406 (unsigned long long)rec->start,
9407 (unsigned long long)rec->nr);
9411 if (!rec->owner_ref_checked) {
9412 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9413 (unsigned long long)rec->start,
9414 (unsigned long long)rec->nr);
9419 if (repair && fix) {
9420 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9426 if (rec->bad_full_backref) {
9427 fprintf(stderr, "bad full backref on [%llu]\n",
9428 (unsigned long long)rec->start);
9430 ret = fixup_extent_flags(root->fs_info, rec);
9438 * Although it's not an extent ref's problem, we reuse this
9439 * routine for error reporting.
9440 * No repair function yet.
9442 if (rec->crossing_stripes) {
9444 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9445 rec->start, rec->start + rec->max_size);
9449 if (rec->wrong_chunk_type) {
9451 "bad extent [%llu, %llu), type mismatch with chunk\n",
9452 rec->start, rec->start + rec->max_size);
9456 remove_cache_extent(extent_cache, cache);
9457 free_all_extent_backrefs(rec);
9458 if (!init_extent_tree && repair && (!cur_err || fix))
9459 clear_extent_dirty(root->fs_info->excluded_extents,
9461 rec->start + rec->max_size - 1);
9466 if (ret && ret != -EAGAIN) {
9467 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9470 struct btrfs_trans_handle *trans;
9472 root = root->fs_info->extent_root;
9473 trans = btrfs_start_transaction(root, 1);
9474 if (IS_ERR(trans)) {
9475 ret = PTR_ERR(trans);
9479 ret = btrfs_fix_block_accounting(trans, root);
9482 ret = btrfs_commit_transaction(trans, root);
9491 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9495 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9496 stripe_size = length;
9497 stripe_size /= num_stripes;
9498 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9499 stripe_size = length * 2;
9500 stripe_size /= num_stripes;
9501 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9502 stripe_size = length;
9503 stripe_size /= (num_stripes - 1);
9504 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9505 stripe_size = length;
9506 stripe_size /= (num_stripes - 2);
9508 stripe_size = length;
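/*
 * Worked examples, not part of the original source, for the per-device
 * stripe length computed above:
 *
 *	RAID0,  4 stripes, chunk length 4G  ->  4G / 4       = 1G per device
 *	RAID10, 4 stripes, chunk length 2G  ->  2G * 2 / 4   = 1G per device
 *	RAID5,  3 stripes, chunk length 2G  ->  2G / (3 - 1) = 1G per device
 *	RAID6,  6 stripes, chunk length 4G  ->  4G / (6 - 2) = 1G per device
 *
 * For single, DUP and RAID1 chunks the chunk length already is the
 * per-device size.
 */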
9514 * Check the chunk against its block group and dev extent list refs:
9515 * Return 0 if all refs seem valid.
9516 * Return 1 if part of the refs seem valid but a later check is needed to
9517 * rebuild them, e.g. a missing block group that requires searching the extent tree.
9518 * Return -1 if essential refs are missing and cannot be rebuilt.
9520 static int check_chunk_refs(struct chunk_record *chunk_rec,
9521 struct block_group_tree *block_group_cache,
9522 struct device_extent_tree *dev_extent_cache,
9525 struct cache_extent *block_group_item;
9526 struct block_group_record *block_group_rec;
9527 struct cache_extent *dev_extent_item;
9528 struct device_extent_record *dev_extent_rec;
9532 int metadump_v2 = 0;
9536 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9539 if (block_group_item) {
9540 block_group_rec = container_of(block_group_item,
9541 struct block_group_record,
9543 if (chunk_rec->length != block_group_rec->offset ||
9544 chunk_rec->offset != block_group_rec->objectid ||
9546 chunk_rec->type_flags != block_group_rec->flags)) {
9549 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9550 chunk_rec->objectid,
9555 chunk_rec->type_flags,
9556 block_group_rec->objectid,
9557 block_group_rec->type,
9558 block_group_rec->offset,
9559 block_group_rec->offset,
9560 block_group_rec->objectid,
9561 block_group_rec->flags);
9564 list_del_init(&block_group_rec->list);
9565 chunk_rec->bg_rec = block_group_rec;
9570 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9571 chunk_rec->objectid,
9576 chunk_rec->type_flags);
9583 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9584 chunk_rec->num_stripes);
9585 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9586 devid = chunk_rec->stripes[i].devid;
9587 offset = chunk_rec->stripes[i].offset;
9588 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9589 devid, offset, length);
9590 if (dev_extent_item) {
9591 dev_extent_rec = container_of(dev_extent_item,
9592 struct device_extent_record,
9594 if (dev_extent_rec->objectid != devid ||
9595 dev_extent_rec->offset != offset ||
9596 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9597 dev_extent_rec->length != length) {
9600 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] mismatches dev extent[%llu, %llu, %llu]\n",
9601 chunk_rec->objectid,
9604 chunk_rec->stripes[i].devid,
9605 chunk_rec->stripes[i].offset,
9606 dev_extent_rec->objectid,
9607 dev_extent_rec->offset,
9608 dev_extent_rec->length);
9611 list_move(&dev_extent_rec->chunk_list,
9612 &chunk_rec->dextents);
9617 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9618 chunk_rec->objectid,
9621 chunk_rec->stripes[i].devid,
9622 chunk_rec->stripes[i].offset);
9629 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9630 int check_chunks(struct cache_tree *chunk_cache,
9631 struct block_group_tree *block_group_cache,
9632 struct device_extent_tree *dev_extent_cache,
9633 struct list_head *good, struct list_head *bad,
9634 struct list_head *rebuild, int silent)
9636 struct cache_extent *chunk_item;
9637 struct chunk_record *chunk_rec;
9638 struct block_group_record *bg_rec;
9639 struct device_extent_record *dext_rec;
9643 chunk_item = first_cache_extent(chunk_cache);
9644 while (chunk_item) {
9645 chunk_rec = container_of(chunk_item, struct chunk_record,
9647 err = check_chunk_refs(chunk_rec, block_group_cache,
9648 dev_extent_cache, silent);
9651 if (err == 0 && good)
9652 list_add_tail(&chunk_rec->list, good);
9653 if (err > 0 && rebuild)
9654 list_add_tail(&chunk_rec->list, rebuild);
9656 list_add_tail(&chunk_rec->list, bad);
9657 chunk_item = next_cache_extent(chunk_item);
9660 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9663 "Block group[%llu, %llu] (flags = %llu) has no corresponding chunk.\n",
9671 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9675 "Device extent[%llu, %llu, %llu] has no corresponding chunk.\n",
9686 static int check_device_used(struct device_record *dev_rec,
9687 struct device_extent_tree *dext_cache)
9689 struct cache_extent *cache;
9690 struct device_extent_record *dev_extent_rec;
9693 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9695 dev_extent_rec = container_of(cache,
9696 struct device_extent_record,
9698 if (dev_extent_rec->objectid != dev_rec->devid)
9701 list_del_init(&dev_extent_rec->device_list);
9702 total_byte += dev_extent_rec->length;
9703 cache = next_cache_extent(cache);
9706 if (total_byte != dev_rec->byte_used) {
9708 "Dev extent's total bytes (%llu) is not equal to bytes used (%llu) in dev[%llu, %u, %llu]\n",
9709 total_byte, dev_rec->byte_used, dev_rec->objectid,
9710 dev_rec->type, dev_rec->offset);
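/*
 * Illustrative example, not part of the original source: a device item
 * reporting bytes_used == 3G must be matched exactly by the device extents
 * found for that devid, e.g. three 1G extents.  If one extent is missing
 * the sum is only 2G and the message above is printed; this helper only
 * reports the mismatch, it does not repair it.
 */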
9717 /* check btrfs_dev_item -> btrfs_dev_extent */
9718 static int check_devices(struct rb_root *dev_cache,
9719 struct device_extent_tree *dev_extent_cache)
9721 struct rb_node *dev_node;
9722 struct device_record *dev_rec;
9723 struct device_extent_record *dext_rec;
9727 dev_node = rb_first(dev_cache);
9729 dev_rec = container_of(dev_node, struct device_record, node);
9730 err = check_device_used(dev_rec, dev_extent_cache);
9734 dev_node = rb_next(dev_node);
9736 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9739 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9740 dext_rec->objectid, dext_rec->offset, dext_rec->length);
9747 static int add_root_item_to_list(struct list_head *head,
9748 u64 objectid, u64 bytenr, u64 last_snapshot,
9749 u8 level, u8 drop_level,
9750 struct btrfs_key *drop_key)
9753 struct root_item_record *ri_rec;
9754 ri_rec = malloc(sizeof(*ri_rec));
9757 ri_rec->bytenr = bytenr;
9758 ri_rec->objectid = objectid;
9759 ri_rec->level = level;
9760 ri_rec->drop_level = drop_level;
9761 ri_rec->last_snapshot = last_snapshot;
9763 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9764 list_add_tail(&ri_rec->list, head);
9769 static void free_root_item_list(struct list_head *list)
9771 struct root_item_record *ri_rec;
9773 while (!list_empty(list)) {
9774 ri_rec = list_first_entry(list, struct root_item_record,
9776 list_del_init(&ri_rec->list);
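/*
 * Pop each queued root_item_record, read its root node, add it to the
 * pending set with add_root_to_pending() and then walk the tree blocks via
 * run_next_block() until the queue is drained.
 */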
9781 static int deal_root_from_list(struct list_head *list,
9782 struct btrfs_root *root,
9783 struct block_info *bits,
9785 struct cache_tree *pending,
9786 struct cache_tree *seen,
9787 struct cache_tree *reada,
9788 struct cache_tree *nodes,
9789 struct cache_tree *extent_cache,
9790 struct cache_tree *chunk_cache,
9791 struct rb_root *dev_cache,
9792 struct block_group_tree *block_group_cache,
9793 struct device_extent_tree *dev_extent_cache)
9798 while (!list_empty(list)) {
9799 struct root_item_record *rec;
9800 struct extent_buffer *buf;
9801 rec = list_entry(list->next,
9802 struct root_item_record, list);
9804 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9805 if (!extent_buffer_uptodate(buf)) {
9806 free_extent_buffer(buf);
9810 ret = add_root_to_pending(buf, extent_cache, pending,
9811 seen, nodes, rec->objectid);
9815 			 * To rebuild the extent tree, we need to deal with snapshots
9816 			 * one by one; otherwise we process nodes first, which
9817 			 * maximizes readahead.
9820 ret = run_next_block(root, bits, bits_nr, &last,
9821 pending, seen, reada, nodes,
9822 extent_cache, chunk_cache,
9823 dev_cache, block_group_cache,
9824 dev_extent_cache, rec);
9828 free_extent_buffer(buf);
9829 list_del(&rec->list);
9835 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9836 reada, nodes, extent_cache, chunk_cache,
9837 dev_cache, block_group_cache,
9838 dev_extent_cache, NULL);
9848 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
9850 struct rb_root dev_cache;
9851 struct cache_tree chunk_cache;
9852 struct block_group_tree block_group_cache;
9853 struct device_extent_tree dev_extent_cache;
9854 struct cache_tree extent_cache;
9855 struct cache_tree seen;
9856 struct cache_tree pending;
9857 struct cache_tree reada;
9858 struct cache_tree nodes;
9859 struct extent_io_tree excluded_extents;
9860 struct cache_tree corrupt_blocks;
9861 struct btrfs_path path;
9862 struct btrfs_key key;
9863 struct btrfs_key found_key;
9865 struct block_info *bits;
9867 struct extent_buffer *leaf;
9869 struct btrfs_root_item ri;
9870 struct list_head dropping_trees;
9871 struct list_head normal_trees;
9872 struct btrfs_root *root1;
9873 struct btrfs_root *root;
9877 root = fs_info->fs_root;
9878 dev_cache = RB_ROOT;
9879 cache_tree_init(&chunk_cache);
9880 block_group_tree_init(&block_group_cache);
9881 device_extent_tree_init(&dev_extent_cache);
9883 cache_tree_init(&extent_cache);
9884 cache_tree_init(&seen);
9885 cache_tree_init(&pending);
9886 cache_tree_init(&nodes);
9887 cache_tree_init(&reada);
9888 cache_tree_init(&corrupt_blocks);
9889 extent_io_tree_init(&excluded_extents);
9890 INIT_LIST_HEAD(&dropping_trees);
9891 INIT_LIST_HEAD(&normal_trees);
9894 fs_info->excluded_extents = &excluded_extents;
9895 fs_info->fsck_extent_cache = &extent_cache;
9896 fs_info->free_extent_hook = free_extent_hook;
9897 fs_info->corrupt_blocks = &corrupt_blocks;
9901 bits = malloc(bits_nr * sizeof(struct block_info));
9907 if (ctx.progress_enabled) {
9908 ctx.tp = TASK_EXTENTS;
9909 task_start(ctx.info);
9913 root1 = fs_info->tree_root;
9914 level = btrfs_header_level(root1->node);
9915 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9916 root1->node->start, 0, level, 0, NULL);
9919 root1 = fs_info->chunk_root;
9920 level = btrfs_header_level(root1->node);
9921 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9922 root1->node->start, 0, level, 0, NULL);
9925 btrfs_init_path(&path);
9928 key.type = BTRFS_ROOT_ITEM_KEY;
9929 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
9933 leaf = path.nodes[0];
9934 slot = path.slots[0];
9935 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9936 ret = btrfs_next_leaf(root, &path);
9939 leaf = path.nodes[0];
9940 slot = path.slots[0];
9942 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9943 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9944 unsigned long offset;
9947 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9948 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9949 last_snapshot = btrfs_root_last_snapshot(&ri);
9950 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9951 level = btrfs_root_level(&ri);
9952 ret = add_root_item_to_list(&normal_trees,
9954 btrfs_root_bytenr(&ri),
9955 last_snapshot, level,
9960 level = btrfs_root_level(&ri);
9961 objectid = found_key.objectid;
9962 btrfs_disk_key_to_cpu(&found_key,
9964 ret = add_root_item_to_list(&dropping_trees,
9966 btrfs_root_bytenr(&ri),
9967 last_snapshot, level,
9968 ri.drop_level, &found_key);
9975 btrfs_release_path(&path);
9978 	 * check_block can return -EAGAIN if it fixes something. Keep this
9979 	 * in mind when dealing with return values from these functions: if
9980 	 * we get -EAGAIN we want to fall through and restart the loop.
9982 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9983 &seen, &reada, &nodes, &extent_cache,
9984 &chunk_cache, &dev_cache, &block_group_cache,
9991 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9992 &pending, &seen, &reada, &nodes,
9993 &extent_cache, &chunk_cache, &dev_cache,
9994 &block_group_cache, &dev_extent_cache);
10001 ret = check_chunks(&chunk_cache, &block_group_cache,
10002 &dev_extent_cache, NULL, NULL, NULL, 0);
10004 if (ret == -EAGAIN)
10009 ret = check_extent_refs(root, &extent_cache);
10011 if (ret == -EAGAIN)
10016 ret = check_devices(&dev_cache, &dev_extent_cache);
10021 task_stop(ctx.info);
10023 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10024 extent_io_tree_cleanup(&excluded_extents);
10025 fs_info->fsck_extent_cache = NULL;
10026 fs_info->free_extent_hook = NULL;
10027 fs_info->corrupt_blocks = NULL;
10028 fs_info->excluded_extents = NULL;
10031 free_chunk_cache_tree(&chunk_cache);
10032 free_device_cache_tree(&dev_cache);
10033 free_block_group_tree(&block_group_cache);
10034 free_device_extent_tree(&dev_extent_cache);
10035 free_extent_cache_tree(&seen);
10036 free_extent_cache_tree(&pending);
10037 free_extent_cache_tree(&reada);
10038 free_extent_cache_tree(&nodes);
10039 free_root_item_list(&normal_trees);
10040 free_root_item_list(&dropping_trees);
10043 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10044 free_extent_cache_tree(&seen);
10045 free_extent_cache_tree(&pending);
10046 free_extent_cache_tree(&reada);
10047 free_extent_cache_tree(&nodes);
10048 free_chunk_cache_tree(&chunk_cache);
10049 free_block_group_tree(&block_group_cache);
10050 free_device_cache_tree(&dev_cache);
10051 free_device_extent_tree(&dev_extent_cache);
10052 free_extent_record_cache(&extent_cache);
10053 free_root_item_list(&normal_trees);
10054 free_root_item_list(&dropping_trees);
10055 extent_io_tree_cleanup(&excluded_extents);
10060 * Check backrefs of a tree block given by @bytenr or @eb.
10062 * @root: the root containing the @bytenr or @eb
10063 * @eb: tree block extent buffer, can be NULL
10064 * @bytenr: bytenr of the tree block to search
10065 * @level: tree level of the tree block
10066 * @owner: owner of the tree block
10068 * Return >0 for any error found and output error message
10069 * Return 0 for no error found
10071 static int check_tree_block_ref(struct btrfs_root *root,
10072 struct extent_buffer *eb, u64 bytenr,
10073 int level, u64 owner)
10075 struct btrfs_key key;
10076 struct btrfs_root *extent_root = root->fs_info->extent_root;
10077 struct btrfs_path path;
10078 struct btrfs_extent_item *ei;
10079 struct btrfs_extent_inline_ref *iref;
10080 struct extent_buffer *leaf;
10086 u32 nodesize = root->fs_info->nodesize;
10089 int tree_reloc_root = 0;
10094 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10095 btrfs_header_bytenr(root->node) == bytenr)
10096 tree_reloc_root = 1;
10098 btrfs_init_path(&path);
10099 key.objectid = bytenr;
10100 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10101 key.type = BTRFS_METADATA_ITEM_KEY;
10103 key.type = BTRFS_EXTENT_ITEM_KEY;
10104 key.offset = (u64)-1;
10106 /* Search for the backref in extent tree */
10107 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10109 err |= BACKREF_MISSING;
10112 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10114 err |= BACKREF_MISSING;
10118 leaf = path.nodes[0];
10119 slot = path.slots[0];
10120 btrfs_item_key_to_cpu(leaf, &key, slot);
10122 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10124 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10125 skinny_level = (int)key.offset;
10126 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10128 struct btrfs_tree_block_info *info;
10130 info = (struct btrfs_tree_block_info *)(ei + 1);
10131 skinny_level = btrfs_tree_block_level(leaf, info);
10132 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10139 if (!(btrfs_extent_flags(leaf, ei) &
10140 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10142 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10143 key.objectid, nodesize,
10144 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10145 err = BACKREF_MISMATCH;
10147 header_gen = btrfs_header_generation(eb);
10148 extent_gen = btrfs_extent_generation(leaf, ei);
10149 if (header_gen != extent_gen) {
10151 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10152 key.objectid, nodesize, header_gen,
10154 err = BACKREF_MISMATCH;
10156 if (level != skinny_level) {
10158 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10159 key.objectid, nodesize, level, skinny_level);
10160 err = BACKREF_MISMATCH;
10162 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10164 "extent[%llu %u] is referred by other roots than %llu",
10165 key.objectid, nodesize, root->objectid);
10166 err = BACKREF_MISMATCH;
10171 * Iterate the extent/metadata item to find the exact backref
10173 item_size = btrfs_item_size_nr(leaf, slot);
10174 ptr = (unsigned long)iref;
10175 end = (unsigned long)ei + item_size;
10176 while (ptr < end) {
10177 iref = (struct btrfs_extent_inline_ref *)ptr;
10178 type = btrfs_extent_inline_ref_type(leaf, iref);
10179 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10181 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10182 (offset == root->objectid || offset == owner)) {
10184 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10186 * Backref of tree reloc root points to itself, no need
10187 * to check backref any more.
10189 if (tree_reloc_root)
10192 /* Check if the backref points to valid referencer */
10193 found_ref = !check_tree_block_ref(root, NULL,
10194 offset, level + 1, owner);
10199 ptr += btrfs_extent_inline_ref_size(type);
10203 * Inlined extent item doesn't have what we need, check
10204 * TREE_BLOCK_REF_KEY
10207 btrfs_release_path(&path);
10208 key.objectid = bytenr;
10209 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10210 key.offset = root->objectid;
10212 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10217 err |= BACKREF_MISSING;
10219 btrfs_release_path(&path);
10220 if (eb && (err & BACKREF_MISSING))
10221 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10222 bytenr, nodesize, owner, level);
10227 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10229 * Return >0 any error found and output error message
10230 * Return 0 for no error found
10232 static int check_extent_data_item(struct btrfs_root *root,
10233 struct extent_buffer *eb, int slot)
10235 struct btrfs_file_extent_item *fi;
10236 struct btrfs_path path;
10237 struct btrfs_root *extent_root = root->fs_info->extent_root;
10238 struct btrfs_key fi_key;
10239 struct btrfs_key dbref_key;
10240 struct extent_buffer *leaf;
10241 struct btrfs_extent_item *ei;
10242 struct btrfs_extent_inline_ref *iref;
10243 struct btrfs_extent_data_ref *dref;
10246 u64 disk_num_bytes;
10247 u64 extent_num_bytes;
10254 int found_dbackref = 0;
10258 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10259 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10261 /* Nothing to check for hole and inline data extents */
10262 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10263 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10266 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10267 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10268 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10270 /* Check unaligned disk_num_bytes and num_bytes */
10271 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10273 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10274 fi_key.objectid, fi_key.offset, disk_num_bytes,
10275 root->fs_info->sectorsize);
10276 err |= BYTES_UNALIGNED;
10278 data_bytes_allocated += disk_num_bytes;
10280 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10282 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10283 fi_key.objectid, fi_key.offset, extent_num_bytes,
10284 root->fs_info->sectorsize);
10285 err |= BYTES_UNALIGNED;
10287 data_bytes_referenced += extent_num_bytes;
10289 owner = btrfs_header_owner(eb);
10291 /* Check the extent item of the file extent in extent tree */
10292 btrfs_init_path(&path);
10293 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10294 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10295 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10297 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10301 leaf = path.nodes[0];
10302 slot = path.slots[0];
10303 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10305 extent_flags = btrfs_extent_flags(leaf, ei);
10307 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10309 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10310 disk_bytenr, disk_num_bytes,
10311 BTRFS_EXTENT_FLAG_DATA);
10312 err |= BACKREF_MISMATCH;
10315 /* Check data backref inside that extent item */
10316 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10317 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10318 ptr = (unsigned long)iref;
10319 end = (unsigned long)ei + item_size;
10320 while (ptr < end) {
10321 iref = (struct btrfs_extent_inline_ref *)ptr;
10322 type = btrfs_extent_inline_ref_type(leaf, iref);
10323 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10325 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10326 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10327 if (ref_root == owner || ref_root == root->objectid)
10328 found_dbackref = 1;
10329 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10330 found_dbackref = !check_tree_block_ref(root, NULL,
10331 btrfs_extent_inline_ref_offset(leaf, iref),
10335 if (found_dbackref)
10337 ptr += btrfs_extent_inline_ref_size(type);
10340 if (!found_dbackref) {
10341 btrfs_release_path(&path);
10343 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10344 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10345 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10346 dbref_key.offset = hash_extent_data_ref(root->objectid,
10347 fi_key.objectid, fi_key.offset);
10349 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10350 &dbref_key, &path, 0, 0);
10352 found_dbackref = 1;
10356 btrfs_release_path(&path);
10359 		 * Neither an inlined ref nor an EXTENT_DATA_REF was found, so try
10360 		 * SHARED_DATA_REF as a last resort.
10362 dbref_key.objectid = disk_bytenr;
10363 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10364 dbref_key.offset = eb->start;
10366 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10367 &dbref_key, &path, 0, 0);
10369 found_dbackref = 1;
10375 if (!found_dbackref)
10376 err |= BACKREF_MISSING;
10377 btrfs_release_path(&path);
10378 if (err & BACKREF_MISSING) {
10379 error("data extent[%llu %llu] backref lost",
10380 disk_bytenr, disk_num_bytes);
10386  * Get the real tree block level, needed for cases like shared blocks
10387  * Return >= 0 as the tree level
10388  * Return <0 for error
10390 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10392 struct extent_buffer *eb;
10393 struct btrfs_path path;
10394 struct btrfs_key key;
10395 struct btrfs_extent_item *ei;
10402 /* Search extent tree for extent generation and level */
10403 key.objectid = bytenr;
10404 key.type = BTRFS_METADATA_ITEM_KEY;
10405 key.offset = (u64)-1;
10407 btrfs_init_path(&path);
10408 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10411 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10419 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10420 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10421 struct btrfs_extent_item);
10422 flags = btrfs_extent_flags(path.nodes[0], ei);
10423 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10428 /* Get transid for later read_tree_block() check */
10429 transid = btrfs_extent_generation(path.nodes[0], ei);
10431 /* Get backref level as one source */
10432 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10433 backref_level = key.offset;
10435 struct btrfs_tree_block_info *info;
10437 info = (struct btrfs_tree_block_info *)(ei + 1);
10438 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10440 btrfs_release_path(&path);
10442 /* Get level from tree block as an alternative source */
10443 eb = read_tree_block(fs_info, bytenr, transid);
10444 if (!extent_buffer_uptodate(eb)) {
10445 free_extent_buffer(eb);
10448 header_level = btrfs_header_level(eb);
10449 free_extent_buffer(eb);
10451 if (header_level != backref_level)
10453 return header_level;
10456 btrfs_release_path(&path);
10461 * Check if a tree block backref is valid (points to a valid tree block)
10462 * if level == -1, level will be resolved
10463 * Return >0 for any error found and print error message
10465 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10466 u64 bytenr, int level)
10468 struct btrfs_root *root;
10469 struct btrfs_key key;
10470 struct btrfs_path path;
10471 struct extent_buffer *eb;
10472 struct extent_buffer *node;
10473 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10477 /* Query level for level == -1 special case */
10479 level = query_tree_block_level(fs_info, bytenr);
10481 err |= REFERENCER_MISSING;
10485 key.objectid = root_id;
10486 key.type = BTRFS_ROOT_ITEM_KEY;
10487 key.offset = (u64)-1;
10489 root = btrfs_read_fs_root(fs_info, &key);
10490 if (IS_ERR(root)) {
10491 err |= REFERENCER_MISSING;
10495 /* Read out the tree block to get item/node key */
10496 eb = read_tree_block(fs_info, bytenr, 0);
10497 if (!extent_buffer_uptodate(eb)) {
10498 err |= REFERENCER_MISSING;
10499 free_extent_buffer(eb);
10503 /* Empty tree, no need to check key */
10504 if (!btrfs_header_nritems(eb) && !level) {
10505 free_extent_buffer(eb);
10510 btrfs_node_key_to_cpu(eb, &key, 0);
10512 btrfs_item_key_to_cpu(eb, &key, 0);
10514 free_extent_buffer(eb);
10516 btrfs_init_path(&path);
10517 path.lowest_level = level;
10518 /* Search with the first key, to ensure we can reach it */
10519 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10521 err |= REFERENCER_MISSING;
10525 node = path.nodes[level];
10526 if (btrfs_header_bytenr(node) != bytenr) {
10528 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10529 bytenr, nodesize, bytenr,
10530 btrfs_header_bytenr(node));
10531 err |= REFERENCER_MISMATCH;
10533 if (btrfs_header_level(node) != level) {
10535 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10536 bytenr, nodesize, level,
10537 btrfs_header_level(node));
10538 err |= REFERENCER_MISMATCH;
10542 btrfs_release_path(&path);
10544 if (err & REFERENCER_MISSING) {
10546 error("extent [%llu %d] lost referencer (owner: %llu)",
10547 bytenr, nodesize, root_id);
10550 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10551 bytenr, nodesize, root_id, level);
10558 * Check if tree block @eb is tree reloc root.
10559  * Return 0 if it's not, or if any problem happens
10560 * Return 1 if it's a tree reloc root
10562 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10563 struct extent_buffer *eb)
10565 struct btrfs_root *tree_reloc_root;
10566 struct btrfs_key key;
10567 u64 bytenr = btrfs_header_bytenr(eb);
10568 u64 owner = btrfs_header_owner(eb);
10571 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10572 key.offset = owner;
10573 key.type = BTRFS_ROOT_ITEM_KEY;
10575 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10576 if (IS_ERR(tree_reloc_root))
10579 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10581 btrfs_free_fs_root(tree_reloc_root);
10586 * Check referencer for shared block backref
10587 * If level == -1, this function will resolve the level.
10589 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10590 u64 parent, u64 bytenr, int level)
10592 struct extent_buffer *eb;
10594 int found_parent = 0;
10597 eb = read_tree_block(fs_info, parent, 0);
10598 if (!extent_buffer_uptodate(eb))
10602 level = query_tree_block_level(fs_info, bytenr);
10606 /* It's possible it's a tree reloc root */
10607 if (parent == bytenr) {
10608 if (is_tree_reloc_root(fs_info, eb))
10613 if (level + 1 != btrfs_header_level(eb))
10616 nr = btrfs_header_nritems(eb);
10617 for (i = 0; i < nr; i++) {
10618 if (bytenr == btrfs_node_blockptr(eb, i)) {
10624 free_extent_buffer(eb);
10625 if (!found_parent) {
10627 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10628 bytenr, fs_info->nodesize, parent, level);
10629 return REFERENCER_MISSING;
10635 * Check referencer for normal (inlined) data ref
10636 * If len == 0, it will be resolved by searching in extent tree
10638 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10639 u64 root_id, u64 objectid, u64 offset,
10640 u64 bytenr, u64 len, u32 count)
10642 struct btrfs_root *root;
10643 struct btrfs_root *extent_root = fs_info->extent_root;
10644 struct btrfs_key key;
10645 struct btrfs_path path;
10646 struct extent_buffer *leaf;
10647 struct btrfs_file_extent_item *fi;
10648 u32 found_count = 0;
10653 key.objectid = bytenr;
10654 key.type = BTRFS_EXTENT_ITEM_KEY;
10655 key.offset = (u64)-1;
10657 btrfs_init_path(&path);
10658 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10661 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10664 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10665 if (key.objectid != bytenr ||
10666 key.type != BTRFS_EXTENT_ITEM_KEY)
10669 btrfs_release_path(&path);
10671 key.objectid = root_id;
10672 key.type = BTRFS_ROOT_ITEM_KEY;
10673 key.offset = (u64)-1;
10674 btrfs_init_path(&path);
10676 root = btrfs_read_fs_root(fs_info, &key);
10680 key.objectid = objectid;
10681 key.type = BTRFS_EXTENT_DATA_KEY;
10683 	 * This can be tricky, as the data backref offset is
10684 	 * file offset - file extent offset, which is smaller than or
10685 	 * equal to the original backref offset. The only special case is
10686 	 * overflow, so we need an extra check and a further search.
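	/*
	 * If bit 63 of the backref offset is set, the
	 * file_offset - file_extent_offset subtraction wrapped below zero;
	 * searching from that huge offset would miss the real file extent,
	 * so start the search from file offset 0 instead.
	 */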
10688 key.offset = offset & (1ULL << 63) ? 0 : offset;
10690 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10695 	 * Search forward from there to get the correct one.
10696 	 * NOTE: As we must do a comprehensive check on the data backref to
10697 	 * make sure the dref count also matches, we must iterate all file
10698 	 * extents of that inode.
10701 leaf = path.nodes[0];
10702 slot = path.slots[0];
10704 if (slot >= btrfs_header_nritems(leaf))
10706 btrfs_item_key_to_cpu(leaf, &key, slot);
10707 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10709 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10711 		 * Besides the normal disk bytenr and disk num bytes, we also
10712 		 * need an extra check on the dbackref offset, as
10713 		 * dbackref offset = file_offset - file_extent_offset
10715 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10716 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10717 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10722 ret = btrfs_next_item(root, &path);
10727 btrfs_release_path(&path);
10728 if (found_count != count) {
10730 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10731 bytenr, len, root_id, objectid, offset, count, found_count);
10732 return REFERENCER_MISSING;
10738 * Check if the referencer of a shared data backref exists
10740 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10741 u64 parent, u64 bytenr)
10743 struct extent_buffer *eb;
10744 struct btrfs_key key;
10745 struct btrfs_file_extent_item *fi;
10747 int found_parent = 0;
10750 eb = read_tree_block(fs_info, parent, 0);
10751 if (!extent_buffer_uptodate(eb))
10754 nr = btrfs_header_nritems(eb);
10755 for (i = 0; i < nr; i++) {
10756 btrfs_item_key_to_cpu(eb, &key, i);
10757 if (key.type != BTRFS_EXTENT_DATA_KEY)
10760 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10761 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10764 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10771 free_extent_buffer(eb);
10772 if (!found_parent) {
10773 error("shared extent %llu referencer lost (parent: %llu)",
10775 return REFERENCER_MISSING;
10781  * This function will check a given extent item, including its backrefs and
10782  * the item itself (e.g. crossing stripe boundary and type)
10784  * Since we don't use extent_record anymore, new error bits are introduced
10786 static int check_extent_item(struct btrfs_fs_info *fs_info,
10787 struct extent_buffer *eb, int slot)
10789 struct btrfs_extent_item *ei;
10790 struct btrfs_extent_inline_ref *iref;
10791 struct btrfs_extent_data_ref *dref;
10795 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10796 u32 item_size = btrfs_item_size_nr(eb, slot);
10801 struct btrfs_key key;
10805 btrfs_item_key_to_cpu(eb, &key, slot);
10806 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10807 bytes_used += key.offset;
10809 bytes_used += nodesize;
10811 if (item_size < sizeof(*ei)) {
10813 		 * COMPAT_EXTENT_TREE_V0 case, but that is a relic from
10814 		 * when the on-disk format was still undetermined.
10815 		 * No need to care about it anymore
10817 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10821 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10822 flags = btrfs_extent_flags(eb, ei);
10824 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10826 if (metadata && check_crossing_stripes(global_info, key.objectid,
10828 error("bad metadata [%llu, %llu) crossing stripe boundary",
10829 key.objectid, key.objectid + nodesize);
10830 err |= CROSSING_STRIPE_BOUNDARY;
10833 ptr = (unsigned long)(ei + 1);
10835 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10836 /* Old EXTENT_ITEM metadata */
10837 struct btrfs_tree_block_info *info;
10839 info = (struct btrfs_tree_block_info *)ptr;
10840 level = btrfs_tree_block_level(eb, info);
10841 ptr += sizeof(struct btrfs_tree_block_info);
10843 /* New METADATA_ITEM */
10844 level = key.offset;
10846 end = (unsigned long)ei + item_size;
10849 /* Reached extent item end normally */
10853 /* Beyond extent item end, wrong item size */
10855 err |= ITEM_SIZE_MISMATCH;
10856 error("extent item at bytenr %llu slot %d has wrong size",
10861 /* Now check every backref in this extent item */
10862 iref = (struct btrfs_extent_inline_ref *)ptr;
10863 type = btrfs_extent_inline_ref_type(eb, iref);
10864 offset = btrfs_extent_inline_ref_offset(eb, iref);
10866 case BTRFS_TREE_BLOCK_REF_KEY:
10867 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10871 case BTRFS_SHARED_BLOCK_REF_KEY:
10872 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10876 case BTRFS_EXTENT_DATA_REF_KEY:
10877 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10878 ret = check_extent_data_backref(fs_info,
10879 btrfs_extent_data_ref_root(eb, dref),
10880 btrfs_extent_data_ref_objectid(eb, dref),
10881 btrfs_extent_data_ref_offset(eb, dref),
10882 key.objectid, key.offset,
10883 btrfs_extent_data_ref_count(eb, dref));
10886 case BTRFS_SHARED_DATA_REF_KEY:
10887 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10891 error("extent[%llu %d %llu] has unknown ref type: %d",
10892 key.objectid, key.type, key.offset, type);
10893 err |= UNKNOWN_TYPE;
10897 ptr += btrfs_extent_inline_ref_size(type);
10905  * Check if a dev extent item is correctly referenced by its chunk
10907 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10908 struct extent_buffer *eb, int slot)
10910 struct btrfs_root *chunk_root = fs_info->chunk_root;
10911 struct btrfs_dev_extent *ptr;
10912 struct btrfs_path path;
10913 struct btrfs_key chunk_key;
10914 struct btrfs_key devext_key;
10915 struct btrfs_chunk *chunk;
10916 struct extent_buffer *l;
10920 int found_chunk = 0;
10923 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10924 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10925 length = btrfs_dev_extent_length(eb, ptr);
10927 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10928 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10929 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10931 btrfs_init_path(&path);
10932 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10937 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10938 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10943 if (btrfs_stripe_length(fs_info, l, chunk) != length)
10946 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10947 for (i = 0; i < num_stripes; i++) {
10948 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10949 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10951 if (devid == devext_key.objectid &&
10952 offset == devext_key.offset) {
10958 btrfs_release_path(&path);
10959 if (!found_chunk) {
10961 "device extent[%llu, %llu, %llu] did not find the related chunk",
10962 devext_key.objectid, devext_key.offset, length);
10963 return REFERENCER_MISSING;
10969  * Check if the used space recorded in the dev item matches its dev extents
10971 static int check_dev_item(struct btrfs_fs_info *fs_info,
10972 struct extent_buffer *eb, int slot)
10974 struct btrfs_root *dev_root = fs_info->dev_root;
10975 struct btrfs_dev_item *dev_item;
10976 struct btrfs_path path;
10977 struct btrfs_key key;
10978 struct btrfs_dev_extent *ptr;
10984 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10985 dev_id = btrfs_device_id(eb, dev_item);
10986 used = btrfs_device_bytes_used(eb, dev_item);
10988 key.objectid = dev_id;
10989 key.type = BTRFS_DEV_EXTENT_KEY;
10992 btrfs_init_path(&path);
10993 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10995 btrfs_item_key_to_cpu(eb, &key, slot);
10996 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10997 key.objectid, key.type, key.offset);
10998 btrfs_release_path(&path);
10999 return REFERENCER_MISSING;
11002 /* Iterate dev_extents to calculate the used space of a device */
11004 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11007 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11008 if (key.objectid > dev_id)
11010 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11013 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11014 struct btrfs_dev_extent);
11015 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11017 ret = btrfs_next_item(dev_root, &path);
11021 btrfs_release_path(&path);
11023 if (used != total) {
11024 btrfs_item_key_to_cpu(eb, &key, slot);
11026 "Total dev extent bytes %llu do not match bytes-used %llu in dev[%llu, %u, %llu]",
11027 total, used, BTRFS_ROOT_TREE_OBJECTID,
11028 BTRFS_DEV_EXTENT_KEY, dev_id);
11029 return ACCOUNTING_MISMATCH;
11035  * Check a block group item against its referencer (chunk) and its used space
11036  * against the extent/metadata items
11038 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11039 struct extent_buffer *eb, int slot)
11041 struct btrfs_root *extent_root = fs_info->extent_root;
11042 struct btrfs_root *chunk_root = fs_info->chunk_root;
11043 struct btrfs_block_group_item *bi;
11044 struct btrfs_block_group_item bg_item;
11045 struct btrfs_path path;
11046 struct btrfs_key bg_key;
11047 struct btrfs_key chunk_key;
11048 struct btrfs_key extent_key;
11049 struct btrfs_chunk *chunk;
11050 struct extent_buffer *leaf;
11051 struct btrfs_extent_item *ei;
11052 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11060 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11061 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11062 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11063 used = btrfs_block_group_used(&bg_item);
11064 bg_flags = btrfs_block_group_flags(&bg_item);
11066 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11067 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11068 chunk_key.offset = bg_key.objectid;
11070 btrfs_init_path(&path);
11071 /* Search for the referencer chunk */
11072 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11075 "block group[%llu %llu] did not find the related chunk item",
11076 bg_key.objectid, bg_key.offset);
11077 err |= REFERENCER_MISSING;
11079 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11080 struct btrfs_chunk);
11081 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11084 "block group[%llu %llu] related chunk item length does not match",
11085 bg_key.objectid, bg_key.offset);
11086 err |= REFERENCER_MISMATCH;
11089 btrfs_release_path(&path);
11091 /* Search from the block group bytenr */
11092 extent_key.objectid = bg_key.objectid;
11093 extent_key.type = 0;
11094 extent_key.offset = 0;
11096 btrfs_init_path(&path);
11097 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11101 /* Iterate extent tree to account used space */
11103 leaf = path.nodes[0];
11105 /* Search slot can point to the last item beyond leaf nritems */
11106 if (path.slots[0] >= btrfs_header_nritems(leaf))
11109 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11110 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11113 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11114 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11116 if (extent_key.objectid < bg_key.objectid)
11119 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11122 total += extent_key.offset;
11124 ei = btrfs_item_ptr(leaf, path.slots[0],
11125 struct btrfs_extent_item);
11126 flags = btrfs_extent_flags(leaf, ei);
11127 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11128 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11130 "bad extent[%llu, %llu) type mismatch with chunk",
11131 extent_key.objectid,
11132 extent_key.objectid + extent_key.offset);
11133 err |= CHUNK_TYPE_MISMATCH;
11135 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11136 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11137 BTRFS_BLOCK_GROUP_METADATA))) {
11139 "bad extent[%llu, %llu) type mismatch with chunk",
11140 extent_key.objectid,
11141 extent_key.objectid + nodesize);
11142 err |= CHUNK_TYPE_MISMATCH;
11146 ret = btrfs_next_item(extent_root, &path);
11152 btrfs_release_path(&path);
11154 if (total != used) {
11156 "block group[%llu %llu] used %llu but extent items used %llu",
11157 bg_key.objectid, bg_key.offset, used, total);
11158 err |= ACCOUNTING_MISMATCH;
11164  * Check a chunk item,
11165  * including all the dev_extents and the block group it refers to
11167 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11168 struct extent_buffer *eb, int slot)
11170 struct btrfs_root *extent_root = fs_info->extent_root;
11171 struct btrfs_root *dev_root = fs_info->dev_root;
11172 struct btrfs_path path;
11173 struct btrfs_key chunk_key;
11174 struct btrfs_key bg_key;
11175 struct btrfs_key devext_key;
11176 struct btrfs_chunk *chunk;
11177 struct extent_buffer *leaf;
11178 struct btrfs_block_group_item *bi;
11179 struct btrfs_block_group_item bg_item;
11180 struct btrfs_dev_extent *ptr;
11192 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11193 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11194 length = btrfs_chunk_length(eb, chunk);
11195 chunk_end = chunk_key.offset + length;
11196 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11199 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11201 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11204 type = btrfs_chunk_type(eb, chunk);
11206 bg_key.objectid = chunk_key.offset;
11207 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11208 bg_key.offset = length;
11210 btrfs_init_path(&path);
11211 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11214 "chunk[%llu %llu) did not find the related block group item",
11215 chunk_key.offset, chunk_end);
11216 err |= REFERENCER_MISSING;
11218 leaf = path.nodes[0];
11219 bi = btrfs_item_ptr(leaf, path.slots[0],
11220 struct btrfs_block_group_item);
11221 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11223 if (btrfs_block_group_flags(&bg_item) != type) {
11225 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11226 chunk_key.offset, chunk_end, type,
11227 btrfs_block_group_flags(&bg_item));
11228 err |= REFERENCER_MISSING;
11232 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11233 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11234 for (i = 0; i < num_stripes; i++) {
11235 btrfs_release_path(&path);
11236 btrfs_init_path(&path);
11237 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11238 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11239 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11241 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11244 goto not_match_dev;
11246 leaf = path.nodes[0];
11247 ptr = btrfs_item_ptr(leaf, path.slots[0],
11248 struct btrfs_dev_extent);
11249 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11250 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11251 if (objectid != chunk_key.objectid ||
11252 offset != chunk_key.offset ||
11253 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11254 goto not_match_dev;
11257 err |= BACKREF_MISSING;
11259 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11260 chunk_key.objectid, chunk_end, i);
11263 btrfs_release_path(&path);
11269 * Main entry function to check known items and update related accounting info
11271 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11273 struct btrfs_fs_info *fs_info = root->fs_info;
11274 struct btrfs_key key;
11277 struct btrfs_extent_data_ref *dref;
11282 btrfs_item_key_to_cpu(eb, &key, slot);
11286 case BTRFS_EXTENT_DATA_KEY:
11287 ret = check_extent_data_item(root, eb, slot);
11290 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11291 ret = check_block_group_item(fs_info, eb, slot);
11294 case BTRFS_DEV_ITEM_KEY:
11295 ret = check_dev_item(fs_info, eb, slot);
11298 case BTRFS_CHUNK_ITEM_KEY:
11299 ret = check_chunk_item(fs_info, eb, slot);
11302 case BTRFS_DEV_EXTENT_KEY:
11303 ret = check_dev_extent_item(fs_info, eb, slot);
11306 case BTRFS_EXTENT_ITEM_KEY:
11307 case BTRFS_METADATA_ITEM_KEY:
11308 ret = check_extent_item(fs_info, eb, slot);
11311 case BTRFS_EXTENT_CSUM_KEY:
11312 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11314 case BTRFS_TREE_BLOCK_REF_KEY:
11315 ret = check_tree_block_backref(fs_info, key.offset,
11319 case BTRFS_EXTENT_DATA_REF_KEY:
11320 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11321 ret = check_extent_data_backref(fs_info,
11322 btrfs_extent_data_ref_root(eb, dref),
11323 btrfs_extent_data_ref_objectid(eb, dref),
11324 btrfs_extent_data_ref_offset(eb, dref),
11326 btrfs_extent_data_ref_count(eb, dref));
11329 case BTRFS_SHARED_BLOCK_REF_KEY:
11330 ret = check_shared_block_backref(fs_info, key.offset,
11334 case BTRFS_SHARED_DATA_REF_KEY:
11335 ret = check_shared_data_backref(fs_info, key.offset,
11343 if (++slot < btrfs_header_nritems(eb))
11350  * Helper function for the later fs/subvol tree check, to determine if a tree
11351  * block should be checked.
11352  * This function ensures that only the direct referencer with the lowest
11353  * rootid checks a given fs/subvolume tree block.
11355  * The backref check in the extent tree will detect errors like a missing
11356  * subvolume tree, so we can be aggressive here to reduce duplicated checks.
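 *
 * E.g. if a tree block is referenced by fs roots 257 and 260, only the
 * traversal from root 257 descends into it; the pass from root 260 skips it.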
11358 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11360 struct btrfs_root *extent_root = root->fs_info->extent_root;
11361 struct btrfs_key key;
11362 struct btrfs_path path;
11363 struct extent_buffer *leaf;
11365 struct btrfs_extent_item *ei;
11371 struct btrfs_extent_inline_ref *iref;
11374 btrfs_init_path(&path);
11375 key.objectid = btrfs_header_bytenr(eb);
11376 key.type = BTRFS_METADATA_ITEM_KEY;
11377 key.offset = (u64)-1;
11380 	 * Any failure in backref resolving means we can't determine
11381 	 * which root the tree block belongs to.
11382 	 * In that case, we need to check that tree block anyway.
11384 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11388 ret = btrfs_previous_extent_item(extent_root, &path,
11389 btrfs_header_bytenr(eb));
11393 leaf = path.nodes[0];
11394 slot = path.slots[0];
11395 btrfs_item_key_to_cpu(leaf, &key, slot);
11396 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11398 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11399 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11401 struct btrfs_tree_block_info *info;
11403 info = (struct btrfs_tree_block_info *)(ei + 1);
11404 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11407 item_size = btrfs_item_size_nr(leaf, slot);
11408 ptr = (unsigned long)iref;
11409 end = (unsigned long)ei + item_size;
11410 while (ptr < end) {
11411 iref = (struct btrfs_extent_inline_ref *)ptr;
11412 type = btrfs_extent_inline_ref_type(leaf, iref);
11413 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11416 		 * We only check the tree block if the current root is
11417 		 * its lowest referencer.
11419 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11420 offset < root->objectid) {
11421 btrfs_release_path(&path);
11425 ptr += btrfs_extent_inline_ref_size(type);
11428 	 * Normally we should also check keyed tree block refs, but that may be
11429 	 * very time consuming. Inlined refs should already let us skip a lot
11430 	 * of refs, so skip searching keyed tree block refs.
11434 btrfs_release_path(&path);
11439 * Traversal function for tree block. We will do:
11440 * 1) Skip shared fs/subvolume tree blocks
11441 * 2) Update related bytes accounting
11442 * 3) Pre-order traversal
11444 static int traverse_tree_block(struct btrfs_root *root,
11445 struct extent_buffer *node)
11447 struct extent_buffer *eb;
11448 struct btrfs_key key;
11449 struct btrfs_key drop_key;
11457 	 * Skip shared fs/subvolume tree blocks; they will be checked by
11458 	 * the referencer with the lowest rootid
11460 if (is_fstree(root->objectid) && !should_check(root, node))
11463 /* Update bytes accounting */
11464 total_btree_bytes += node->len;
11465 if (fs_root_objectid(btrfs_header_owner(node)))
11466 total_fs_tree_bytes += node->len;
11467 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11468 total_extent_tree_bytes += node->len;
11470 	/* pre-order traversal, check the node itself first */
11471 level = btrfs_header_level(node);
11472 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11473 btrfs_header_level(node),
11474 btrfs_header_owner(node));
11478 "check %s failed, root %llu bytenr %llu level %d, forcing the check to continue",
11479 level ? "node":"leaf", root->objectid,
11480 btrfs_header_bytenr(node), btrfs_header_level(node));
11483 btree_space_waste += btrfs_leaf_free_space(root, node);
11484 ret = check_leaf_items(root, node);
11489 nr = btrfs_header_nritems(node);
11490 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
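	/* Count the unused key-pointer slots of this node as wasted btree space */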
11491 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11492 sizeof(struct btrfs_key_ptr);
11494 /* Then check all its children */
11495 for (i = 0; i < nr; i++) {
11496 u64 blocknr = btrfs_node_blockptr(node, i);
11498 btrfs_node_key_to_cpu(node, &key, i);
11499 if (level == root->root_item.drop_level &&
11500 is_dropped_key(&key, &drop_key))
11504 		 * As a btrfs tree has at most 8 levels (0..7), it's quite safe
11505 		 * for the function to call itself recursively.
11507 eb = read_tree_block(root->fs_info, blocknr, 0);
11508 if (extent_buffer_uptodate(eb)) {
11509 ret = traverse_tree_block(root, eb);
11512 free_extent_buffer(eb);
11519  * Low memory usage version of check_chunks_and_extents.
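 * Instead of building in-memory records for every extent, this version walks
 * each tree top-down via traverse_tree_block() and cross-checks backrefs and
 * referencers against the extent tree on the fly.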
11521 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11523 struct btrfs_path path;
11524 struct btrfs_key key;
11525 struct btrfs_root *root1;
11526 struct btrfs_root *root;
11527 struct btrfs_root *cur_root;
11531 root = fs_info->fs_root;
11533 root1 = root->fs_info->chunk_root;
11534 ret = traverse_tree_block(root1, root1->node);
11537 root1 = root->fs_info->tree_root;
11538 ret = traverse_tree_block(root1, root1->node);
11541 btrfs_init_path(&path);
11542 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11544 key.type = BTRFS_ROOT_ITEM_KEY;
11546 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11548 		error("cannot find extent tree in tree_root");
11553 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11554 if (key.type != BTRFS_ROOT_ITEM_KEY)
11556 key.offset = (u64)-1;
11558 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11559 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11562 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11563 if (IS_ERR(cur_root) || !cur_root) {
11564 error("failed to read tree: %lld", key.objectid);
11568 ret = traverse_tree_block(cur_root, cur_root->node);
11571 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11572 btrfs_free_fs_root(cur_root);
11574 ret = btrfs_next_item(root1, &path);
11580 btrfs_release_path(&path);
11584 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11588 if (!ctx.progress_enabled)
11589 fprintf(stderr, "checking extents\n");
11590 if (check_mode == CHECK_MODE_LOWMEM)
11591 ret = check_chunks_and_extents_v2(fs_info);
11593 ret = check_chunks_and_extents(fs_info);
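/*
 * Give @root a fresh, empty root node (reusing the old block when @overwrite
 * is set), rewrite its header and update the root item in the tree root.
 */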
11598 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11599 struct btrfs_root *root, int overwrite)
11601 struct extent_buffer *c;
11602 struct extent_buffer *old = root->node;
11605 struct btrfs_disk_key disk_key = {0,0,0};
11611 extent_buffer_get(c);
11614 c = btrfs_alloc_free_block(trans, root,
11615 root->fs_info->nodesize,
11616 root->root_key.objectid,
11617 &disk_key, level, 0, 0);
11620 extent_buffer_get(c);
11624 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11625 btrfs_set_header_level(c, level);
11626 btrfs_set_header_bytenr(c, c->start);
11627 btrfs_set_header_generation(c, trans->transid);
11628 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11629 btrfs_set_header_owner(c, root->root_key.objectid);
11631 write_extent_buffer(c, root->fs_info->fsid,
11632 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11634 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11635 btrfs_header_chunk_tree_uuid(c),
11638 btrfs_mark_buffer_dirty(c);
11640 	 * this can happen in the following cases:
11642 	 * 1. overwriting the previous root.
11644 	 * 2. reinitializing the reloc data root: because we skipped pinning
11645 	 * down the reloc data tree earlier, the same block bytenr can be
11646 	 * allocated here.
11648 if (old->start == c->start) {
11649 btrfs_set_root_generation(&root->root_item,
11651 root->root_item.level = btrfs_header_level(root->node);
11652 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11653 &root->root_key, &root->root_item);
11655 free_extent_buffer(c);
11659 free_extent_buffer(old);
11661 add_root_to_dirty_list(root);
11665 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11666 struct extent_buffer *eb, int tree_root)
11668 struct extent_buffer *tmp;
11669 struct btrfs_root_item *ri;
11670 struct btrfs_key key;
11672 int level = btrfs_header_level(eb);
11678 * If we have pinned this block before, don't pin it again.
11679 	 * This not only avoids an endless loop on a broken filesystem
11680 	 * but also gives us some speedup.
11682 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11683 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11686 btrfs_pin_extent(fs_info, eb->start, eb->len);
11688 nritems = btrfs_header_nritems(eb);
11689 for (i = 0; i < nritems; i++) {
11691 btrfs_item_key_to_cpu(eb, &key, i);
11692 if (key.type != BTRFS_ROOT_ITEM_KEY)
11694 /* Skip the extent root and reloc roots */
11695 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11696 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11697 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11699 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11700 bytenr = btrfs_disk_root_bytenr(eb, ri);
11703 * If at any point we start needing the real root we
11704 * will have to build a stump root for the root we are
11705 * in, but for now this doesn't actually use the root so
11706 * just pass in extent_root.
11708 tmp = read_tree_block(fs_info, bytenr, 0);
11709 if (!extent_buffer_uptodate(tmp)) {
11710 fprintf(stderr, "Error reading root block\n");
11713 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11714 free_extent_buffer(tmp);
11718 bytenr = btrfs_node_blockptr(eb, i);
11720 /* If we aren't the tree root don't read the block */
11721 if (level == 1 && !tree_root) {
11722 btrfs_pin_extent(fs_info, bytenr,
11723 fs_info->nodesize);
11727 tmp = read_tree_block(fs_info, bytenr, 0);
11728 if (!extent_buffer_uptodate(tmp)) {
11729 fprintf(stderr, "Error reading tree block\n");
11732 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11733 free_extent_buffer(tmp);
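/*
 * Pin every tree block reachable from the chunk root and the tree root so
 * that re-initializing the extent tree will not allocate over live metadata.
 */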
11742 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11746 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11750 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11753 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11755 struct btrfs_block_group_cache *cache;
11756 struct btrfs_path path;
11757 struct extent_buffer *leaf;
11758 struct btrfs_chunk *chunk;
11759 struct btrfs_key key;
11763 btrfs_init_path(&path);
11765 key.type = BTRFS_CHUNK_ITEM_KEY;
11767 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11769 btrfs_release_path(&path);
11774 * We do this in case the block groups were screwed up and had alloc
11775 * bits that aren't actually set on the chunks. This happens with
11776 * restored images every time and could happen in real life I guess.
11778 fs_info->avail_data_alloc_bits = 0;
11779 fs_info->avail_metadata_alloc_bits = 0;
11780 fs_info->avail_system_alloc_bits = 0;
11782 /* First we need to create the in-memory block groups */
11784 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11785 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11787 btrfs_release_path(&path);
11795 leaf = path.nodes[0];
11796 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11797 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11802 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11803 btrfs_add_block_group(fs_info, 0,
11804 btrfs_chunk_type(leaf, chunk),
11805 key.objectid, key.offset,
11806 btrfs_chunk_length(leaf, chunk));
11807 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11808 key.offset + btrfs_chunk_length(leaf, chunk));
11813 cache = btrfs_lookup_first_block_group(fs_info, start);
11817 start = cache->key.objectid + cache->key.offset;
11820 btrfs_release_path(&path);
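/*
 * Delete any pending balance item and stale tree reloc root items from the
 * tree root, then re-initialize the data reloc tree.
 */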
11824 static int reset_balance(struct btrfs_trans_handle *trans,
11825 struct btrfs_fs_info *fs_info)
11827 struct btrfs_root *root = fs_info->tree_root;
11828 struct btrfs_path path;
11829 struct extent_buffer *leaf;
11830 struct btrfs_key key;
11831 int del_slot, del_nr = 0;
11835 btrfs_init_path(&path);
11836 key.objectid = BTRFS_BALANCE_OBJECTID;
11837 key.type = BTRFS_BALANCE_ITEM_KEY;
11839 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11844 goto reinit_data_reloc;
11849 ret = btrfs_del_item(trans, root, &path);
11852 btrfs_release_path(&path);
11854 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11855 key.type = BTRFS_ROOT_ITEM_KEY;
11857 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11861 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11866 ret = btrfs_del_items(trans, root, &path,
11873 btrfs_release_path(&path);
11876 ret = btrfs_search_slot(trans, root, &key, &path,
11883 leaf = path.nodes[0];
11884 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11885 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11887 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11892 del_slot = path.slots[0];
11901 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11905 btrfs_release_path(&path);
11908 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11909 key.type = BTRFS_ROOT_ITEM_KEY;
11910 key.offset = (u64)-1;
11911 root = btrfs_read_fs_root(fs_info, &key);
11912 if (IS_ERR(root)) {
11913 fprintf(stderr, "Error reading data reloc tree\n");
11914 ret = PTR_ERR(root);
11917 record_root_in_trans(trans, root);
11918 ret = btrfs_fsck_reinit_root(trans, root, 0);
11921 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11923 btrfs_release_path(&path);
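/*
 * Rebuild the extent tree from scratch: pin all referenced metadata, reset
 * the in-memory block groups, re-init the extent root, re-insert the block
 * group items and clear any pending balance.
 */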
11927 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11928 struct btrfs_fs_info *fs_info)
11934 * The only reason we don't do this is because right now we're just
11935 	 * walking the trees we find and pinning down their bytes; we don't look
11936 * at any of the leaves. In order to do mixed groups we'd have to check
11937 * the leaves of any fs roots and pin down the bytes for any file
11938 * extents we find. Not hard but why do it if we don't have to?
11940 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11941 fprintf(stderr, "We don't support re-initing the extent tree "
11942 "for mixed block groups yet, please notify a btrfs "
11943 			"developer that you want to do this so they can add this "
11944 "functionality.\n");
11949 * first we need to walk all of the trees except the extent tree and pin
11950 * down the bytes that are in use so we don't overwrite any existing
11953 ret = pin_metadata_blocks(fs_info);
11955 fprintf(stderr, "error pinning down used bytes\n");
11960 * Need to drop all the block groups since we're going to recreate all
11963 btrfs_free_block_groups(fs_info);
11964 ret = reset_block_groups(fs_info);
11966 fprintf(stderr, "error resetting the block groups\n");
11970 /* Ok we can allocate now, reinit the extent root */
11971 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11973 fprintf(stderr, "extent root initialization failed\n");
11975 * When the transaction code is updated we should end the
11976 * transaction, but for now progs only knows about commit so
11977 * just return an error.
11983 	 * Now we have all the in-memory block groups set up so we can make
11984 * allocations properly, and the metadata we care about is safe since we
11985 * pinned all of it above.
11988 struct btrfs_block_group_cache *cache;
11990 cache = btrfs_lookup_first_block_group(fs_info, start);
11993 start = cache->key.objectid + cache->key.offset;
11994 ret = btrfs_insert_item(trans, fs_info->extent_root,
11995 &cache->key, &cache->item,
11996 sizeof(cache->item));
11998 fprintf(stderr, "Error adding block group\n");
12001 btrfs_extent_post_op(trans, fs_info->extent_root);
12004 ret = reset_balance(trans, fs_info);
12006 fprintf(stderr, "error resetting the pending balance\n");
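/*
 * Force a COW of the given tree block: look up its owning root, then search
 * down to the block's level with COW enabled inside a short transaction so
 * the block gets rewritten.
 */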
12011 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12013 struct btrfs_path path;
12014 struct btrfs_trans_handle *trans;
12015 struct btrfs_key key;
12018 printf("Recowing metadata block %llu\n", eb->start);
12019 key.objectid = btrfs_header_owner(eb);
12020 key.type = BTRFS_ROOT_ITEM_KEY;
12021 key.offset = (u64)-1;
12023 root = btrfs_read_fs_root(root->fs_info, &key);
12024 if (IS_ERR(root)) {
12025 fprintf(stderr, "Couldn't find owner root %llu\n",
12027 return PTR_ERR(root);
12030 trans = btrfs_start_transaction(root, 1);
12032 return PTR_ERR(trans);
12034 btrfs_init_path(&path);
12035 path.lowest_level = btrfs_header_level(eb);
12036 if (path.lowest_level)
12037 btrfs_node_key_to_cpu(eb, &key, 0);
12039 btrfs_item_key_to_cpu(eb, &key, 0);
12041 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12042 btrfs_commit_transaction(trans, root);
12043 btrfs_release_path(&path);
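/*
 * Delete a single known-bad item from its owning root inside a short
 * transaction.
 */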
12047 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12049 struct btrfs_path path;
12050 struct btrfs_trans_handle *trans;
12051 struct btrfs_key key;
12054 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12055 bad->key.type, bad->key.offset);
12056 key.objectid = bad->root_id;
12057 key.type = BTRFS_ROOT_ITEM_KEY;
12058 key.offset = (u64)-1;
12060 root = btrfs_read_fs_root(root->fs_info, &key);
12061 if (IS_ERR(root)) {
12062 fprintf(stderr, "Couldn't find owner root %llu\n",
12064 return PTR_ERR(root);
12067 trans = btrfs_start_transaction(root, 1);
12069 return PTR_ERR(trans);
12071 btrfs_init_path(&path);
12072 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12078 ret = btrfs_del_item(trans, root, &path);
12080 btrfs_commit_transaction(trans, root);
12081 btrfs_release_path(&path);
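/*
 * Discard the log tree: zero the log root pointer and level in the superblock
 * copy and commit, so a stale log is never replayed on the next mount.
 */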
12085 static int zero_log_tree(struct btrfs_root *root)
12087 struct btrfs_trans_handle *trans;
12090 trans = btrfs_start_transaction(root, 1);
12091 if (IS_ERR(trans)) {
12092 ret = PTR_ERR(trans);
12095 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12096 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12097 ret = btrfs_commit_transaction(trans, root);
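/*
 * Compute checksums for the data extent [start, start + len): read the data
 * back from disk one sector at a time and insert a csum for each sector via
 * btrfs_csum_file_block(), with start + len passed as the limit up to which
 * the csum item may be extended.
 */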
12101 static int populate_csum(struct btrfs_trans_handle *trans,
12102 struct btrfs_root *csum_root, char *buf, u64 start,
12105 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12110 while (offset < len) {
12111 sectorsize = fs_info->sectorsize;
12112 ret = read_extent_data(fs_info, buf, start + offset,
12116 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12117 start + offset, buf, sectorsize);
12120 offset += sectorsize;
12125 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12126 struct btrfs_root *csum_root,
12127 struct btrfs_root *cur_root)
12129 struct btrfs_path path;
12130 struct btrfs_key key;
12131 struct extent_buffer *node;
12132 struct btrfs_file_extent_item *fi;
12139 buf = malloc(cur_root->fs_info->sectorsize);
12143 btrfs_init_path(&path);
12147 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12150 /* Iterate over all regular file extents and fill in their csums */
12152 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12154 if (key.type != BTRFS_EXTENT_DATA_KEY)
12156 node = path.nodes[0];
12157 slot = path.slots[0];
12158 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12159 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12161 start = btrfs_file_extent_disk_bytenr(node, fi);
12162 len = btrfs_file_extent_disk_num_bytes(node, fi);
12164 ret = populate_csum(trans, csum_root, buf, start, len);
12165 if (ret == -EEXIST)
12171 * TODO: if the next leaf is corrupted, jump to the nearest next valid leaf.
12174 ret = btrfs_next_item(cur_root, &path);
12184 btrfs_release_path(&path);
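/*
 * Rebuild the csum tree from the fs trees: walk the ROOT_ITEMs in the tree
 * root, read each fs/subvolume root and feed its regular file extents to
 * fill_csum_tree_from_one_fs_root().
 */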
12189 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12190 struct btrfs_root *csum_root)
12192 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12193 struct btrfs_path path;
12194 struct btrfs_root *tree_root = fs_info->tree_root;
12195 struct btrfs_root *cur_root;
12196 struct extent_buffer *node;
12197 struct btrfs_key key;
12201 btrfs_init_path(&path);
12202 key.objectid = BTRFS_FS_TREE_OBJECTID;
12204 key.type = BTRFS_ROOT_ITEM_KEY;
12205 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12214 node = path.nodes[0];
12215 slot = path.slots[0];
12216 btrfs_item_key_to_cpu(node, &key, slot);
12217 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12219 if (key.type != BTRFS_ROOT_ITEM_KEY)
12221 if (!is_fstree(key.objectid))
12223 key.offset = (u64)-1;
12225 cur_root = btrfs_read_fs_root(fs_info, &key);
12226 if (IS_ERR(cur_root) || !cur_root) {
12227 fprintf(stderr, "Failed to read fs/subvol tree: %lld\n",
12231 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12236 ret = btrfs_next_item(tree_root, &path);
12246 btrfs_release_path(&path);
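/*
 * Rebuild the csum tree from the extent tree: scan every EXTENT_ITEM flagged
 * BTRFS_EXTENT_FLAG_DATA and recompute its checksums with populate_csum().
 */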
12250 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12251 struct btrfs_root *csum_root)
12253 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12254 struct btrfs_path path;
12255 struct btrfs_extent_item *ei;
12256 struct extent_buffer *leaf;
12258 struct btrfs_key key;
12261 btrfs_init_path(&path);
12263 key.type = BTRFS_EXTENT_ITEM_KEY;
12265 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12267 btrfs_release_path(&path);
12271 buf = malloc(csum_root->fs_info->sectorsize);
12273 btrfs_release_path(&path);
12278 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12279 ret = btrfs_next_leaf(extent_root, &path);
12287 leaf = path.nodes[0];
12289 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12290 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12295 ei = btrfs_item_ptr(leaf, path.slots[0],
12296 struct btrfs_extent_item);
12297 if (!(btrfs_extent_flags(leaf, ei) &
12298 BTRFS_EXTENT_FLAG_DATA)) {
12303 ret = populate_csum(trans, csum_root, buf, key.objectid,
12310 btrfs_release_path(&path);
12316 * Recalculate checksums and store them in the csum tree.
12318 * Re-initializing the extent tree wipes out all extent info, so in that case
12319 * we can't rely on the extent tree and have to walk the fs trees instead.
12320 * If search_fs_tree is set, the fs/subvol trees are used to rebuild the csum tree.
12322 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12323 struct btrfs_root *csum_root,
12324 int search_fs_tree)
12326 if (search_fs_tree)
12327 return fill_csum_tree_from_fs(trans, csum_root);
12329 return fill_csum_tree_from_extent(trans, csum_root);
12332 static void free_roots_info_cache(void)
12334 if (!roots_info_cache)
12337 while (!cache_tree_empty(roots_info_cache)) {
12338 struct cache_extent *entry;
12339 struct root_item_info *rii;
12341 entry = first_cache_extent(roots_info_cache);
12344 remove_cache_extent(roots_info_cache, entry);
12345 rii = container_of(entry, struct root_item_info, cache_extent);
12349 free(roots_info_cache);
12350 roots_info_cache = NULL;
12353 static int build_roots_info_cache(struct btrfs_fs_info *info)
12356 struct btrfs_key key;
12357 struct extent_buffer *leaf;
12358 struct btrfs_path path;
12360 if (!roots_info_cache) {
12361 roots_info_cache = malloc(sizeof(*roots_info_cache));
12362 if (!roots_info_cache)
12364 cache_tree_init(roots_info_cache);
12367 btrfs_init_path(&path);
12369 key.type = BTRFS_EXTENT_ITEM_KEY;
12371 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12374 leaf = path.nodes[0];
12377 struct btrfs_key found_key;
12378 struct btrfs_extent_item *ei;
12379 struct btrfs_extent_inline_ref *iref;
12380 int slot = path.slots[0];
12385 struct cache_extent *entry;
12386 struct root_item_info *rii;
12388 if (slot >= btrfs_header_nritems(leaf)) {
12389 ret = btrfs_next_leaf(info->extent_root, &path);
12396 leaf = path.nodes[0];
12397 slot = path.slots[0];
12400 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12402 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12403 found_key.type != BTRFS_METADATA_ITEM_KEY)
12406 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12407 flags = btrfs_extent_flags(leaf, ei);
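/*
 * Only tree block extents matter here, and their layout differs: for a
 * skinny METADATA_ITEM the level lives in key.offset and the inline refs
 * follow the extent item directly, while an EXTENT_ITEM for a tree block
 * carries a btrfs_tree_block_info (holding the level) between the extent
 * item and the inline refs.
 */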
12409 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12410 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12413 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12414 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12415 level = found_key.offset;
12417 struct btrfs_tree_block_info *binfo;
12419 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12420 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12421 level = btrfs_tree_block_level(leaf, binfo);
12425 * For a root extent, the backref must be of the following type and be
12426 * the first (and only) inline ref in the item.
12428 type = btrfs_extent_inline_ref_type(leaf, iref);
12429 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12432 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12433 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12435 rii = malloc(sizeof(struct root_item_info));
12440 rii->cache_extent.start = root_id;
12441 rii->cache_extent.size = 1;
12442 rii->level = (u8)-1;
12443 entry = &rii->cache_extent;
12444 ret = insert_cache_extent(roots_info_cache, entry);
12447 rii = container_of(entry, struct root_item_info,
12451 ASSERT(rii->cache_extent.start == root_id);
12452 ASSERT(rii->cache_extent.size == 1);
12454 if (level > rii->level || rii->level == (u8)-1) {
12455 rii->level = level;
12456 rii->bytenr = found_key.objectid;
12457 rii->gen = btrfs_extent_generation(leaf, ei);
12458 rii->node_count = 1;
12459 } else if (level == rii->level) {
12467 btrfs_release_path(&path);
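/*
 * Compare the ROOT_ITEM at @path against the newest tree block recorded in
 * roots_info_cache for this root and, unless running in read-only mode,
 * rewrite its bytenr, level and generation in place to match.
 */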
12472 static int maybe_repair_root_item(struct btrfs_path *path,
12473 const struct btrfs_key *root_key,
12474 const int read_only_mode)
12476 const u64 root_id = root_key->objectid;
12477 struct cache_extent *entry;
12478 struct root_item_info *rii;
12479 struct btrfs_root_item ri;
12480 unsigned long offset;
12482 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12485 "Error: could not find extent items for root %llu\n",
12486 root_key->objectid);
12490 rii = container_of(entry, struct root_item_info, cache_extent);
12491 ASSERT(rii->cache_extent.start == root_id);
12492 ASSERT(rii->cache_extent.size == 1);
12494 if (rii->node_count != 1) {
12496 "Error: could not find btree root extent for root %llu\n",
12501 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12502 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12504 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12505 btrfs_root_level(&ri) != rii->level ||
12506 btrfs_root_generation(&ri) != rii->gen) {
12509 * If we're in repair mode but our caller told us not to update
12510 * the root item (i.e. only check whether it needs updating), don't
12511 * print this message, since the caller will call us again shortly
12512 * for the same root item without read-only mode (the caller will
12513 * open a transaction first).
12515 if (!(read_only_mode && repair))
12517 "%sroot item for root %llu,"
12518 " current bytenr %llu, current gen %llu, current level %u,"
12519 " new bytenr %llu, new gen %llu, new level %u\n",
12520 (read_only_mode ? "" : "fixing "),
12522 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12523 btrfs_root_level(&ri),
12524 rii->bytenr, rii->gen, rii->level);
12526 if (btrfs_root_generation(&ri) > rii->gen) {
12528 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12529 root_id, btrfs_root_generation(&ri), rii->gen);
12533 if (!read_only_mode) {
12534 btrfs_set_root_bytenr(&ri, rii->bytenr);
12535 btrfs_set_root_level(&ri, rii->level);
12536 btrfs_set_root_generation(&ri, rii->gen);
12537 write_extent_buffer(path->nodes[0], &ri,
12538 offset, sizeof(ri));
12548 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2)
12549 * caused read-only snapshots to be corrupted if they were created at a moment
12550 * when the source subvolume/snapshot had orphan items. The issue was that the
12551 * on-disk root items became incorrect, referring to the pre-orphan-cleanup root
12552 * node instead of the post-orphan-cleanup root node.
12553 * This function, and its callees, detect and fix those cases. Even though the
12554 * regression affected read-only snapshots, this function applies to any
12555 * snapshot/subvolume root.
12556 * It must run before any other repair code: otherwise other repair code may,
12557 * for example, delete or modify backrefs in the extent tree, which would leave
12558 * the fs inconsistent once the root items are repaired.
12560 static int repair_root_items(struct btrfs_fs_info *info)
12562 struct btrfs_path path;
12563 struct btrfs_key key;
12564 struct extent_buffer *leaf;
12565 struct btrfs_trans_handle *trans = NULL;
12568 int need_trans = 0;
12570 btrfs_init_path(&path);
12572 ret = build_roots_info_cache(info);
12576 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12577 key.type = BTRFS_ROOT_ITEM_KEY;
12582 * Avoid opening and committing transactions if a leaf doesn't have
12583 * any root items that need to be fixed, so that we avoid rotating
12584 * backup roots unnecessarily.
12587 trans = btrfs_start_transaction(info->tree_root, 1);
12588 if (IS_ERR(trans)) {
12589 ret = PTR_ERR(trans);
12594 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12598 leaf = path.nodes[0];
12601 struct btrfs_key found_key;
12603 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12604 int no_more_keys = find_next_key(&path, &key);
12606 btrfs_release_path(&path);
12608 ret = btrfs_commit_transaction(trans,
12620 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12622 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12624 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12627 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12631 if (!trans && repair) {
12634 btrfs_release_path(&path);
12644 free_roots_info_cache();
12645 btrfs_release_path(&path);
12647 btrfs_commit_transaction(trans, info->tree_root);
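/*
 * Clear the v1 free space cache: drop the cache inode and its extent data for
 * every block group, then set cache_generation in the superblock to -1 so any
 * leftover on-disk cache is treated as invalid.
 */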
12654 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12656 struct btrfs_trans_handle *trans;
12657 struct btrfs_block_group_cache *bg_cache;
12661 /* Clear all free space cache inodes and their extent data */
12663 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12666 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12669 current = bg_cache->key.objectid + bg_cache->key.offset;
12672 /* Don't forget to set cache_generation to -1 */
12673 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12674 if (IS_ERR(trans)) {
12675 error("failed to update super block cache generation");
12676 return PTR_ERR(trans);
12678 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12679 btrfs_commit_transaction(trans, fs_info->tree_root);
12684 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12689 if (clear_version == 1) {
12690 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12692 "free space cache v2 detected, use --clear-space-cache v2");
12696 printf("Clearing free space cache\n");
12697 ret = clear_free_space_cache(fs_info);
12699 error("failed to clear free space cache");
12702 printf("Free space cache cleared\n");
12704 } else if (clear_version == 2) {
12705 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12706 printf("no free space cache v2 to clear\n");
12710 printf("Clearing free space cache v2\n");
12711 ret = btrfs_clear_free_space_tree(fs_info);
12713 error("failed to clear free space cache v2: %d", ret);
12716 printf("Free space cache v2 cleared\n");
12723 const char * const cmd_check_usage[] = {
12724 "btrfs check [options] <device>",
12725 "Check structural integrity of a filesystem (unmounted).",
12726 "Check structural integrity of an unmounted filesystem. Verify internal",
12727 "trees' consistency and item connectivity. In repair mode, try to",
12728 "fix the problems found.",
12729 "WARNING: the repair mode is considered dangerous",
12731 "-s|--super <superblock> use this superblock copy",
12732 "-b|--backup use the first valid backup root copy",
12733 "--force skip mount checks, repair is not possible",
12734 "--repair try to repair the filesystem",
12735 "--readonly run in read-only mode (default)",
12736 "--init-csum-tree create a new CRC tree",
12737 "--init-extent-tree create a new extent tree",
12738 "--mode <MODE> allows choice of memory/IO trade-offs",
12739 " where MODE is one of:",
12740 " original - read inodes and extents to memory (requires",
12741 " more memory, does less IO)",
12742 " lowmem - try to use less memory but read blocks again",
12744 "--check-data-csum verify checksums of data blocks",
12745 "-Q|--qgroup-report print a report on qgroup consistency",
12746 "-E|--subvol-extents <subvolid>",
12747 " print subvolume extents and sharing state",
12748 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12749 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12750 "-p|--progress indicate progress",
12751 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
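/*
 * Typical invocations (illustrative only; <device> is a placeholder):
 *
 *   btrfs check <device>                          read-only check (default)
 *   btrfs check --mode lowmem <device>            use less memory, more IO
 *   btrfs check --repair <device>                 try to fix problems found
 *   btrfs check --clear-space-cache v1 <device>   clear the v1 space cache
 */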
12755 int cmd_check(int argc, char **argv)
12757 struct cache_tree root_cache;
12758 struct btrfs_root *root;
12759 struct btrfs_fs_info *info;
12762 u64 tree_root_bytenr = 0;
12763 u64 chunk_root_bytenr = 0;
12764 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12768 int init_csum_tree = 0;
12770 int clear_space_cache = 0;
12771 int qgroup_report = 0;
12772 int qgroups_repaired = 0;
12773 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12778 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12779 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12780 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12781 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
12782 GETOPT_VAL_FORCE };
12783 static const struct option long_options[] = {
12784 { "super", required_argument, NULL, 's' },
12785 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12786 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12787 { "init-csum-tree", no_argument, NULL,
12788 GETOPT_VAL_INIT_CSUM },
12789 { "init-extent-tree", no_argument, NULL,
12790 GETOPT_VAL_INIT_EXTENT },
12791 { "check-data-csum", no_argument, NULL,
12792 GETOPT_VAL_CHECK_CSUM },
12793 { "backup", no_argument, NULL, 'b' },
12794 { "subvol-extents", required_argument, NULL, 'E' },
12795 { "qgroup-report", no_argument, NULL, 'Q' },
12796 { "tree-root", required_argument, NULL, 'r' },
12797 { "chunk-root", required_argument, NULL,
12798 GETOPT_VAL_CHUNK_TREE },
12799 { "progress", no_argument, NULL, 'p' },
12800 { "mode", required_argument, NULL,
12802 { "clear-space-cache", required_argument, NULL,
12803 GETOPT_VAL_CLEAR_SPACE_CACHE},
12804 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
12805 { NULL, 0, NULL, 0}
12808 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12812 case 'a': /* ignored */ break;
12814 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12817 num = arg_strtou64(optarg);
12818 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12820 "super mirror should be less than %d",
12821 BTRFS_SUPER_MIRROR_MAX);
12824 bytenr = btrfs_sb_offset(((int)num));
12825 printf("using SB copy %llu, bytenr %llu\n", num,
12826 (unsigned long long)bytenr);
12832 subvolid = arg_strtou64(optarg);
12835 tree_root_bytenr = arg_strtou64(optarg);
12837 case GETOPT_VAL_CHUNK_TREE:
12838 chunk_root_bytenr = arg_strtou64(optarg);
12841 ctx.progress_enabled = true;
12845 usage(cmd_check_usage);
12846 case GETOPT_VAL_REPAIR:
12847 printf("enabling repair mode\n");
12849 ctree_flags |= OPEN_CTREE_WRITES;
12851 case GETOPT_VAL_READONLY:
12854 case GETOPT_VAL_INIT_CSUM:
12855 printf("Creating a new CRC tree\n");
12856 init_csum_tree = 1;
12858 ctree_flags |= OPEN_CTREE_WRITES;
12860 case GETOPT_VAL_INIT_EXTENT:
12861 init_extent_tree = 1;
12862 ctree_flags |= (OPEN_CTREE_WRITES |
12863 OPEN_CTREE_NO_BLOCK_GROUPS);
12866 case GETOPT_VAL_CHECK_CSUM:
12867 check_data_csum = 1;
12869 case GETOPT_VAL_MODE:
12870 check_mode = parse_check_mode(optarg);
12871 if (check_mode == CHECK_MODE_UNKNOWN) {
12872 error("unknown mode: %s", optarg);
12876 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12877 if (strcmp(optarg, "v1") == 0) {
12878 clear_space_cache = 1;
12879 } else if (strcmp(optarg, "v2") == 0) {
12880 clear_space_cache = 2;
12881 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12884 "invalid argument to --clear-space-cache, must be v1 or v2");
12887 ctree_flags |= OPEN_CTREE_WRITES;
12889 case GETOPT_VAL_FORCE:
12895 if (check_argc_exact(argc - optind, 1))
12896 usage(cmd_check_usage);
12898 if (ctx.progress_enabled) {
12899 ctx.tp = TASK_NOTHING;
12900 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12903 /* This check is the only reason for --readonly to exist */
12904 if (readonly && repair) {
12905 error("repair options are not compatible with --readonly");
12910 * Not supported yet
12912 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12913 error("low memory mode doesn't support repair yet");
12918 cache_tree_init(&root_cache);
12920 ret = check_mounted(argv[optind]);
12923 error("could not check mount status: %s",
12929 "%s is currently mounted, use --force if you really intend to check the filesystem",
12937 error("repair mode together with --force is not yet supported");
12944 "cannot check mount status of %s; the filesystem could be mounted, continuing because of --force",
12948 "filesystem mounted, continuing because of --force");
12952 /* only allow partial opening under repair mode */
12954 ctree_flags |= OPEN_CTREE_PARTIAL;
12956 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12957 chunk_root_bytenr, ctree_flags);
12959 error("cannot open file system");
12965 global_info = info;
12966 root = info->fs_root;
12967 uuid_unparse(info->super_copy->fsid, uuidbuf);
12969 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12972 * Check the bare minimum before starting anything else that could rely
12973 * on it, namely the tree roots and their basic local consistency.
12975 if (!extent_buffer_uptodate(info->tree_root->node) ||
12976 !extent_buffer_uptodate(info->dev_root->node) ||
12977 !extent_buffer_uptodate(info->chunk_root->node)) {
12978 error("critical roots corrupted, unable to check the filesystem");
12984 if (clear_space_cache) {
12985 ret = do_clear_free_space_cache(info, clear_space_cache);
12991 * Repair mode will force us to commit a transaction, which
12992 * would make loading the log tree fail at mount time.
12994 if (repair && btrfs_super_log_root(info->super_copy)) {
12995 ret = ask_user("repair mode will have to clear out the log tree, are you sure?");
13001 ret = zero_log_tree(root);
13004 error("failed to zero log tree: %d", ret);
13009 if (qgroup_report) {
13010 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13012 ret = qgroup_verify_all(info);
13019 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13020 subvolid, argv[optind], uuidbuf);
13021 ret = print_extent_state(info, subvolid);
13026 if (init_extent_tree || init_csum_tree) {
13027 struct btrfs_trans_handle *trans;
13029 trans = btrfs_start_transaction(info->extent_root, 0);
13030 if (IS_ERR(trans)) {
13031 error("error starting transaction");
13032 ret = PTR_ERR(trans);
13037 if (init_extent_tree) {
13038 printf("Creating a new extent tree\n");
13039 ret = reinit_extent_tree(trans, info);
13045 if (init_csum_tree) {
13046 printf("Reinitializing checksum tree\n");
13047 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13049 error("checksum tree initialization failed: %d",
13056 ret = fill_csum_tree(trans, info->csum_root,
13060 error("checksum tree refilling failed: %d", ret);
13065 * Ok now we commit and run the normal fsck, which will add
13066 * extent entries for all of the items it finds.
13068 ret = btrfs_commit_transaction(trans, info->extent_root);
13073 if (!extent_buffer_uptodate(info->extent_root->node)) {
13074 error("critical: extent_root, unable to check the filesystem");
13079 if (!extent_buffer_uptodate(info->csum_root->node)) {
13080 error("critical: csum_root, unable to check the filesystem");
13086 ret = do_check_chunks_and_extents(info);
13090 "errors found in extent allocation tree or chunk allocation");
13092 ret = repair_root_items(info);
13095 error("failed to repair root items: %s", strerror(-ret));
13099 fprintf(stderr, "Fixed %d roots.\n", ret);
13101 } else if (ret > 0) {
13103 "Found %d roots with an outdated root item.\n",
13106 "Please run a filesystem check with the option --repair to fix them.\n");
13112 if (!ctx.progress_enabled) {
13113 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13114 fprintf(stderr, "checking free space tree\n");
13116 fprintf(stderr, "checking free space cache\n");
13118 ret = check_space_cache(root);
13121 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13122 error("errors found in free space tree");
13124 error("errors found in free space cache");
13129 * We used to require hole extents in between the real extents, so if
13130 * the NO_HOLES flag is not set we need to make sure there are no gaps
13131 * in the file extents of an inode; if it is set, such gaps are expected
13132 * and can simply be ignored.
13134 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13135 ret = do_check_fs_roots(info, &root_cache);
13138 error("errors found in fs roots");
13142 fprintf(stderr, "checking csums\n");
13143 ret = check_csums(root);
13146 error("errors found in csum tree");
13150 fprintf(stderr, "checking root refs\n");
13151 /* For low memory mode, check_fs_roots_v2 handles root refs */
13152 if (check_mode != CHECK_MODE_LOWMEM) {
13153 ret = check_root_refs(root, &root_cache);
13156 error("errors found in root refs");
13161 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13162 struct extent_buffer *eb;
13164 eb = list_first_entry(&root->fs_info->recow_ebs,
13165 struct extent_buffer, recow);
13166 list_del_init(&eb->recow);
13167 ret = recow_extent_buffer(root, eb);
13170 error("failed to fix transid errors");
13175 while (!list_empty(&delete_items)) {
13176 struct bad_item *bad;
13178 bad = list_first_entry(&delete_items, struct bad_item, list);
13179 list_del_init(&bad->list);
13181 ret = delete_bad_item(root, bad);
13187 if (info->quota_enabled) {
13188 fprintf(stderr, "checking quota groups\n");
13189 ret = qgroup_verify_all(info);
13192 error("failed to check quota groups");
13196 ret = repair_qgroups(info, &qgroups_repaired);
13199 error("failed to repair quota groups");
13205 if (!list_empty(&root->fs_info->recow_ebs)) {
13206 error("transid errors in file system");
13211 printf("found %llu bytes used, ",
13212 (unsigned long long)bytes_used);
13214 printf("error(s) found\n");
13216 printf("no errors found\n");
13217 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13218 printf("total tree bytes: %llu\n",
13219 (unsigned long long)total_btree_bytes);
13220 printf("total fs tree bytes: %llu\n",
13221 (unsigned long long)total_fs_tree_bytes);
13222 printf("total extent tree bytes: %llu\n",
13223 (unsigned long long)total_extent_tree_bytes);
13224 printf("btree space waste bytes: %llu\n",
13225 (unsigned long long)btree_space_waste);
13226 printf("file data blocks allocated: %llu\n referenced %llu\n",
13227 (unsigned long long)data_bytes_allocated,
13228 (unsigned long long)data_bytes_referenced);
13230 free_qgroup_counts();
13231 free_root_recs_tree(&root_cache);
13235 if (ctx.progress_enabled)
13236 task_deinit(ctx.info);