2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 return container_of(back, struct data_backref, node);
140 * Much like data_backref, but with the undetermined members removed
141 * and changed to use list_head.
142 * During extent scan, it is stored in root->orphan_data_extent.
143 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145 struct orphan_data_extent {
146 struct list_head list;
154 struct tree_backref {
155 struct extent_backref node;
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 return container_of(back, struct tree_backref, node);
167 /* Explicit initialization for extent_record::flag_block_full_backref */
168 enum { FLAG_UNSET = 2 };
170 struct extent_record {
171 struct list_head backrefs;
172 struct list_head dups;
173 struct list_head list;
174 struct cache_extent cache;
175 struct btrfs_disk_key parent_key;
180 u64 extent_item_refs;
182 u64 parent_generation;
186 unsigned int flag_block_full_backref:2;
187 unsigned int found_rec:1;
188 unsigned int content_checked:1;
189 unsigned int owner_ref_checked:1;
190 unsigned int is_root:1;
191 unsigned int metadata:1;
192 unsigned int bad_full_backref:1;
193 unsigned int crossing_stripes:1;
194 unsigned int wrong_chunk_type:1;
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 return container_of(entry, struct extent_record, list);
202 struct inode_backref {
203 struct list_head list;
204 unsigned int found_dir_item:1;
205 unsigned int found_dir_index:1;
206 unsigned int found_inode_ref:1;
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 return list_entry(entry, struct inode_backref, list);
221 struct root_item_record {
222 struct list_head list;
228 struct btrfs_key drop_key;
231 #define REF_ERR_NO_DIR_ITEM (1 << 0)
232 #define REF_ERR_NO_DIR_INDEX (1 << 1)
233 #define REF_ERR_NO_INODE_REF (1 << 2)
234 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
235 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
236 #define REF_ERR_DUP_INODE_REF (1 << 5)
237 #define REF_ERR_INDEX_UNMATCH (1 << 6)
238 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
239 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
240 #define REF_ERR_NO_ROOT_REF (1 << 9)
241 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
242 #define REF_ERR_DUP_ROOT_REF (1 << 11)
243 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
245 struct file_extent_hole {
251 struct inode_record {
252 struct list_head backrefs;
253 unsigned int checked:1;
254 unsigned int merging:1;
255 unsigned int found_inode_item:1;
256 unsigned int found_dir_item:1;
257 unsigned int found_file_extent:1;
258 unsigned int found_csum_item:1;
259 unsigned int some_csum_missing:1;
260 unsigned int nodatasum:1;
273 struct rb_root holes;
274 struct list_head orphan_extents;
279 #define I_ERR_NO_INODE_ITEM (1 << 0)
280 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
281 #define I_ERR_DUP_INODE_ITEM (1 << 2)
282 #define I_ERR_DUP_DIR_INDEX (1 << 3)
283 #define I_ERR_ODD_DIR_ITEM (1 << 4)
284 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
285 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
286 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
287 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
288 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
289 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
290 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
291 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
292 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
293 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
295 struct root_backref {
296 struct list_head list;
297 unsigned int found_dir_item:1;
298 unsigned int found_dir_index:1;
299 unsigned int found_back_ref:1;
300 unsigned int found_forward_ref:1;
301 unsigned int reachable:1;
310 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 return list_entry(entry, struct root_backref, list);
316 struct list_head backrefs;
317 struct cache_extent cache;
318 unsigned int found_root_item:1;
324 struct cache_extent cache;
329 struct cache_extent cache;
330 struct cache_tree root_cache;
331 struct cache_tree inode_cache;
332 struct inode_record *current;
341 struct walk_control {
342 struct cache_tree shared;
343 struct shared_node *nodes[BTRFS_MAX_LEVEL];
349 struct btrfs_key key;
351 struct list_head list;
354 struct extent_entry {
359 struct list_head list;
362 struct root_item_info {
363 /* level of the root */
365 /* number of nodes at this level, must be 1 for a root */
369 struct cache_extent cache_extent;
373 * Error bit for low memory mode check.
375 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * This flag used to be (1 << 4), the same bit as REFERENCER_MISMATCH, so the
 * two errors could not be told apart once OR-ed into an error mask.  It now
 * owns the first free bit; every flag in this group is a distinct bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
389 static void *print_status_check(void *p)
391 struct task_ctx *priv = p;
392 const char work_indicator[] = { '.', 'o', 'O', 'o' };
394 static char *task_position_string[] = {
396 "checking free space cache",
400 task_period_start(priv->info, 1000 /* 1s */);
402 if (priv->tp == TASK_NOTHING)
406 printf("%s [%c]\r", task_position_string[priv->tp],
407 work_indicator[count % 4]);
410 task_period_wait(priv->info);
415 static int print_status_return(void *p)
423 static enum btrfs_check_mode parse_check_mode(const char *str)
425 if (strcmp(str, "lowmem") == 0)
426 return CHECK_MODE_LOWMEM;
427 if (strcmp(str, "orig") == 0)
428 return CHECK_MODE_ORIGINAL;
429 if (strcmp(str, "original") == 0)
430 return CHECK_MODE_ORIGINAL;
432 return CHECK_MODE_UNKNOWN;
435 /* Compatibility helper to allow reuse of old code */
436 static u64 first_extent_gap(struct rb_root *holes)
438 struct file_extent_hole *hole;
440 if (RB_EMPTY_ROOT(holes))
443 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
447 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 struct file_extent_hole *hole1;
450 struct file_extent_hole *hole2;
452 hole1 = rb_entry(node1, struct file_extent_hole, node);
453 hole2 = rb_entry(node2, struct file_extent_hole, node);
455 if (hole1->start > hole2->start)
457 if (hole1->start < hole2->start)
459 /* Now hole1->start == hole2->start */
460 if (hole1->len >= hole2->len)
462 * Hole 1 will be merge center
463 * Same hole will be merged later
466 /* Hole 2 will be merge center */
471 * Add a hole to the record
473 * This will do hole merge for copy_file_extent_holes(),
474 * which will ensure there won't be continuous holes.
476 static int add_file_extent_hole(struct rb_root *holes,
479 struct file_extent_hole *hole;
480 struct file_extent_hole *prev = NULL;
481 struct file_extent_hole *next = NULL;
483 hole = malloc(sizeof(*hole));
488 /* Since compare will not return 0, no -EEXIST will happen */
489 rb_insert(holes, &hole->node, compare_hole);
491 /* simple merge with previous hole */
492 if (rb_prev(&hole->node))
493 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495 if (prev && prev->start + prev->len >= hole->start) {
496 hole->len = hole->start + hole->len - prev->start;
497 hole->start = prev->start;
498 rb_erase(&prev->node, holes);
503 /* iterate merge with next holes */
505 if (!rb_next(&hole->node))
507 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509 if (hole->start + hole->len >= next->start) {
510 if (hole->start + hole->len <= next->start + next->len)
511 hole->len = next->start + next->len -
513 rb_erase(&next->node, holes);
522 static int compare_hole_range(struct rb_node *node, void *data)
524 struct file_extent_hole *hole;
527 hole = (struct file_extent_hole *)data;
530 hole = rb_entry(node, struct file_extent_hole, node);
531 if (start < hole->start)
533 if (start >= hole->start && start < hole->start + hole->len)
539 * Delete a hole in the record
541 * This will split the hole and is much more restrictive than add.
543 static int del_file_extent_hole(struct rb_root *holes,
546 struct file_extent_hole *hole;
547 struct file_extent_hole tmp;
552 struct rb_node *node;
559 node = rb_search(holes, &tmp, compare_hole_range, NULL);
562 hole = rb_entry(node, struct file_extent_hole, node);
563 if (start + len > hole->start + hole->len)
567 * Now there will be no overlap, delete the hole and re-add the
568 * split(s) if they exist.
570 if (start > hole->start) {
571 prev_start = hole->start;
572 prev_len = start - hole->start;
575 if (hole->start + hole->len > start + len) {
576 next_start = start + len;
577 next_len = hole->start + hole->len - start - len;
580 rb_erase(node, holes);
583 ret = add_file_extent_hole(holes, prev_start, prev_len);
588 ret = add_file_extent_hole(holes, next_start, next_len);
595 static int copy_file_extent_holes(struct rb_root *dst,
598 struct file_extent_hole *hole;
599 struct rb_node *node;
602 node = rb_first(src);
604 hole = rb_entry(node, struct file_extent_hole, node);
605 ret = add_file_extent_hole(dst, hole->start, hole->len);
608 node = rb_next(node);
613 static void free_file_extent_holes(struct rb_root *holes)
615 struct rb_node *node;
616 struct file_extent_hole *hole;
618 node = rb_first(holes);
620 hole = rb_entry(node, struct file_extent_hole, node);
621 rb_erase(node, holes);
623 node = rb_first(holes);
627 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629 static void record_root_in_trans(struct btrfs_trans_handle *trans,
630 struct btrfs_root *root)
632 if (root->last_trans != trans->transid) {
633 root->track_dirty = 1;
634 root->last_trans = trans->transid;
635 root->commit_root = root->node;
636 extent_buffer_get(root->node);
640 static u8 imode_to_type(u32 imode)
643 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
644 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
645 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
646 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
647 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
648 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
649 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
650 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
653 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
657 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 struct device_record *rec1;
660 struct device_record *rec2;
662 rec1 = rb_entry(node1, struct device_record, node);
663 rec2 = rb_entry(node2, struct device_record, node);
664 if (rec1->devid > rec2->devid)
666 else if (rec1->devid < rec2->devid)
672 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 struct inode_record *rec;
675 struct inode_backref *backref;
676 struct inode_backref *orig;
677 struct inode_backref *tmp;
678 struct orphan_data_extent *src_orphan;
679 struct orphan_data_extent *dst_orphan;
684 rec = malloc(sizeof(*rec));
686 return ERR_PTR(-ENOMEM);
687 memcpy(rec, orig_rec, sizeof(*rec));
689 INIT_LIST_HEAD(&rec->backrefs);
690 INIT_LIST_HEAD(&rec->orphan_extents);
691 rec->holes = RB_ROOT;
693 list_for_each_entry(orig, &orig_rec->backrefs, list) {
694 size = sizeof(*orig) + orig->namelen + 1;
695 backref = malloc(size);
700 memcpy(backref, orig, size);
701 list_add_tail(&backref->list, &rec->backrefs);
703 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
704 dst_orphan = malloc(sizeof(*dst_orphan));
709 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
710 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
719 rb = rb_first(&rec->holes);
721 struct file_extent_hole *hole;
723 hole = rb_entry(rb, struct file_extent_hole, node);
729 if (!list_empty(&rec->backrefs))
730 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
731 list_del(&orig->list);
735 if (!list_empty(&rec->orphan_extents))
736 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
737 list_del(&orig->list);
746 static void print_orphan_data_extents(struct list_head *orphan_extents,
749 struct orphan_data_extent *orphan;
751 if (list_empty(orphan_extents))
753 printf("The following data extent is lost in tree %llu:\n",
755 list_for_each_entry(orphan, orphan_extents, list) {
756 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
757 orphan->objectid, orphan->offset, orphan->disk_bytenr,
762 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 u64 root_objectid = root->root_key.objectid;
765 int errors = rec->errors;
769 /* reloc root errors, we print its corresponding fs root objectid*/
770 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
771 root_objectid = root->root_key.offset;
772 fprintf(stderr, "reloc");
774 fprintf(stderr, "root %llu inode %llu errors %x",
775 (unsigned long long) root_objectid,
776 (unsigned long long) rec->ino, rec->errors);
778 if (errors & I_ERR_NO_INODE_ITEM)
779 fprintf(stderr, ", no inode item");
780 if (errors & I_ERR_NO_ORPHAN_ITEM)
781 fprintf(stderr, ", no orphan item");
782 if (errors & I_ERR_DUP_INODE_ITEM)
783 fprintf(stderr, ", dup inode item");
784 if (errors & I_ERR_DUP_DIR_INDEX)
785 fprintf(stderr, ", dup dir index");
786 if (errors & I_ERR_ODD_DIR_ITEM)
787 fprintf(stderr, ", odd dir item");
788 if (errors & I_ERR_ODD_FILE_EXTENT)
789 fprintf(stderr, ", odd file extent");
790 if (errors & I_ERR_BAD_FILE_EXTENT)
791 fprintf(stderr, ", bad file extent");
792 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
793 fprintf(stderr, ", file extent overlap");
794 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
795 fprintf(stderr, ", file extent discount");
796 if (errors & I_ERR_DIR_ISIZE_WRONG)
797 fprintf(stderr, ", dir isize wrong");
798 if (errors & I_ERR_FILE_NBYTES_WRONG)
799 fprintf(stderr, ", nbytes wrong");
800 if (errors & I_ERR_ODD_CSUM_ITEM)
801 fprintf(stderr, ", odd csum item");
802 if (errors & I_ERR_SOME_CSUM_MISSING)
803 fprintf(stderr, ", some csum missing");
804 if (errors & I_ERR_LINK_COUNT_WRONG)
805 fprintf(stderr, ", link count wrong");
806 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
807 fprintf(stderr, ", orphan file extent");
808 fprintf(stderr, "\n");
809 /* Print the orphan extents if needed */
810 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
811 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813 /* Print the holes if needed */
814 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
815 struct file_extent_hole *hole;
816 struct rb_node *node;
819 node = rb_first(&rec->holes);
820 fprintf(stderr, "Found file extent holes:\n");
823 hole = rb_entry(node, struct file_extent_hole, node);
824 fprintf(stderr, "\tstart: %llu, len: %llu\n",
825 hole->start, hole->len);
826 node = rb_next(node);
829 fprintf(stderr, "\tstart: 0, len: %llu\n",
831 root->fs_info->sectorsize));
835 static void print_ref_error(int errors)
837 if (errors & REF_ERR_NO_DIR_ITEM)
838 fprintf(stderr, ", no dir item");
839 if (errors & REF_ERR_NO_DIR_INDEX)
840 fprintf(stderr, ", no dir index");
841 if (errors & REF_ERR_NO_INODE_REF)
842 fprintf(stderr, ", no inode ref");
843 if (errors & REF_ERR_DUP_DIR_ITEM)
844 fprintf(stderr, ", dup dir item");
845 if (errors & REF_ERR_DUP_DIR_INDEX)
846 fprintf(stderr, ", dup dir index");
847 if (errors & REF_ERR_DUP_INODE_REF)
848 fprintf(stderr, ", dup inode ref");
849 if (errors & REF_ERR_INDEX_UNMATCH)
850 fprintf(stderr, ", index mismatch");
851 if (errors & REF_ERR_FILETYPE_UNMATCH)
852 fprintf(stderr, ", filetype mismatch");
853 if (errors & REF_ERR_NAME_TOO_LONG)
854 fprintf(stderr, ", name too long");
855 if (errors & REF_ERR_NO_ROOT_REF)
856 fprintf(stderr, ", no root ref");
857 if (errors & REF_ERR_NO_ROOT_BACKREF)
858 fprintf(stderr, ", no root backref");
859 if (errors & REF_ERR_DUP_ROOT_REF)
860 fprintf(stderr, ", dup root ref");
861 if (errors & REF_ERR_DUP_ROOT_BACKREF)
862 fprintf(stderr, ", dup root backref");
863 fprintf(stderr, "\n");
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869 struct ptr_node *node;
870 struct cache_extent *cache;
871 struct inode_record *rec = NULL;
874 cache = lookup_cache_extent(inode_cache, ino, 1);
876 node = container_of(cache, struct ptr_node, cache);
878 if (mod && rec->refs > 1) {
879 node->data = clone_inode_rec(rec);
880 if (IS_ERR(node->data))
886 rec = calloc(1, sizeof(*rec));
888 return ERR_PTR(-ENOMEM);
890 rec->extent_start = (u64)-1;
892 INIT_LIST_HEAD(&rec->backrefs);
893 INIT_LIST_HEAD(&rec->orphan_extents);
894 rec->holes = RB_ROOT;
896 node = malloc(sizeof(*node));
899 return ERR_PTR(-ENOMEM);
901 node->cache.start = ino;
902 node->cache.size = 1;
905 if (ino == BTRFS_FREE_INO_OBJECTID)
908 ret = insert_cache_extent(inode_cache, &node->cache);
910 return ERR_PTR(-EEXIST);
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 struct orphan_data_extent *orphan;
919 while (!list_empty(orphan_extents)) {
920 orphan = list_entry(orphan_extents->next,
921 struct orphan_data_extent, list);
922 list_del(&orphan->list);
927 static void free_inode_rec(struct inode_record *rec)
929 struct inode_backref *backref;
934 while (!list_empty(&rec->backrefs)) {
935 backref = to_inode_backref(rec->backrefs.next);
936 list_del(&backref->list);
939 free_orphan_data_extents(&rec->orphan_extents);
940 free_file_extent_holes(&rec->holes);
944 static int can_free_inode_rec(struct inode_record *rec)
946 if (!rec->errors && rec->checked && rec->found_inode_item &&
947 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953 struct inode_record *rec)
955 struct cache_extent *cache;
956 struct inode_backref *tmp, *backref;
957 struct ptr_node *node;
960 if (!rec->found_inode_item)
963 filetype = imode_to_type(rec->imode);
964 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965 if (backref->found_dir_item && backref->found_dir_index) {
966 if (backref->filetype != filetype)
967 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968 if (!backref->errors && backref->found_inode_ref &&
969 rec->nlink == rec->found_link) {
970 list_del(&backref->list);
976 if (!rec->checked || rec->merging)
979 if (S_ISDIR(rec->imode)) {
980 if (rec->found_size != rec->isize)
981 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982 if (rec->found_file_extent)
983 rec->errors |= I_ERR_ODD_FILE_EXTENT;
984 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985 if (rec->found_dir_item)
986 rec->errors |= I_ERR_ODD_DIR_ITEM;
987 if (rec->found_size != rec->nbytes)
988 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989 if (rec->nlink > 0 && !no_holes &&
990 (rec->extent_end < rec->isize ||
991 first_extent_gap(&rec->holes) < rec->isize))
992 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996 if (rec->found_csum_item && rec->nodatasum)
997 rec->errors |= I_ERR_ODD_CSUM_ITEM;
998 if (rec->some_csum_missing && !rec->nodatasum)
999 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002 BUG_ON(rec->refs != 1);
1003 if (can_free_inode_rec(rec)) {
1004 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005 node = container_of(cache, struct ptr_node, cache);
1006 BUG_ON(node->data != rec);
1007 remove_cache_extent(inode_cache, &node->cache);
1009 free_inode_rec(rec);
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 struct btrfs_path path;
1016 struct btrfs_key key;
1019 key.objectid = BTRFS_ORPHAN_OBJECTID;
1020 key.type = BTRFS_ORPHAN_ITEM_KEY;
1023 btrfs_init_path(&path);
1024 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025 btrfs_release_path(&path);
1031 static int process_inode_item(struct extent_buffer *eb,
1032 int slot, struct btrfs_key *key,
1033 struct shared_node *active_node)
1035 struct inode_record *rec;
1036 struct btrfs_inode_item *item;
1038 rec = active_node->current;
1039 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040 if (rec->found_inode_item) {
1041 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045 rec->nlink = btrfs_inode_nlink(eb, item);
1046 rec->isize = btrfs_inode_size(eb, item);
1047 rec->nbytes = btrfs_inode_nbytes(eb, item);
1048 rec->imode = btrfs_inode_mode(eb, item);
1049 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051 rec->found_inode_item = 1;
1052 if (rec->nlink == 0)
1053 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054 maybe_free_inode_rec(&active_node->inode_cache, rec);
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060 int namelen, u64 dir)
1062 struct inode_backref *backref;
1064 list_for_each_entry(backref, &rec->backrefs, list) {
1065 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067 if (backref->dir != dir || backref->namelen != namelen)
1069 if (memcmp(name, backref->name, namelen))
1074 backref = malloc(sizeof(*backref) + namelen + 1);
1077 memset(backref, 0, sizeof(*backref));
1079 backref->namelen = namelen;
1080 memcpy(backref->name, name, namelen);
1081 backref->name[namelen] = '\0';
1082 list_add_tail(&backref->list, &rec->backrefs);
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087 u64 ino, u64 dir, u64 index,
1088 const char *name, int namelen,
1089 u8 filetype, u8 itemtype, int errors)
1091 struct inode_record *rec;
1092 struct inode_backref *backref;
1094 rec = get_inode_rec(inode_cache, ino, 1);
1095 BUG_ON(IS_ERR(rec));
1096 backref = get_inode_backref(rec, name, namelen, dir);
1099 backref->errors |= errors;
1100 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101 if (backref->found_dir_index)
1102 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103 if (backref->found_inode_ref && backref->index != index)
1104 backref->errors |= REF_ERR_INDEX_UNMATCH;
1105 if (backref->found_dir_item && backref->filetype != filetype)
1106 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108 backref->index = index;
1109 backref->filetype = filetype;
1110 backref->found_dir_index = 1;
1111 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113 if (backref->found_dir_item)
1114 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115 if (backref->found_dir_index && backref->filetype != filetype)
1116 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118 backref->filetype = filetype;
1119 backref->found_dir_item = 1;
1120 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122 if (backref->found_inode_ref)
1123 backref->errors |= REF_ERR_DUP_INODE_REF;
1124 if (backref->found_dir_index && backref->index != index)
1125 backref->errors |= REF_ERR_INDEX_UNMATCH;
1127 backref->index = index;
1129 backref->ref_type = itemtype;
1130 backref->found_inode_ref = 1;
1135 maybe_free_inode_rec(inode_cache, rec);
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140 struct cache_tree *dst_cache)
1142 struct inode_backref *backref;
1147 list_for_each_entry(backref, &src->backrefs, list) {
1148 if (backref->found_dir_index) {
1149 add_inode_backref(dst_cache, dst->ino, backref->dir,
1150 backref->index, backref->name,
1151 backref->namelen, backref->filetype,
1152 BTRFS_DIR_INDEX_KEY, backref->errors);
1154 if (backref->found_dir_item) {
1156 add_inode_backref(dst_cache, dst->ino,
1157 backref->dir, 0, backref->name,
1158 backref->namelen, backref->filetype,
1159 BTRFS_DIR_ITEM_KEY, backref->errors);
1161 if (backref->found_inode_ref) {
1162 add_inode_backref(dst_cache, dst->ino,
1163 backref->dir, backref->index,
1164 backref->name, backref->namelen, 0,
1165 backref->ref_type, backref->errors);
1169 if (src->found_dir_item)
1170 dst->found_dir_item = 1;
1171 if (src->found_file_extent)
1172 dst->found_file_extent = 1;
1173 if (src->found_csum_item)
1174 dst->found_csum_item = 1;
1175 if (src->some_csum_missing)
1176 dst->some_csum_missing = 1;
1177 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1183 BUG_ON(src->found_link < dir_count);
1184 dst->found_link += src->found_link - dir_count;
1185 dst->found_size += src->found_size;
1186 if (src->extent_start != (u64)-1) {
1187 if (dst->extent_start == (u64)-1) {
1188 dst->extent_start = src->extent_start;
1189 dst->extent_end = src->extent_end;
1191 if (dst->extent_end > src->extent_start)
1192 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193 else if (dst->extent_end < src->extent_start) {
1194 ret = add_file_extent_hole(&dst->holes,
1196 src->extent_start - dst->extent_end);
1198 if (dst->extent_end < src->extent_end)
1199 dst->extent_end = src->extent_end;
1203 dst->errors |= src->errors;
1204 if (src->found_inode_item) {
1205 if (!dst->found_inode_item) {
1206 dst->nlink = src->nlink;
1207 dst->isize = src->isize;
1208 dst->nbytes = src->nbytes;
1209 dst->imode = src->imode;
1210 dst->nodatasum = src->nodatasum;
1211 dst->found_inode_item = 1;
1213 dst->errors |= I_ERR_DUP_INODE_ITEM;
1221 static int splice_shared_node(struct shared_node *src_node,
1222 struct shared_node *dst_node)
1224 struct cache_extent *cache;
1225 struct ptr_node *node, *ins;
1226 struct cache_tree *src, *dst;
1227 struct inode_record *rec, *conflict;
1228 u64 current_ino = 0;
1232 if (--src_node->refs == 0)
1234 if (src_node->current)
1235 current_ino = src_node->current->ino;
1237 src = &src_node->root_cache;
1238 dst = &dst_node->root_cache;
1240 cache = search_cache_extent(src, 0);
1242 node = container_of(cache, struct ptr_node, cache);
1244 cache = next_cache_extent(cache);
1247 remove_cache_extent(src, &node->cache);
1250 ins = malloc(sizeof(*ins));
1252 ins->cache.start = node->cache.start;
1253 ins->cache.size = node->cache.size;
1257 ret = insert_cache_extent(dst, &ins->cache);
1258 if (ret == -EEXIST) {
1259 conflict = get_inode_rec(dst, rec->ino, 1);
1260 BUG_ON(IS_ERR(conflict));
1261 merge_inode_recs(rec, conflict, dst);
1263 conflict->checked = 1;
1264 if (dst_node->current == conflict)
1265 dst_node->current = NULL;
1267 maybe_free_inode_rec(dst, conflict);
1268 free_inode_rec(rec);
1275 if (src == &src_node->root_cache) {
1276 src = &src_node->inode_cache;
1277 dst = &dst_node->inode_cache;
1281 if (current_ino > 0 && (!dst_node->current ||
1282 current_ino > dst_node->current->ino)) {
1283 if (dst_node->current) {
1284 dst_node->current->checked = 1;
1285 maybe_free_inode_rec(dst, dst_node->current);
1287 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288 BUG_ON(IS_ERR(dst_node->current));
1293 static void free_inode_ptr(struct cache_extent *cache)
1295 struct ptr_node *node;
1296 struct inode_record *rec;
1298 node = container_of(cache, struct ptr_node, cache);
1300 free_inode_rec(rec);
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309 struct cache_extent *cache;
1310 struct shared_node *node;
1312 cache = lookup_cache_extent(shared, bytenr, 1);
1314 node = container_of(cache, struct shared_node, cache);
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 struct shared_node *node;
1325 node = calloc(1, sizeof(*node));
1328 node->cache.start = bytenr;
1329 node->cache.size = 1;
1330 cache_tree_init(&node->root_cache);
1331 cache_tree_init(&node->inode_cache);
1334 ret = insert_cache_extent(shared, &node->cache);
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340 struct walk_control *wc, int level)
1342 struct shared_node *node;
1343 struct shared_node *dest;
1346 if (level == wc->active_node)
1349 BUG_ON(wc->active_node <= level);
1350 node = find_shared_node(&wc->shared, bytenr);
1352 ret = add_shared_node(&wc->shared, bytenr, refs);
1354 node = find_shared_node(&wc->shared, bytenr);
1355 wc->nodes[level] = node;
1356 wc->active_node = level;
1360 if (wc->root_level == wc->active_node &&
1361 btrfs_root_refs(&root->root_item) == 0) {
1362 if (--node->refs == 0) {
1363 free_inode_recs_tree(&node->root_cache);
1364 free_inode_recs_tree(&node->inode_cache);
1365 remove_cache_extent(&wc->shared, &node->cache);
1371 dest = wc->nodes[wc->active_node];
1372 splice_shared_node(node, dest);
1373 if (node->refs == 0) {
1374 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(), invoked from walk_up_tree() when the
 * walk leaves the active shared node at @level. Nothing is spliced when
 * @level is the root level. Otherwise the levels above @level are scanned
 * for the next active node, the slot of the node being left is cleared and
 * wc->active_node is moved up. If the new active level is below
 * wc->root_level, or the root still has references, the records of the node
 * being left are spliced into the new active node.
 */
1380 static int leave_shared_node(struct btrfs_root *root,
1381 struct walk_control *wc, int level)
1383 struct shared_node *node;
1384 struct shared_node *dest;
1387 if (level == wc->root_level)
1390 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1394 BUG_ON(i >= BTRFS_MAX_LEVEL);
1396 node = wc->nodes[wc->active_node];
1397 wc->nodes[wc->active_node] = NULL;
1398 wc->active_node = i;
1400 dest = wc->nodes[wc->active_node];
1401 if (wc->active_node < wc->root_level ||
1402 btrfs_root_refs(&root->root_item) > 0) {
1403 BUG_ON(node->refs <= 1);
1404 splice_shared_node(node, dest);
1406 BUG_ON(node->refs < 2);
1415 * 1 - if the root with id child_root_id is a child of root parent_root_id
1416 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1417 * has other root(s) as parent(s)
1418 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine how child_root_id relates to @parent_root_id using the tree
 * root. First an exact ROOT_REF key (parent_root_id, child_root_id) is
 * searched for. Then the ROOT_BACKREF items of child_root_id are iterated,
 * looking for one whose key offset equals @parent_root_id. The final return
 * uses has_parent to tell "has some other parent" (0) from "has no parent
 * at all" (2).
 */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423 struct btrfs_path path;
1424 struct btrfs_key key;
1425 struct extent_buffer *leaf;
1429 btrfs_init_path(&path);
1431 key.objectid = parent_root_id;
1432 key.type = BTRFS_ROOT_REF_KEY;
1433 key.offset = child_root_id;
1434 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1438 btrfs_release_path(&path);
1442 key.objectid = child_root_id;
1443 key.type = BTRFS_ROOT_BACKREF_KEY;
1445 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1451 leaf = path.nodes[0];
1452 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456 leaf = path.nodes[0];
1459 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1460 if (key.objectid != child_root_id ||
1461 key.type != BTRFS_ROOT_BACKREF_KEY)
1466 if (key.offset == parent_root_id) {
1467 btrfs_release_path(&path);
1474 btrfs_release_path(&path);
1477 return has_parent ? 0 : 2;
/*
 * Parse every btrfs_dir_item packed into the DIR_ITEM/DIR_INDEX item at
 * @slot of @eb and record each entry as a backref.
 *
 * @eb:          leaf holding the item
 * @slot:        slot of the item inside @eb
 * @key:         key of the item (directory inode, type, hash or index)
 * @active_node: shared node whose root/inode caches receive the backrefs;
 *               its current inode record is the directory being processed
 *
 * For each entry the name length is added to the directory's found_size.
 * A name that would cross the item boundary or exceeds BTRFS_NAME_LEN is
 * truncated and flagged with REF_ERR_NAME_TOO_LONG. For DIR_ITEM keys the
 * key offset must equal the hash of the name, otherwise I_ERR_ODD_DIR_ITEM
 * is set. More than one entry under a DIR_INDEX key sets
 * I_ERR_DUP_DIR_INDEX.
 *
 * namebuf holds exactly len bytes and is never NUL-terminated (len may be
 * the full BTRFS_NAME_LEN), so it is printed with a bounded "%.*s".
 */
1480 static int process_dir_item(struct extent_buffer *eb,
1481 int slot, struct btrfs_key *key,
1482 struct shared_node *active_node)
1492 struct btrfs_dir_item *di;
1493 struct inode_record *rec;
1494 struct cache_tree *root_cache;
1495 struct cache_tree *inode_cache;
1496 struct btrfs_key location;
1497 char namebuf[BTRFS_NAME_LEN];
1499 root_cache = &active_node->root_cache;
1500 inode_cache = &active_node->inode_cache;
1501 rec = active_node->current;
1502 rec->found_dir_item = 1;
1504 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1505 total = btrfs_item_size_nr(eb, slot);
1506 while (cur < total) {
1508 btrfs_dir_item_key_to_cpu(eb, di, &location);
1509 name_len = btrfs_dir_name_len(eb, di);
1510 data_len = btrfs_dir_data_len(eb, di);
1511 filetype = btrfs_dir_type(eb, di);
1513 rec->found_size += name_len;
1514 if (cur + sizeof(*di) + name_len > total ||
1515 name_len > BTRFS_NAME_LEN) {
1516 error = REF_ERR_NAME_TOO_LONG;
1518 if (cur + sizeof(*di) > total)
1520 len = min_t(u32, total - cur - sizeof(*di),
1527 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1529 if (key->type == BTRFS_DIR_ITEM_KEY &&
1530 key->offset != btrfs_name_hash(namebuf, len)) {
1531 rec->errors |= I_ERR_ODD_DIR_ITEM;
1532 error("DIR_ITEM[%llu %llu] name %.*s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1533 key->objectid, key->offset, (int)len, namebuf, len, filetype,
1534 key->offset, btrfs_name_hash(namebuf, len));
1537 if (location.type == BTRFS_INODE_ITEM_KEY) {
1538 add_inode_backref(inode_cache, location.objectid,
1539 key->objectid, key->offset, namebuf,
1540 len, filetype, key->type, error);
1541 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1542 add_inode_backref(root_cache, location.objectid,
1543 key->objectid, key->offset,
1544 namebuf, len, filetype,
1547 fprintf(stderr, "invalid location in dir item %u\n",
1549 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1550 key->objectid, key->offset, namebuf,
1551 len, filetype, key->type, error);
1554 len = sizeof(*di) + name_len + data_len;
1555 di = (struct btrfs_dir_item *)((char *)di + len);
1558 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1559 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into the INODE_REF item at @slot and
 * register each one as a backref (inode key->objectid, parent directory
 * key->offset) in the inode cache of @active_node. A name that would cross
 * the item boundary or exceeds BTRFS_NAME_LEN is truncated to what fits and
 * flagged with REF_ERR_NAME_TOO_LONG.
 */
1564 static int process_inode_ref(struct extent_buffer *eb,
1565 int slot, struct btrfs_key *key,
1566 struct shared_node *active_node)
1574 struct cache_tree *inode_cache;
1575 struct btrfs_inode_ref *ref;
1576 char namebuf[BTRFS_NAME_LEN];
1578 inode_cache = &active_node->inode_cache;
1580 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1581 total = btrfs_item_size_nr(eb, slot);
1582 while (cur < total) {
1583 name_len = btrfs_inode_ref_name_len(eb, ref);
1584 index = btrfs_inode_ref_index(eb, ref);
1586 /* inode_ref + namelen should not cross item boundary */
1587 if (cur + sizeof(*ref) + name_len > total ||
1588 name_len > BTRFS_NAME_LEN) {
1589 if (total < cur + sizeof(*ref))
1592 /* Still try to read out the remaining part */
1593 len = min_t(u32, total - cur - sizeof(*ref),
1595 error = REF_ERR_NAME_TOO_LONG;
1601 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1602 add_inode_backref(inode_cache, key->objectid, key->offset,
1603 index, namebuf, len, 0, key->type, error);
1605 len = sizeof(*ref) + name_len;
1606 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk every btrfs_inode_extref packed into the INODE_EXTREF item at @slot
 * and register each one as a backref in the inode cache of @active_node.
 * Unlike process_inode_ref(), the parent directory comes from the extref
 * itself rather than from key->offset. Names longer than BTRFS_NAME_LEN are
 * truncated and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): no check that the name stays inside the item is visible
 * here, unlike in process_inode_ref() - confirm whether one is needed.
 */
1612 static int process_inode_extref(struct extent_buffer *eb,
1613 int slot, struct btrfs_key *key,
1614 struct shared_node *active_node)
1623 struct cache_tree *inode_cache;
1624 struct btrfs_inode_extref *extref;
1625 char namebuf[BTRFS_NAME_LEN];
1627 inode_cache = &active_node->inode_cache;
1629 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1630 total = btrfs_item_size_nr(eb, slot);
1631 while (cur < total) {
1632 name_len = btrfs_inode_extref_name_len(eb, extref);
1633 index = btrfs_inode_extref_index(eb, extref);
1634 parent = btrfs_inode_extref_parent(eb, extref);
1635 if (name_len <= BTRFS_NAME_LEN) {
1639 len = BTRFS_NAME_LEN;
1640 error = REF_ERR_NAME_TOO_LONG;
1642 read_extent_buffer(eb, namebuf,
1643 (unsigned long)(extref + 1), len);
1644 add_inode_backref(inode_cache, key->objectid, parent,
1645 index, namebuf, len, 0, key->type, error);
1647 len = sizeof(*extref) + name_len;
1648 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the csum tree for EXTENT_CSUM items overlapping the byte range
 * [@start, @start + @len) and measure how much of it they cover.
 * NOTE(review): the result is presumably accumulated into *@found - the
 * accumulation itself is not visible here, confirm.
 */
1655 static int count_csum_range(struct btrfs_root *root, u64 start,
1656 u64 len, u64 *found)
1658 struct btrfs_key key;
1659 struct btrfs_path path;
1660 struct extent_buffer *leaf;
1665 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1667 btrfs_init_path(&path);
1669 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1671 key.type = BTRFS_EXTENT_CSUM_KEY;
1673 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: inspect the preceding item, if it is a csum item too. */
1677 if (ret > 0 && path.slots[0] > 0) {
1678 leaf = path.nodes[0];
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1680 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1681 key.type == BTRFS_EXTENT_CSUM_KEY)
1686 leaf = path.nodes[0];
1687 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1688 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1693 leaf = path.nodes[0];
1696 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1697 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1698 key.type != BTRFS_EXTENT_CSUM_KEY)
1701 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1702 if (key.offset >= start + len)
1705 if (key.offset > start)
/* The item holds size / csum_size checksums, each covering one sector. */
1708 size = btrfs_item_size_nr(leaf, path.slots[0]);
1709 csum_end = key.offset + (size / csum_size) *
1710 root->fs_info->sectorsize;
1711 if (csum_end > start) {
1712 size = min(csum_end - start, len);
1721 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item of the inode currently tracked by
 * @active_node.
 *
 * - Tracks the covered file range in rec->extent_start / rec->extent_end:
 *   an item starting before extent_end sets I_ERR_FILE_EXTENT_OVERLAP, a gap
 *   is recorded through add_file_extent_hole().
 * - Inline extents add their inline length to found_size; that length is
 *   then rounded up to the sector size.
 * - Regular/prealloc extents are sanity checked (non-zero, sector-aligned
 *   num_bytes; num_bytes + offset within ram_bytes; no compression,
 *   encryption or other encoding on prealloc) and add num_bytes to
 *   found_size when disk_bytenr is non-zero.
 * - For extents with a disk_bytenr outside the data reloc tree, checksum
 *   coverage is measured with count_csum_range() and recorded on the record.
 */
1727 static int process_file_extent(struct btrfs_root *root,
1728 struct extent_buffer *eb,
1729 int slot, struct btrfs_key *key,
1730 struct shared_node *active_node)
1732 struct inode_record *rec;
1733 struct btrfs_file_extent_item *fi;
1735 u64 disk_bytenr = 0;
1736 u64 extent_offset = 0;
1737 u64 mask = root->fs_info->sectorsize - 1;
1741 rec = active_node->current;
1742 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743 rec->found_file_extent = 1;
/* (u64)-1 is the "no extent seen yet" marker for extent_start. */
1745 if (rec->extent_start == (u64)-1) {
1746 rec->extent_start = key->offset;
1747 rec->extent_end = key->offset;
1750 if (rec->extent_end > key->offset)
1751 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752 else if (rec->extent_end < key->offset) {
1753 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754 key->offset - rec->extent_end);
1759 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760 extent_type = btrfs_file_extent_type(eb, fi);
1762 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1765 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766 rec->found_size += num_bytes;
1767 num_bytes = (num_bytes + mask) & ~mask;
1768 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772 extent_offset = btrfs_file_extent_offset(eb, fi);
1773 if (num_bytes == 0 || (num_bytes & mask))
1774 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775 if (num_bytes + extent_offset >
1776 btrfs_file_extent_ram_bytes(eb, fi))
1777 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779 (btrfs_file_extent_compression(eb, fi) ||
1780 btrfs_file_extent_encryption(eb, fi) ||
1781 btrfs_file_extent_other_encoding(eb, fi)))
1782 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1783 if (disk_bytenr > 0)
1784 rec->found_size += num_bytes;
1786 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1788 rec->extent_end = key->offset + num_bytes;
1791 * The data reloc tree will copy full extents into its inode and then
1792 * copy the corresponding csums. Because the extent it copied could be
1793 * a preallocated extent that hasn't been written to yet there may be no
1794 * csums to copy, ergo we won't have csums for our file extent. This is
1795 * ok so just don't bother checking csums if the inode belongs to the
1798 if (disk_bytenr > 0 &&
1799 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1801 if (btrfs_file_extent_compression(eb, fi))
1802 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1804 disk_bytenr += extent_offset;
1806 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1809 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1811 rec->found_csum_item = 1;
1812 if (found < num_bytes)
1813 rec->some_csum_missing = 1;
1814 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1816 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode fs tree check.
 *
 * Items are attributed to the inode record of the active shared node. When
 * the key's objectid moves past the current record, that record is marked
 * checked, offered to maybe_free_inode_rec(), and a record for the new
 * inode is fetched with get_inode_rec(). Each item is then dispatched on
 * its key type to the matching process_*() helper. Free-space-objectid and
 * orphan-item keys are tested before the dispatch.
 */
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823 struct walk_control *wc)
1825 struct btrfs_key key;
1829 struct cache_tree *inode_cache;
1830 struct shared_node *active_node;
1832 if (wc->root_level == wc->active_node &&
1833 btrfs_root_refs(&root->root_item) == 0)
1836 active_node = wc->nodes[wc->active_node];
1837 inode_cache = &active_node->inode_cache;
1838 nritems = btrfs_header_nritems(eb);
1839 for (i = 0; i < nritems; i++) {
1840 btrfs_item_key_to_cpu(eb, &key, i);
1842 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1844 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1847 if (active_node->current == NULL ||
1848 active_node->current->ino < key.objectid) {
1849 if (active_node->current) {
1850 active_node->current->checked = 1;
1851 maybe_free_inode_rec(inode_cache,
1852 active_node->current);
1854 active_node->current = get_inode_rec(inode_cache,
1856 BUG_ON(IS_ERR(active_node->current));
1859 case BTRFS_DIR_ITEM_KEY:
1860 case BTRFS_DIR_INDEX_KEY:
1861 ret = process_dir_item(eb, i, &key, active_node);
1863 case BTRFS_INODE_REF_KEY:
1864 ret = process_inode_ref(eb, i, &key, active_node);
1866 case BTRFS_INODE_EXTREF_KEY:
1867 ret = process_inode_extref(eb, i, &key, active_node);
1869 case BTRFS_INODE_ITEM_KEY:
1870 ret = process_inode_item(eb, i, &key, active_node);
1872 case BTRFS_EXTENT_DATA_KEY:
1873 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache used by the tree walkers; every array is indexed by tree
 * level. bytenr[] is the block the entry describes, refs[] its reference
 * count as returned by btrfs_lookup_extent_info(), and need_check[] whether
 * the block has to be checked while walking the current root.
 */
1884 u64 bytenr[BTRFS_MAX_LEVEL];
1885 u64 refs[BTRFS_MAX_LEVEL];
1886 int need_check[BTRFS_MAX_LEVEL];
/*
 * Forward declarations: process_one_leaf_v2() below calls both of these
 * before their definitions appear.
 */
1889 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1890 struct node_refs *nrefs, u64 level);
1891 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1892 unsigned int ext_ref);
1895 * Returns >0 Found error, not fatal, should continue
1896 * Returns <0 Fatal error, must exit the whole check
1897 * Returns 0 No errors found
/*
 * Check all inodes found in the leaf at path->nodes[0].
 *
 * The leaf is first scanned for the first INODE_ITEM or the first change of
 * inode number, then check_inode_item() is called and its result is OR-ed
 * into the positive error bitmap err; the LAST_ITEM bit is tested after each
 * call. check_inode_item() may move @path to another leaf. When that
 * happens the path is walked from the root level down, refreshing @nrefs
 * with update_nodes_refs(), so that a shared block which does not need
 * checking from this root can be skipped; the path entries below *level
 * are then released.
 */
1899 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1900 struct node_refs *nrefs, int *level, int ext_ref)
1902 struct extent_buffer *cur = path->nodes[0];
1903 struct btrfs_key key;
1907 int root_level = btrfs_header_level(root->node);
1909 int ret = 0; /* Final return value */
1910 int err = 0; /* Positive error bitmap */
1912 cur_bytenr = cur->start;
1914 /* skip to first inode item or the first inode number change */
1915 nritems = btrfs_header_nritems(cur);
1916 for (i = 0; i < nritems; i++) {
1917 btrfs_item_key_to_cpu(cur, &key, i);
1919 first_ino = key.objectid;
1920 if (key.type == BTRFS_INODE_ITEM_KEY ||
1921 (first_ino && first_ino != key.objectid))
1925 path->slots[0] = nritems;
1931 err |= check_inode_item(root, path, ext_ref);
1933 if (err & LAST_ITEM)
1936 /* still have inode items in thie leaf */
1937 if (cur->start == cur_bytenr)
1941 * we have switched to another leaf, above nodes may
1942 * have changed, here walk down the path, if a node
1943 * or leaf is shared, check whether we can skip this
1946 for (i = root_level; i >= 0; i--) {
1947 if (path->nodes[i]->start == nrefs->bytenr[i])
1950 ret = update_nodes_refs(root,
1951 path->nodes[i]->start,
1956 if (!nrefs->need_check[i]) {
1962 for (i = 0; i < *level; i++) {
1963 free_extent_buffer(path->nodes[i]);
1964 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at @slot and running
 * to the last pointer, using each child's block address and generation.
 */
1973 static void reada_walk_down(struct btrfs_root *root,
1974 struct extent_buffer *node, int slot)
1976 struct btrfs_fs_info *fs_info = root->fs_info;
1983 level = btrfs_header_level(node);
1987 nritems = btrfs_header_nritems(node);
1988 for (i = slot; i < nritems; i++) {
1989 bytenr = btrfs_node_blockptr(node, i);
1990 ptr_gen = btrfs_node_ptr_generation(node, i);
1991 readahead_tree_block(fs_info, bytenr, ptr_gen);
1996 * Check the child node/leaf by the following condition:
1997 * 1. the first item key of the node/leaf should be the same as the one
1999 * 2. block in parent node should match the child node/leaf.
2000 * 3. generation of parent node and child's header should be consistent.
2002 * Or the child node/leaf pointed by the key in parent is not valid.
2004 * We hope to check leaf owner too, but since subvol may share leaves,
2005 * which makes leaf owner check not so strong, key check should be
2006 * sufficient enough for that case.
/*
 * Verify that @child is the block that slot @slot of @parent points to.
 *
 * @parent: the node holding the key pointer
 * @slot:   index of the key pointer inside @parent
 * @child:  the tree block that was read for that pointer
 *
 * Three properties are compared and each mismatch is reported on stderr:
 * the first key of @child against the key stored in @parent, the bytenr in
 * the child's header against the parent's block pointer, and the child's
 * header generation against the parent's pointer generation.
 *
 * In every message "wanted" is the value recorded in @parent and "have" is
 * what @child actually contains.
 */
2008 static int check_child_node(struct extent_buffer *parent, int slot,
2009 struct extent_buffer *child)
2011 struct btrfs_key parent_key;
2012 struct btrfs_key child_key;
2015 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2016 if (btrfs_header_level(child) == 0)
2017 btrfs_item_key_to_cpu(child, &child_key, 0);
2019 btrfs_node_key_to_cpu(child, &child_key, 0);
2021 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2024 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2025 parent_key.objectid, parent_key.type, parent_key.offset,
2026 child_key.objectid, child_key.type, child_key.offset);
2028 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2030 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2031 btrfs_node_blockptr(parent, slot),
2032 btrfs_header_bytenr(child));
2034 if (btrfs_node_ptr_generation(parent, slot) !=
2035 btrfs_header_generation(child)) {
2037 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2038 btrfs_node_ptr_generation(parent, slot),
2039 btrfs_header_generation(child));
2045 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2046 * in every fs or file tree check. Here we find all its root ids, and only check
2047 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a block referenced by the roots in @roots has to be checked
 * while walking @root. A block with a single referencing root is tested
 * first. Otherwise root->objectid is compared with the value of the first
 * node of the ulist's rb-tree.
 */
2049 static int need_check(struct btrfs_root *root, struct ulist *roots)
2051 struct rb_node *node;
2052 struct ulist_node *u;
2054 if (roots->nnodes == 1)
2057 node = rb_first(&roots->root);
2058 u = rb_entry(node, struct ulist_node, rb_node);
2060 * current root id is not smallest, we skip it and let it be checked
2061 * in the fs or file tree who hash the smallest root id.
2063 if (root->objectid != u->val)
2070 * for a tree node or leaf, we record its reference count, so that if we
2071 * process this node or leaf again later, we don't need to compute it again.
/*
 * Refresh the @nrefs entry for @level so that it describes the block at
 * @bytenr. Nothing is looked up when the cached entry already matches.
 * Otherwise the extent's reference count is fetched with
 * btrfs_lookup_extent_info() and cached, and need_check[level] is set either
 * from need_check() over the roots found by btrfs_find_all_roots(), or to 1.
 * NOTE(review): the condition choosing between those two is not visible
 * here - presumably it depends on the reference count; confirm.
 */
2073 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2074 struct node_refs *nrefs, u64 level)
2078 struct ulist *roots;
2080 if (nrefs->bytenr[level] != bytenr) {
2081 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2082 level, 1, &refs, NULL);
2086 nrefs->bytenr[level] = bytenr;
2087 nrefs->refs[level] = refs;
2089 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2094 check = need_check(root, roots);
2096 nrefs->need_check[level] = check;
2098 nrefs->need_check[level] = 1;
/*
 * Original-mode walk down: descend from path->nodes[*level] towards the
 * leaves, processing each leaf with process_one_leaf().
 *
 * Reference counts are taken from @nrefs when it already describes the
 * block, otherwise they are looked up and cached there. Blocks are
 * registered with enter_shared_node(); in the loop the slot is advanced
 * right after that call on one path. Each child is fetched from the cache
 * or read from disk (with readahead of its siblings), validated with
 * check_child_node() and btrfs_check_leaf()/btrfs_check_node(), and then
 * installed in @path one level down. A child that cannot be read is
 * recorded with btrfs_add_corrupt_extent_record().
 */
2105 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2106 struct walk_control *wc, int *level,
2107 struct node_refs *nrefs)
2109 enum btrfs_tree_block_status status;
2112 struct btrfs_fs_info *fs_info = root->fs_info;
2113 struct extent_buffer *next;
2114 struct extent_buffer *cur;
2118 WARN_ON(*level < 0);
2119 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2121 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2122 refs = nrefs->refs[*level];
2125 ret = btrfs_lookup_extent_info(NULL, root,
2126 path->nodes[*level]->start,
2127 *level, 1, &refs, NULL);
2132 nrefs->bytenr[*level] = path->nodes[*level]->start;
2133 nrefs->refs[*level] = refs;
2137 ret = enter_shared_node(root, path->nodes[*level]->start,
2145 while (*level >= 0) {
2146 WARN_ON(*level < 0);
2147 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2148 cur = path->nodes[*level];
2150 if (btrfs_header_level(cur) != *level)
2153 if (path->slots[*level] >= btrfs_header_nritems(cur))
2156 ret = process_one_leaf(root, cur, wc);
2161 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2162 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2164 if (bytenr == nrefs->bytenr[*level - 1]) {
2165 refs = nrefs->refs[*level - 1];
2167 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2168 *level - 1, 1, &refs, NULL);
2172 nrefs->bytenr[*level - 1] = bytenr;
2173 nrefs->refs[*level - 1] = refs;
2178 ret = enter_shared_node(root, bytenr, refs,
2181 path->slots[*level]++;
2186 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2187 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2188 free_extent_buffer(next);
2189 reada_walk_down(root, cur, path->slots[*level]);
2190 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2191 if (!extent_buffer_uptodate(next)) {
2192 struct btrfs_key node_key;
2194 btrfs_node_key_to_cpu(path->nodes[*level],
2196 path->slots[*level]);
2197 btrfs_add_corrupt_extent_record(root->fs_info,
2199 path->nodes[*level]->start,
2200 root->fs_info->nodesize,
2207 ret = check_child_node(cur, path->slots[*level], next);
2209 free_extent_buffer(next);
2214 if (btrfs_is_leaf(next))
2215 status = btrfs_check_leaf(root, NULL, next);
2217 status = btrfs_check_node(root, NULL, next);
2218 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2219 free_extent_buffer(next);
2224 *level = *level - 1;
2225 free_extent_buffer(path->nodes[*level]);
2226 path->nodes[*level] = next;
2227 path->slots[*level] = 0;
2230 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration of check_inode_item(); it repeats the identical
 * declaration placed above process_one_leaf_v2().
 */
2234 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2235 unsigned int ext_ref);
2238 * Returns >0 Found error, should continue
2239 * Returns <0 Fatal error, must exit the whole check
2240 * Returns 0 No errors found
/*
 * Low-memory-mode walk down: descend from path->nodes[*level] towards the
 * leaves.
 *
 * Every block on the way is validated with btrfs_check_leaf() or
 * btrfs_check_node(); leaves are handed to process_one_leaf_v2().
 * update_nodes_refs() keeps @nrefs current, and a child whose
 * need_check[] entry is zero is skipped by advancing the slot. Other
 * children are fetched from the cache or read from disk (with readahead of
 * their siblings), validated with check_child_node() and installed in @path
 * one level down. A child that cannot be read is recorded with
 * btrfs_add_corrupt_extent_record().
 */
2242 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2243 int *level, struct node_refs *nrefs, int ext_ref)
2245 enum btrfs_tree_block_status status;
2248 struct btrfs_fs_info *fs_info = root->fs_info;
2249 struct extent_buffer *next;
2250 struct extent_buffer *cur;
2253 WARN_ON(*level < 0);
2254 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2256 ret = update_nodes_refs(root, path->nodes[*level]->start,
2261 while (*level >= 0) {
2262 WARN_ON(*level < 0);
2263 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2264 cur = path->nodes[*level];
2266 if (btrfs_header_level(cur) != *level)
2269 if (path->slots[*level] >= btrfs_header_nritems(cur))
2271 /* Don't forgot to check leaf/node validation */
2273 ret = btrfs_check_leaf(root, NULL, cur);
2274 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2278 ret = process_one_leaf_v2(root, path, nrefs,
2282 ret = btrfs_check_node(root, NULL, cur);
2283 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2288 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2289 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2291 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2294 if (!nrefs->need_check[*level - 1]) {
2295 path->slots[*level]++;
2299 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2300 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2301 free_extent_buffer(next);
2302 reada_walk_down(root, cur, path->slots[*level]);
2303 next = read_tree_block(fs_info, bytenr, ptr_gen);
2304 if (!extent_buffer_uptodate(next)) {
2305 struct btrfs_key node_key;
2307 btrfs_node_key_to_cpu(path->nodes[*level],
2309 path->slots[*level]);
2310 btrfs_add_corrupt_extent_record(fs_info,
2312 path->nodes[*level]->start,
2320 ret = check_child_node(cur, path->slots[*level], next);
2324 if (btrfs_is_leaf(next))
2325 status = btrfs_check_leaf(root, NULL, next);
2327 status = btrfs_check_node(root, NULL, next);
2328 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2329 free_extent_buffer(next);
2334 *level = *level - 1;
2335 free_extent_buffer(path->nodes[*level]);
2336 path->nodes[*level] = next;
2337 path->slots[*level] = 0;
/*
 * Original-mode walk up: starting at *level, look for the first ancestor in
 * @path that still has an unvisited slot. Nodes that are exhausted are
 * released from the path; when such a node is the active shared node,
 * leave_shared_node() is called for it.
 */
2342 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2343 struct walk_control *wc, int *level)
2346 struct extent_buffer *leaf;
2348 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349 leaf = path->nodes[i];
2350 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2355 free_extent_buffer(path->nodes[*level]);
2356 path->nodes[*level] = NULL;
2357 BUG_ON(*level > wc->active_node);
2358 if (*level == wc->active_node)
2359 leave_shared_node(root, wc, *level);
/*
 * Low-memory-mode walk up: same traversal as walk_up_tree() - find the
 * first ancestor with an unvisited slot and release exhausted nodes from
 * @path - but without any shared-node bookkeeping.
 */
2366 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2370 struct extent_buffer *leaf;
2372 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2373 leaf = path->nodes[i];
2374 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2379 free_extent_buffer(path->nodes[*level]);
2380 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory. The record has to have
 * an inode item and no errors, nlink == 1 and found_link == 0, and its
 * first backref must be an inode ref named ".." with index 0 that has
 * neither a dir index nor a dir item.
 */
2387 static int check_root_dir(struct inode_record *rec)
2389 struct inode_backref *backref;
2392 if (!rec->found_inode_item || rec->errors)
2394 if (rec->nlink != 1 || rec->found_link != 0)
2396 if (list_empty(&rec->backrefs))
2398 backref = to_inode_backref(rec->backrefs.next);
2399 if (!backref->found_inode_ref)
2401 if (backref->index != 0 || backref->namelen != 2 ||
2402 memcmp(backref->name, "..", 2))
2404 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size stored in a directory's inode item with the size computed
 * from its entries (rec->found_size).
 *
 * @trans: running transaction
 * @root:  tree containing the inode
 * @path:  scratch path; released before returning
 * @rec:   inode record being repaired
 *
 * The search uses offset (u64)-1 so that it lands just past the inode item;
 * the item actually found is verified to belong to rec->ino before it is
 * modified. On success I_ERR_DIR_ISIZE_WRONG is cleared from rec->errors.
 *
 * The progress message uses "%llu", like repair_inode_nbytes(); "%Lu" is not
 * a standard conversion for integer arguments.
 */
2411 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2412 struct btrfs_root *root, struct btrfs_path *path,
2413 struct inode_record *rec)
2415 struct btrfs_inode_item *ei;
2416 struct btrfs_key key;
2419 key.objectid = rec->ino;
2420 key.type = BTRFS_INODE_ITEM_KEY;
2421 key.offset = (u64)-1;
2423 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2427 if (!path->slots[0]) {
2434 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2435 if (key.objectid != rec->ino) {
2440 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2441 struct btrfs_inode_item);
2442 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2443 btrfs_mark_buffer_dirty(path->nodes[0]);
2444 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2445 printf("reset isize for dir %llu root %llu\n", rec->ino,
2446 root->root_key.objectid);
2448 btrfs_release_path(path);
/*
 * Insert the missing orphan item for rec->ino via btrfs_add_orphan_item(),
 * release @path, and clear I_ERR_NO_ORPHAN_ITEM from rec->errors.
 */
2452 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2453 struct btrfs_root *root,
2454 struct btrfs_path *path,
2455 struct inode_record *rec)
2459 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2460 btrfs_release_path(path);
2462 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with the byte
 * count accumulated in rec->found_size, mark the leaf dirty, clear
 * I_ERR_FILE_NBYTES_WRONG from rec->errors and release @path.
 */
2466 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2467 struct btrfs_root *root,
2468 struct btrfs_path *path,
2469 struct inode_record *rec)
2471 struct btrfs_inode_item *ei;
2472 struct btrfs_key key;
2475 key.objectid = rec->ino;
2476 key.type = BTRFS_INODE_ITEM_KEY;
2479 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2486 /* Since ret == 0, no need to check anything */
2487 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2488 struct btrfs_inode_item);
2489 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2490 btrfs_mark_buffer_dirty(path->nodes[0]);
2491 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2492 printf("reset nbytes for ino %llu root %llu\n",
2493 rec->ino, root->root_key.objectid);
2495 btrfs_release_path(path);
/*
 * Recreate the DIR_INDEX item that @backref says should exist for inode
 * @rec in directory backref->dir at index backref->index.
 *
 * The item is inserted in its own transaction: an empty item sized for a
 * btrfs_dir_item plus the name is created, then filled with a location key
 * pointing at the inode item of rec->ino, the file type derived from
 * rec->imode, a zero data length and the name.
 *
 * Afterwards the in-memory state is brought in line: the backref is marked
 * as having a dir index, and the directory's record gets the name length
 * added to found_size, with I_ERR_DIR_ISIZE_WRONG cleared or set depending
 * on whether found_size now matches isize.
 */
2499 static int add_missing_dir_index(struct btrfs_root *root,
2500 struct cache_tree *inode_cache,
2501 struct inode_record *rec,
2502 struct inode_backref *backref)
2504 struct btrfs_path path;
2505 struct btrfs_trans_handle *trans;
2506 struct btrfs_dir_item *dir_item;
2507 struct extent_buffer *leaf;
2508 struct btrfs_key key;
2509 struct btrfs_disk_key disk_key;
2510 struct inode_record *dir_rec;
2511 unsigned long name_ptr;
2512 u32 data_size = sizeof(*dir_item) + backref->namelen;
2515 trans = btrfs_start_transaction(root, 1);
2517 return PTR_ERR(trans);
2519 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2520 (unsigned long long)rec->ino);
2522 btrfs_init_path(&path);
2523 key.objectid = backref->dir;
2524 key.type = BTRFS_DIR_INDEX_KEY;
2525 key.offset = backref->index;
2526 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2529 leaf = path.nodes[0];
2530 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2532 disk_key.objectid = cpu_to_le64(rec->ino);
2533 disk_key.type = BTRFS_INODE_ITEM_KEY;
2534 disk_key.offset = 0;
2536 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2537 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2538 btrfs_set_dir_data_len(leaf, dir_item, 0);
2539 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2540 name_ptr = (unsigned long)(dir_item + 1);
2541 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2542 btrfs_mark_buffer_dirty(leaf);
2543 btrfs_release_path(&path);
2544 btrfs_commit_transaction(trans, root);
2546 backref->found_dir_index = 1;
2547 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2548 BUG_ON(IS_ERR(dir_rec));
2551 dir_rec->found_size += backref->namelen;
2552 if (dir_rec->found_size == dir_rec->isize &&
2553 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2554 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2555 if (dir_rec->found_size != dir_rec->isize)
2556 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the bad DIR_INDEX entry described by @backref (directory, name and
 * index) from @root, inside its own transaction. The entry is located with
 * btrfs_lookup_dir_index(); the visible code then removes it either with
 * btrfs_del_item() or with btrfs_delete_one_dir_name(). The path is
 * released and the transaction committed on the paths shown.
 */
2561 static int delete_dir_index(struct btrfs_root *root,
2562 struct inode_backref *backref)
2564 struct btrfs_trans_handle *trans;
2565 struct btrfs_dir_item *di;
2566 struct btrfs_path path;
2569 trans = btrfs_start_transaction(root, 1);
2571 return PTR_ERR(trans);
2573 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2574 (unsigned long long)backref->dir,
2575 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2576 (unsigned long long)root->objectid);
2578 btrfs_init_path(&path);
2579 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2580 backref->name, backref->namelen,
2581 backref->index, -1);
2584 btrfs_release_path(&path);
2585 btrfs_commit_transaction(trans, root);
2592 ret = btrfs_del_item(trans, root, &path);
2594 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2596 btrfs_release_path(&path);
2597 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for @rec from what the check has learned
 * about the inode, inside its own transaction.
 *
 * The new item gets the current transaction id as generation, nbytes from
 * rec->found_size, and atime/ctime/mtime set to "now" (otime is zero).
 * nlink is set either to 1 or to rec->found_link. If a dir item was seen
 * for the inode it becomes a directory (S_IFDIR | 0755, size found_size),
 * even when file extents were seen as well; otherwise it becomes a regular
 * file (S_IFREG | 0755, size rec->extent_end). Ownership and the original
 * permissions cannot be recovered, as the message printed to stderr warns.
 */
2601 static int create_inode_item(struct btrfs_root *root,
2602 struct inode_record *rec,
2605 struct btrfs_trans_handle *trans;
2606 struct btrfs_inode_item inode_item;
2607 time_t now = time(NULL);
2610 trans = btrfs_start_transaction(root, 1);
2611 if (IS_ERR(trans)) {
2612 ret = PTR_ERR(trans);
2616 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2617 "be incomplete, please check permissions and content after "
2618 "the fsck completes.\n", (unsigned long long)root->objectid,
2619 (unsigned long long)rec->ino);
2621 memset(&inode_item, 0, sizeof(inode_item));
2622 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2624 btrfs_set_stack_inode_nlink(&inode_item, 1);
2626 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2627 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2628 if (rec->found_dir_item) {
2629 if (rec->found_file_extent)
2630 fprintf(stderr, "root %llu inode %llu has both a dir "
2631 "item and extents, unsure if it is a dir or a "
2632 "regular file so setting it as a directory\n",
2633 (unsigned long long)root->objectid,
2634 (unsigned long long)rec->ino);
2635 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2636 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2637 } else if (!rec->found_dir_item) {
2638 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2639 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2641 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2642 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2643 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2644 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2645 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2646 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2647 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2648 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2650 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2652 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the backrefs of inode @rec. The delete flag selects the
 * pass: the delete pass removes bad dir index items, the other pass adds
 * what is missing.
 *
 * Per backref, the visible cases are:
 *  - root directory without an inode item (non-delete pass): recreate the
 *    inode item;
 *  - index 0 of the root directory is special-cased;
 *  - dir index without a matching inode ref, or with an index mismatch:
 *    delete_dir_index(), then drop the backref from the list;
 *  - dir item and inode ref but no dir index: add_missing_dir_index();
 *  - only an inode ref: after check_dir_conflict(), insert the missing
 *    dir index/item pair in a new transaction;
 *  - all three present and consistent but no inode item:
 *    create_inode_item().
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 */
2656 static int repair_inode_backrefs(struct btrfs_root *root,
2657 struct inode_record *rec,
2658 struct cache_tree *inode_cache,
2661 struct inode_backref *tmp, *backref;
2662 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2666 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2667 if (!delete && rec->ino == root_dirid) {
2668 if (!rec->found_inode_item) {
2669 ret = create_inode_item(root, rec, 1);
2676 /* Index 0 for root dir's are special, don't mess with it */
2677 if (rec->ino == root_dirid && backref->index == 0)
2681 ((backref->found_dir_index && !backref->found_inode_ref) ||
2682 (backref->found_dir_index && backref->found_inode_ref &&
2683 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2684 ret = delete_dir_index(root, backref);
2688 list_del(&backref->list);
2693 if (!delete && !backref->found_dir_index &&
2694 backref->found_dir_item && backref->found_inode_ref) {
2695 ret = add_missing_dir_index(root, inode_cache, rec,
2700 if (backref->found_dir_item &&
2701 backref->found_dir_index) {
2702 if (!backref->errors &&
2703 backref->found_inode_ref) {
2704 list_del(&backref->list);
2711 if (!delete && (!backref->found_dir_index &&
2712 !backref->found_dir_item &&
2713 backref->found_inode_ref)) {
2714 struct btrfs_trans_handle *trans;
2715 struct btrfs_key location;
2717 ret = check_dir_conflict(root, backref->name,
2723 * let nlink fixing routine to handle it,
2724 * which can do it better.
2729 location.objectid = rec->ino;
2730 location.type = BTRFS_INODE_ITEM_KEY;
2731 location.offset = 0;
2733 trans = btrfs_start_transaction(root, 1);
2734 if (IS_ERR(trans)) {
2735 ret = PTR_ERR(trans);
2738 fprintf(stderr, "adding missing dir index/item pair "
2740 (unsigned long long)rec->ino);
2741 ret = btrfs_insert_dir_item(trans, root, backref->name,
2743 backref->dir, &location,
2744 imode_to_type(rec->imode),
2747 btrfs_commit_transaction(trans, root);
2751 if (!delete && (backref->found_inode_ref &&
2752 backref->found_dir_index &&
2753 backref->found_dir_item &&
2754 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2755 !rec->found_inode_item)) {
2756 ret = create_inode_item(root, rec, 0);
2763 return ret ? ret : repaired;
2767 * To determine the file type for nlink/inode_item repair
2769 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2770 * Return -ENOENT if file type is not found.
/*
 * Store the BTRFS_FT_* file type of @rec through @type.
 *
 * When the inode item was found the type is derived from rec->imode via
 * imode_to_type(); otherwise the filetype recorded in a backref that has a
 * dir index or a dir item is used.
 * NOTE(review): the return statements are not visible here; the 0 / -ENOENT
 * contract is taken from the existing header comment.
 */
2772 static int find_file_type(struct inode_record *rec, u8 *type)
2774 struct inode_backref *backref;
2776 /* For inode item recovered case */
2777 if (rec->found_inode_item) {
2778 *type = imode_to_type(rec->imode);
/* No inode item: fall back to the type stored with a directory entry. */
2782 list_for_each_entry(backref, &rec->backrefs, list) {
2783 if (backref->found_dir_index || backref->found_dir_item) {
2784 *type = backref->filetype;
2792 * To determine the file name for nlink repair
2794 * Return 0 if file name is found, set name and namelen.
2795 * Return -ENOENT if file name is not found.
/*
 * Copy a file name for @rec into @name and its length into @namelen.
 *
 * Any backref that has a dir index, a dir item or an inode ref is accepted
 * as the source of the name. The visible memcpy() copies exactly
 * backref->namelen bytes and does not append a terminator.
 * NOTE(review): no check of backref->namelen against the size of @name is
 * visible; the caller in this file passes a BTRFS_NAME_LEN byte buffer.
 */
2797 static int find_file_name(struct inode_record *rec,
2798 char *name, int *namelen)
2800 struct inode_backref *backref;
2802 list_for_each_entry(backref, &rec->backrefs, list) {
2803 if (backref->found_dir_index || backref->found_dir_item ||
2804 backref->found_inode_ref) {
2805 memcpy(name, backref->name, backref->namelen);
2806 *namelen = backref->namelen;
2813 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from its backrefs.
 *
 * Steps visible below: zero rec->found_link, unlink every backref with
 * btrfs_unlink(), drop from the list each backref that lacks any of
 * dir index / dir item / inode ref, set the nlink field of the inode item
 * to 0, then re-add the remaining backrefs with btrfs_add_link().
 * NOTE(review): the error checks after each call and the final return are
 * not visible here.
 */
2814 static int reset_nlink(struct btrfs_trans_handle *trans,
2815 struct btrfs_root *root,
2816 struct btrfs_path *path,
2817 struct inode_record *rec)
2819 struct inode_backref *backref;
2820 struct inode_backref *tmp;
2821 struct btrfs_key key;
2822 struct btrfs_inode_item *inode_item;
2825 /* We don't believe this either, reset it and iterate backref */
2826 rec->found_link = 0;
2828 /* Remove all backref including the valid ones */
/* The _safe iterator is needed because entries are deleted inside the loop. */
2829 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2830 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2831 backref->index, backref->name,
2832 backref->namelen, 0);
2836 /* remove invalid backref, so it won't be added back */
2837 if (!(backref->found_dir_index &&
2838 backref->found_dir_item &&
2839 backref->found_inode_ref)) {
2840 list_del(&backref->list);
2847 /* Set nlink to 0 */
2848 key.objectid = rec->ino;
2849 key.type = BTRFS_INODE_ITEM_KEY;
2851 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2858 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2859 struct btrfs_inode_item);
2860 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2861 btrfs_mark_buffer_dirty(path->nodes[0]);
2862 btrfs_release_path(path);
2865 * Add back valid inode_ref/dir_item/dir_index,
2866 * add_link() will handle the nlink inc, so new nlink must be correct
2868 list_for_each_entry(backref, &rec->backrefs, list) {
2869 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2870 backref->name, backref->namelen,
2871 backref->filetype, &backref->index, 1);
2876 btrfs_release_path(path);
/*
 * Find the highest inode objectid currently present in @root and store it
 * through highest_ino.
 *
 * The search targets an INODE_ITEM key at BTRFS_LAST_FREE_OBJECTID and the
 * key of the slot just before the resulting position is read back.
 * NOTE(review): the fourth parameter line, the guard around the key read
 * and the action taken when the result reaches BTRFS_LAST_FREE_OBJECTID
 * are not visible here.
 */
2880 static int get_highest_inode(struct btrfs_trans_handle *trans,
2881 struct btrfs_root *root,
2882 struct btrfs_path *path,
2885 struct btrfs_key key, found_key;
2888 btrfs_init_path(path);
2889 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2891 key.type = BTRFS_INODE_ITEM_KEY;
2892 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* slots[0] - 1: the item immediately preceding the search position. */
2894 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2895 path->slots[0] - 1);
2896 *highest_ino = found_key.objectid;
2899 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2901 btrfs_release_path(path);
/*
 * Fix the link count of @rec and, if the inode ends up with no link at all,
 * link it into a lost+found directory.
 *
 * The name and type are recovered first (falling back to the inode number
 * as name and BTRFS_FT_REG_FILE as type), then reset_nlink() rebuilds the
 * links. When rec->found_link is still 0 afterwards, btrfs_mkdir() is
 * called for dir_name under BTRFS_FIRST_FREE_OBJECTID and the inode is
 * linked there; while the link attempt returns -EEXIST a suffix is appended
 * to the name and the link is retried. I_ERR_LINK_COUNT_WRONG is cleared
 * near the end regardless of the outcome (see the existing comment there).
 */
2905 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 char *dir_name = "lost+found";
2911 char namebuf[BTRFS_NAME_LEN] = {0};
2916 int name_recovered = 0;
2917 int type_recovered = 0;
2921 * Get file name and type first before these invalid inode ref
2922 * are deleted by remove_all_invalid_backref()
/* Both helpers signal success with 0, hence the negation. */
2924 name_recovered = !find_file_name(rec, namebuf, &namelen);
2925 type_recovered = !find_file_type(rec, &type);
2927 if (!name_recovered) {
2928 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2929 rec->ino, rec->ino);
2930 namelen = count_digits(rec->ino);
/*
 * NOTE(review): unbounded sprintf(); this is safe only if BTRFS_NAME_LEN
 * can hold the printed inode number plus a terminator - confirm.
 */
2931 sprintf(namebuf, "%llu", rec->ino);
2934 if (!type_recovered) {
2935 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2937 type = BTRFS_FT_REG_FILE;
2941 ret = reset_nlink(trans, root, path, rec);
2944 "Failed to reset nlink for inode %llu: %s\n",
2945 rec->ino, strerror(-ret));
/* No link survived reset_nlink(): the inode has to go to lost+found. */
2949 if (rec->found_link == 0) {
2950 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2954 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2955 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2958 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2959 dir_name, strerror(-ret));
2962 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2963 namebuf, namelen, type, NULL, 1);
2965 * Add ".INO" suffix several times to handle case where
2966 * "FILENAME.INO" is already taken by another file.
2968 while (ret == -EEXIST) {
2970 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2972 if (namelen + count_digits(rec->ino) + 1 >
2977 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2979 namelen += count_digits(rec->ino) + 1;
2980 ret = btrfs_add_link(trans, root, rec->ino,
2981 lost_found_ino, namebuf,
2982 namelen, type, NULL, 1);
2986 "Failed to link the inode %llu to %s dir: %s\n",
2987 rec->ino, dir_name, strerror(-ret));
2991 * Just increase the found_link, don't actually add the
2992 * backref. This will make things easier and this inode
2993 * record will be freed after the repair is done.
2994 * So fsck will not report problem about this inode.
2997 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2998 namelen, namebuf, dir_name);
3000 printf("Fixed the nlink of inode %llu\n", rec->ino);
3003 * Clear the flag anyway, or we will loop forever for the same inode
3004 * as it will not be removed from the bad inode list and the dead loop
3007 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3008 btrfs_release_path(path);
3013 * Check if there is any normal(reg or prealloc) file extent for given
3015 * This is used to determine the file type when neither its dir_index/item or
3016 * inode_item exists.
3018 * This will *NOT* report errors; if any error happens, the inode is simply
3019 * treated as not having any normal file extent.
/*
 * Look for a file extent item of inode @ino in @root whose type is not
 * BTRFS_FILE_EXTENT_INLINE.
 *
 * The search is started with a NULL transaction handle at an EXTENT_DATA
 * key. The visible checks compare each found key against @ino and
 * BTRFS_EXTENT_DATA_KEY and test the extent type of the item.
 * NOTE(review): the loop construct, the remaining key fields and the
 * return value are not visible here.
 */
3021 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3023 struct btrfs_path path;
3024 struct btrfs_key key;
3025 struct btrfs_key found_key;
3026 struct btrfs_file_extent_item *fi;
3030 btrfs_init_path(&path);
3032 key.type = BTRFS_EXTENT_DATA_KEY;
3035 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search ended past the last item of the leaf: move to the next leaf. */
3040 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3041 ret = btrfs_next_leaf(root, &path);
3048 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3050 if (found_key.objectid != ino ||
3051 found_key.type != BTRFS_EXTENT_DATA_KEY)
3053 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3054 struct btrfs_file_extent_item);
3055 type = btrfs_file_extent_type(path.nodes[0], fi);
3056 if (type != BTRFS_FILE_EXTENT_INLINE) {
3062 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF* file
 * type bits using a static lookup table.
 * NOTE(review): @type indexes the table directly with no range check; a
 * value above the highest designated index would read past the table.
 */
3066 static u32 btrfs_type_to_imode(u8 type)
3068 static u32 imode_by_btrfs_type[] = {
3069 [BTRFS_FT_REG_FILE] = S_IFREG,
3070 [BTRFS_FT_DIR] = S_IFDIR,
3071 [BTRFS_FT_CHRDEV] = S_IFCHR,
3072 [BTRFS_FT_BLKDEV] = S_IFBLK,
3073 [BTRFS_FT_FIFO] = S_IFIFO,
3074 [BTRFS_FT_SOCK] = S_IFSOCK,
3075 [BTRFS_FT_SYMLINK] = S_IFLNK,
3078 return imode_by_btrfs_type[(type)];
/*
 * Recreate the missing inode item for @rec.
 *
 * The file type comes from find_file_type() when possible. Otherwise it is
 * guessed from what was recorded for the inode: found_file_extent together
 * with find_normal_file_extent(), then found_dir_item, then a non-empty
 * orphan_extents list, with BTRFS_FT_REG_FILE as the last resort.
 * btrfs_new_inode() is then called and the record is updated:
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs afterwards.
 */
3081 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3082 struct btrfs_root *root,
3083 struct btrfs_path *path,
3084 struct inode_record *rec)
3088 int type_recovered = 0;
3091 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() signals success with 0, hence the negation. */
3093 type_recovered = !find_file_type(rec, &filetype);
3096 * Try to determine inode type if type not found.
3098 * For found regular file extent, it must be FILE.
3099 * For found dir_item/index, it must be DIR.
3101 * For undetermined one, use FILE as fallback.
3104 * 1. If found backref(inode_index/item is already handled) to it,
3106 * Need new inode-inode ref structure to allow search for that.
3108 if (!type_recovered) {
3109 if (rec->found_file_extent &&
3110 find_normal_file_extent(root, rec->ino)) {
3112 filetype = BTRFS_FT_REG_FILE;
3113 } else if (rec->found_dir_item) {
3115 filetype = BTRFS_FT_DIR;
3116 } else if (!list_empty(&rec->orphan_extents)) {
3118 filetype = BTRFS_FT_REG_FILE;
3120 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3123 filetype = BTRFS_FT_REG_FILE;
3127 ret = btrfs_new_inode(trans, root, rec->ino,
3128 mode | btrfs_type_to_imode(filetype));
3133 * Here inode rebuild is done, we only rebuild the inode item,
3134 * don't repair the nlink(like move to lost+found).
3135 * That is the job of nlink repair.
3137 * We just fill the record and return
/*
 * NOTE(review): this sets found_dir_item rather than found_inode_item even
 * though an inode item was just created - confirm that this is intended.
 */
3139 rec->found_dir_item = 1;
3140 rec->imode = mode | btrfs_type_to_imode(filetype);
3142 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3143 /* Ensure the inode_nlinks repair function will be called */
3144 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded for @rec.
 *
 * For every entry on rec->orphan_extents the file range is probed with
 * btrfs_get_extent(). The visible code then either frees the extent with
 * btrfs_free_extent() (after the conflict message) or inserts a file extent
 * item covering disk_len bytes, adds disk_len to rec->found_size, removes
 * the range from rec->holes and unlinks the entry from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 * NOTE(review): the branch conditions and the error handling between these
 * calls are not visible here.
 */
3149 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3150 struct btrfs_root *root,
3151 struct btrfs_path *path,
3152 struct inode_record *rec)
3154 struct orphan_data_extent *orphan;
3155 struct orphan_data_extent *tmp;
/* The _safe iterator is needed because entries are deleted inside the loop. */
3158 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3160 * Check for conflicting file extents
3162 * Here we don't know whether the extents is compressed or not,
3163 * so we can only assume it not compressed nor data offset,
3164 * and use its disk_len as extent length.
3166 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3167 orphan->offset, orphan->disk_len, 0);
3168 btrfs_release_path(path);
3173 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3174 orphan->disk_bytenr, orphan->disk_len);
3175 ret = btrfs_free_extent(trans,
3176 root->fs_info->extent_root,
3177 orphan->disk_bytenr, orphan->disk_len,
3178 0, root->objectid, orphan->objectid,
3183 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3184 orphan->offset, orphan->disk_bytenr,
3185 orphan->disk_len, orphan->disk_len);
3189 /* Update file size info */
3190 rec->found_size += orphan->disk_len;
3191 if (rec->found_size == rec->nbytes)
3192 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3194 /* Update the file extent hole info too */
3195 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3199 if (RB_EMPTY_ROOT(&rec->holes))
3200 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3202 list_del(&orphan->list);
3205 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes by punching explicit holes.
 *
 * Each hole is taken from the front of the rb-tree, passed to
 * btrfs_punch_hole() and removed with del_file_extent_hole();
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty. A second
 * btrfs_punch_hole() call covers the range from 0 to the inode size rounded
 * up to the sector size, for the case named in the existing comment.
 * NOTE(review): the loop condition, the condition guarding the special
 * case and the error handling are not visible here.
 */
3210 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3211 struct btrfs_root *root,
3212 struct btrfs_path *path,
3213 struct inode_record *rec)
3215 struct rb_node *node;
3216 struct file_extent_hole *hole;
3220 node = rb_first(&rec->holes);
3224 hole = rb_entry(node, struct file_extent_hole, node);
3225 ret = btrfs_punch_hole(trans, root, rec->ino,
3226 hole->start, hole->len);
3229 ret = del_file_extent_hole(&rec->holes, hole->start,
3233 if (RB_EMPTY_ROOT(&rec->holes))
3234 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Re-read the first node: the one just processed has been removed. */
3235 node = rb_first(&rec->holes);
3237 /* special case for a file losing all its file extent */
3239 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3240 round_up(rec->isize,
3241 root->fs_info->sectorsize));
3245 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3246 rec->ino, root->objectid);
/*
 * Run every applicable repair for @rec inside one transaction.
 *
 * The function first tests whether any of the listed I_ERR_* bits is set
 * (the action taken when none is set is not visible here). The repairs then
 * run in a fixed order, and each later step is skipped once an earlier one
 * has returned non-zero. The nlink repair comes after the inode item
 * repair, which sets I_ERR_LINK_COUNT_WRONG on the record.
 * NOTE(review): the result of btrfs_commit_transaction() is discarded, and
 * the declaration/initial value of ret is not visible here.
 */
3251 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3253 struct btrfs_trans_handle *trans;
3254 struct btrfs_path path;
3257 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3258 I_ERR_NO_ORPHAN_ITEM |
3259 I_ERR_LINK_COUNT_WRONG |
3260 I_ERR_NO_INODE_ITEM |
3261 I_ERR_FILE_EXTENT_ORPHAN |
3262 I_ERR_FILE_EXTENT_DISCOUNT|
3263 I_ERR_FILE_NBYTES_WRONG)))
3267 * For nlink repair, it may create a dir and add link, so
3268 * 2 for parent(256)'s dir_index and dir_item
3269 * 2 for lost+found dir's inode_item and inode_ref
3270 * 1 for the new inode_ref of the file
3271 * 2 for lost+found dir's dir_index and dir_item for the file
3273 trans = btrfs_start_transaction(root, 7);
3275 return PTR_ERR(trans);
3277 btrfs_init_path(&path);
3278 if (rec->errors & I_ERR_NO_INODE_ITEM)
3279 ret = repair_inode_no_item(trans, root, &path, rec);
3280 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3281 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3282 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3283 ret = repair_inode_discount_extent(trans, root, &path, rec);
3284 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3285 ret = repair_inode_isize(trans, root, &path, rec);
3286 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3287 ret = repair_inode_orphan_item(trans, root, &path, rec);
3288 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3289 ret = repair_inode_nlinks(trans, root, &path, rec);
3290 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3291 ret = repair_inode_nbytes(trans, root, &path, rec);
3292 btrfs_commit_transaction(trans, root);
3293 btrfs_release_path(&path);
/*
 * Validate, optionally repair, and finally release every inode record
 * cached for @root in @inode_cache.
 *
 * Phases visible below:
 *  - a root with zero refs is only checked for a non-empty cache;
 *  - while repair is enabled, the records are walked and
 *    repair_inode_backrefs() is called on those that have backrefs (the
 *    existing comment explains why this is done in stages);
 *  - the record of the root directory is fetched and checked with
 *    check_root_dir(); a missing root directory can be recreated with
 *    btrfs_make_root_dir();
 *  - the cache is drained: each record is removed, missing orphan item /
 *    inode item / link count problems are flagged, try_repair_inode() is
 *    called, and whatever remains is printed together with its backrefs.
 * Returns -1 when the error counter is positive, 0 otherwise.
 * NOTE(review): most branch conditions and loop headers are not visible
 * here, so the exact control flow between these phases must be confirmed.
 */
3297 static int check_inode_recs(struct btrfs_root *root,
3298 struct cache_tree *inode_cache)
3300 struct cache_extent *cache;
3301 struct ptr_node *node;
3302 struct inode_record *rec;
3303 struct inode_backref *backref;
3308 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3310 if (btrfs_root_refs(&root->root_item) == 0) {
3311 if (!cache_tree_empty(inode_cache))
3312 fprintf(stderr, "warning line %d\n", __LINE__);
3317 * We need to repair backrefs first because we could change some of the
3318 * errors in the inode recs.
3320 * We also need to go through and delete invalid backrefs first and then
3321 * add the correct ones second. We do this because we may get EEXIST
3322 * when adding back the correct index because we hadn't yet deleted the
3325 * For example, if we were missing a dir index then the directories
3326 * isize would be wrong, so if we fixed the isize to what we thought it
3327 * would be and then fixed the backref we'd still have a invalid fs, so
3328 * we need to add back the dir index and then check to see if the isize
3333 if (stage == 3 && !err)
3336 cache = search_cache_extent(inode_cache, 0);
3337 while (repair && cache) {
3338 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current record can be removed from the tree. */
3340 cache = next_cache_extent(cache);
3342 /* Need to free everything up and rescan */
3344 remove_cache_extent(inode_cache, &node->cache);
3346 free_inode_rec(rec);
3350 if (list_empty(&rec->backrefs))
3353 ret = repair_inode_backrefs(root, rec, inode_cache,
3367 rec = get_inode_rec(inode_cache, root_dirid, 0);
3368 BUG_ON(IS_ERR(rec));
3370 ret = check_root_dir(rec);
3372 fprintf(stderr, "root %llu root dir %llu error\n",
3373 (unsigned long long)root->root_key.objectid,
3374 (unsigned long long)root_dirid);
3375 print_inode_error(root, rec);
3380 struct btrfs_trans_handle *trans;
3382 trans = btrfs_start_transaction(root, 1);
3383 if (IS_ERR(trans)) {
3384 err = PTR_ERR(trans);
3389 "root %llu missing its root dir, recreating\n",
3390 (unsigned long long)root->objectid);
3392 ret = btrfs_make_root_dir(trans, root, root_dirid);
3395 btrfs_commit_transaction(trans, root);
3399 fprintf(stderr, "root %llu root dir %llu not found\n",
3400 (unsigned long long)root->root_key.objectid,
3401 (unsigned long long)root_dirid);
/* Final pass: every record is taken out of the cache and dealt with. */
3405 cache = search_cache_extent(inode_cache, 0);
3408 node = container_of(cache, struct ptr_node, cache);
3410 remove_cache_extent(inode_cache, &node->cache);
/* The root directory and the orphan objectid are released without checks. */
3412 if (rec->ino == root_dirid ||
3413 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3414 free_inode_rec(rec);
3418 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3419 ret = check_orphan_item(root, rec->ino);
3421 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3422 if (can_free_inode_rec(rec)) {
3423 free_inode_rec(rec);
3428 if (!rec->found_inode_item)
3429 rec->errors |= I_ERR_NO_INODE_ITEM;
3430 if (rec->found_link != rec->nlink)
3431 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3433 ret = try_repair_inode(root, rec);
3434 if (ret == 0 && can_free_inode_rec(rec)) {
3435 free_inode_rec(rec);
3441 if (!(repair && ret == 0))
3443 print_inode_error(root, rec);
/* Turn each missing found_* flag into an error bit before printing. */
3444 list_for_each_entry(backref, &rec->backrefs, list) {
3445 if (!backref->found_dir_item)
3446 backref->errors |= REF_ERR_NO_DIR_ITEM;
3447 if (!backref->found_dir_index)
3448 backref->errors |= REF_ERR_NO_DIR_INDEX;
3449 if (!backref->found_inode_ref)
3450 backref->errors |= REF_ERR_NO_INODE_REF;
3451 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3452 " namelen %u name %s filetype %d errors %x",
3453 (unsigned long long)backref->dir,
3454 (unsigned long long)backref->index,
3455 backref->namelen, backref->name,
3456 backref->filetype, backref->errors);
3457 print_ref_error(backref->errors);
3459 free_inode_rec(rec);
3461 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for objectid in @root_cache; the visible code
 * also zero-allocates a new record, gives it an empty backref list and
 * inserts it as a cache extent of size 1 starting at objectid (presumably
 * only when the lookup misses - the condition is not visible here).
 *
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST) on the visible failure paths.
 * NOTE(review): the second parameter line is not visible, and neither is
 * whether the new record is freed before the -EEXIST return.
 */
3464 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3467 struct cache_extent *cache;
3468 struct root_record *rec = NULL;
3471 cache = lookup_cache_extent(root_cache, objectid, 1);
3473 rec = container_of(cache, struct root_record, cache);
3475 rec = calloc(1, sizeof(*rec));
3477 return ERR_PTR(-ENOMEM);
3478 rec->objectid = objectid;
3479 INIT_LIST_HEAD(&rec->backrefs);
3480 rec->cache.start = objectid;
3481 rec->cache.size = 1;
3483 ret = insert_cache_extent(root_cache, &rec->cache);
3485 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or
 * allocate a new one and append it to rec->backrefs.
 *
 * Matching compares ref_root, dir, namelen and the name bytes; @index is
 * not part of the comparison and is only stored on a newly created entry.
 * The name is copied into the storage allocated after the struct and is
 * NUL-terminated.
 * NOTE(review): the check of the calloc() result and the return statements
 * are not visible here.
 */
3490 static struct root_backref *get_root_backref(struct root_record *rec,
3491 u64 ref_root, u64 dir, u64 index,
3492 const char *name, int namelen)
3494 struct root_backref *backref;
3496 list_for_each_entry(backref, &rec->backrefs, list) {
3497 if (backref->ref_root != ref_root || backref->dir != dir ||
3498 backref->namelen != namelen)
3500 if (memcmp(name, backref->name, namelen))
/* namelen + 1 extra bytes: room for the name and its terminator. */
3505 backref = calloc(1, sizeof(*backref) + namelen + 1);
3508 backref->ref_root = ref_root;
3510 backref->index = index;
3511 backref->namelen = namelen;
3512 memcpy(backref->name, name, namelen);
3513 backref->name[namelen] = '\0';
3514 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release a root_record reached through its embedded cache_extent: every
 * backref is unlinked from rec->backrefs.
 * NOTE(review): the statements that free the backrefs and the record are
 * not visible here.
 */
3518 static void free_root_record(struct cache_extent *cache)
3520 struct root_record *rec;
3521 struct root_backref *backref;
3523 rec = container_of(cache, struct root_record, cache);
3524 while (!list_empty(&rec->backrefs)) {
3525 backref = to_root_backref(rec->backrefs.next);
3526 list_del(&backref->list);
/*
 * Presumably generates free_root_recs_tree(), which check_fs_roots() calls,
 * with free_root_record() as the per-entry callback - confirm against the
 * macro definition.
 */
3533 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of @item_type links root @root_id to @ref_root.
 *
 * The matching root_backref is fetched or created, @errors is OR-ed into
 * it and the found_* flag that corresponds to @item_type is set. For every
 * type other than BTRFS_DIR_ITEM_KEY the index is compared with the stored
 * one when a dir index, back ref or forward ref was already seen, and
 * REF_ERR_INDEX_UNMATCH is set on a mismatch. Duplicate ROOT_REF and
 * ROOT_BACKREF items are flagged. A backref that has both a forward ref and
 * a dir item is marked reachable.
 * NOTE(review): the statements on the omitted lines (including the check of
 * the get_root_backref() result and the return) are not visible here.
 */
3535 static int add_root_backref(struct cache_tree *root_cache,
3536 u64 root_id, u64 ref_root, u64 dir, u64 index,
3537 const char *name, int namelen,
3538 int item_type, int errors)
3540 struct root_record *rec;
3541 struct root_backref *backref;
3543 rec = get_root_rec(root_cache, root_id);
3544 BUG_ON(IS_ERR(rec));
3545 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3548 backref->errors |= errors;
3550 if (item_type != BTRFS_DIR_ITEM_KEY) {
3551 if (backref->found_dir_index || backref->found_back_ref ||
3552 backref->found_forward_ref) {
3553 if (backref->index != index)
3554 backref->errors |= REF_ERR_INDEX_UNMATCH;
3556 backref->index = index;
3560 if (item_type == BTRFS_DIR_ITEM_KEY) {
3561 if (backref->found_forward_ref)
3563 backref->found_dir_item = 1;
3564 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3565 backref->found_dir_index = 1;
3566 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3567 if (backref->found_forward_ref)
3568 backref->errors |= REF_ERR_DUP_ROOT_REF;
3569 else if (backref->found_dir_item)
3571 backref->found_forward_ref = 1;
3572 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3573 if (backref->found_back_ref)
3574 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3575 backref->found_back_ref = 1;
3580 if (backref->found_forward_ref && backref->found_dir_item)
3581 backref->reachable = 1;
/*
 * Move the records collected in @src_cache into @dst_cache as root
 * backrefs.
 *
 * For the tree reloc root the source records are simply freed. Otherwise
 * each record is removed from @src_cache, is_child_root() is consulted for
 * rec->ino, and every backref that has a dir item and/or a dir index is
 * reported to add_root_backref() with the matching key type before the
 * record is freed. A backref with found_inode_ref set triggers BUG_ON().
 * NOTE(review): the loop header and the handling of the is_child_root()
 * result are not visible here.
 */
3585 static int merge_root_recs(struct btrfs_root *root,
3586 struct cache_tree *src_cache,
3587 struct cache_tree *dst_cache)
3589 struct cache_extent *cache;
3590 struct ptr_node *node;
3591 struct inode_record *rec;
3592 struct inode_backref *backref;
3595 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3596 free_inode_recs_tree(src_cache);
3601 cache = search_cache_extent(src_cache, 0);
3604 node = container_of(cache, struct ptr_node, cache);
3606 remove_cache_extent(src_cache, &node->cache);
3609 ret = is_child_root(root, root->objectid, rec->ino);
3615 list_for_each_entry(backref, &rec->backrefs, list) {
3616 BUG_ON(backref->found_inode_ref);
3617 if (backref->found_dir_item)
3618 add_root_backref(dst_cache, rec->ino,
3619 root->root_key.objectid, backref->dir,
3620 backref->index, backref->name,
3621 backref->namelen, BTRFS_DIR_ITEM_KEY,
3623 if (backref->found_dir_index)
3624 add_root_backref(dst_cache, rec->ino,
3625 root->root_key.objectid, backref->dir,
3626 backref->index, backref->name,
3627 backref->namelen, BTRFS_DIR_INDEX_KEY,
3631 free_inode_rec(rec);
/*
 * Check the references between the roots recorded in @root_cache.
 *
 * First pass: for each record with a non-zero found_ref, every reachable
 * backref has its referencing root looked up; the backref is marked
 * unreachable unless that root has found_ref > 0 (the existing fixme notes
 * that circular references are not detected).
 * Second pass: unreferenced fs trees in the BTRFS_FIRST_FREE_OBJECTID ..
 * BTRFS_LAST_FREE_OBJECTID range are checked for an orphan item and
 * reported, missing found_* flags are turned into REF_ERR_* bits, and the
 * affected roots and backrefs are printed to stderr.
 * Returns 1 when the errors counter is positive, 0 otherwise.
 * NOTE(review): loop headers, continue/goto statements and the counter
 * updates are not visible here.
 */
3638 static int check_root_refs(struct btrfs_root *root,
3639 struct cache_tree *root_cache)
3641 struct root_record *rec;
3642 struct root_record *ref_root;
3643 struct root_backref *backref;
3644 struct cache_extent *cache;
3650 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3651 BUG_ON(IS_ERR(rec));
3654 /* fixme: this can not detect circular references */
3657 cache = search_cache_extent(root_cache, 0);
3661 rec = container_of(cache, struct root_record, cache);
3662 cache = next_cache_extent(cache);
3664 if (rec->found_ref == 0)
3667 list_for_each_entry(backref, &rec->backrefs, list) {
3668 if (!backref->reachable)
3671 ref_root = get_root_rec(root_cache,
3673 BUG_ON(IS_ERR(ref_root));
3674 if (ref_root->found_ref > 0)
3677 backref->reachable = 0;
3679 if (rec->found_ref == 0)
/* Reporting pass over all root records. */
3685 cache = search_cache_extent(root_cache, 0);
3689 rec = container_of(cache, struct root_record, cache);
3690 cache = next_cache_extent(cache);
3692 if (rec->found_ref == 0 &&
3693 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3694 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3695 ret = check_orphan_item(root->fs_info->tree_root,
3701 * If we don't have a root item then we likely just have
3702 * a dir item in a snapshot for this root but no actual
3703 * ref key or anything so it's meaningless.
3705 if (!rec->found_root_item)
3708 fprintf(stderr, "fs tree %llu not referenced\n",
3709 (unsigned long long)rec->objectid);
3713 if (rec->found_ref > 0 && !rec->found_root_item)
3715 list_for_each_entry(backref, &rec->backrefs, list) {
3716 if (!backref->found_dir_item)
3717 backref->errors |= REF_ERR_NO_DIR_ITEM;
3718 if (!backref->found_dir_index)
3719 backref->errors |= REF_ERR_NO_DIR_INDEX;
3720 if (!backref->found_back_ref)
3721 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3722 if (!backref->found_forward_ref)
3723 backref->errors |= REF_ERR_NO_ROOT_REF;
3724 if (backref->reachable && backref->errors)
3731 fprintf(stderr, "fs tree %llu refs %u %s\n",
3732 (unsigned long long)rec->objectid, rec->found_ref,
3733 rec->found_root_item ? "" : "not found");
3735 list_for_each_entry(backref, &rec->backrefs, list) {
3736 if (!backref->reachable)
3738 if (!backref->errors && rec->found_root_item)
3740 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3741 " index %llu namelen %u name %s errors %x\n",
3742 (unsigned long long)backref->ref_root,
3743 (unsigned long long)backref->dir,
3744 (unsigned long long)backref->index,
3745 backref->namelen, backref->name,
3747 print_ref_error(backref->errors);
3750 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and record it in
 * @root_cache.
 *
 * The dirid, sequence and name length are read from the item. A name longer
 * than BTRFS_NAME_LEN is cut to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG. The name bytes are read from directly behind the
 * fixed-size struct. For BTRFS_ROOT_REF_KEY key->offset is passed as
 * root_id and key->objectid as ref_root; for the other key type the two
 * are swapped.
 */
3753 static int process_root_ref(struct extent_buffer *eb, int slot,
3754 struct btrfs_key *key,
3755 struct cache_tree *root_cache)
3761 struct btrfs_root_ref *ref;
3762 char namebuf[BTRFS_NAME_LEN];
3765 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3767 dirid = btrfs_root_ref_dirid(eb, ref);
3768 index = btrfs_root_ref_sequence(eb, ref);
3769 name_len = btrfs_root_ref_name_len(eb, ref);
3771 if (name_len <= BTRFS_NAME_LEN) {
3775 len = BTRFS_NAME_LEN;
3776 error = REF_ERR_NAME_TOO_LONG;
/* ref + 1: the name starts right after the btrfs_root_ref struct. */
3778 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3780 if (key->type == BTRFS_ROOT_REF_KEY) {
3781 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3782 index, namebuf, len, key->type, error);
3784 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3785 index, namebuf, len, key->type, error);
/*
 * Release callback for one btrfs_corrupt_block reached through its embedded
 * cache_extent.
 * NOTE(review): the statement that frees the block is not visible here.
 */
3790 static void free_corrupt_block(struct cache_extent *cache)
3792 struct btrfs_corrupt_block *corrupt;
3794 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Presumably generates free_corrupt_blocks_tree(), which check_fs_root()
 * calls, with free_corrupt_block() as the per-entry callback - confirm
 * against the macro definition.
 */
3798 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3801 * Repair the btree of the given root.
3803 * The fix is to remove the node keys recorded in the corrupt_blocks
3804 * cache_tree and then rebalance the tree.
3805 * After the fix, the btree should be writeable.
/*
 * Drop the tree blocks listed in @corrupt_blocks from @root.
 *
 * Two passes over the cache tree run inside one transaction:
 *  1. for each entry, search down to corrupt->level for its key with
 *     ins_len 0, read the block pointer at the found slot, delete that
 *     pointer with btrfs_del_ptr() and free the extent it pointed to;
 *  2. search for each key again with ins_len -1 so that the tree gets
 *     rebalanced.
 * NOTE(review): the loop headers, the error handling and the return value
 * are not visible here; the result of btrfs_commit_transaction() is
 * discarded.
 */
3807 static int repair_btree(struct btrfs_root *root,
3808 struct cache_tree *corrupt_blocks)
3810 struct btrfs_trans_handle *trans;
3811 struct btrfs_path path;
3812 struct btrfs_corrupt_block *corrupt;
3813 struct cache_extent *cache;
3814 struct btrfs_key key;
3819 if (cache_tree_empty(corrupt_blocks))
3822 trans = btrfs_start_transaction(root, 1);
3823 if (IS_ERR(trans)) {
3824 ret = PTR_ERR(trans);
3825 fprintf(stderr, "Error starting transaction: %s\n",
3829 btrfs_init_path(&path);
3830 cache = first_cache_extent(corrupt_blocks);
3832 corrupt = container_of(cache, struct btrfs_corrupt_block,
3834 level = corrupt->level;
/* Stop the search at the level of the corrupted block. */
3835 path.lowest_level = level;
3836 key.objectid = corrupt->key.objectid;
3837 key.type = corrupt->key.type;
3838 key.offset = corrupt->key.offset;
3841 * Here we don't want to do any tree balance, since it may
3842 * cause a balance with corrupted brother leaf/node,
3843 * so ins_len set to 0 here.
3844 * Balance will be done after all corrupt node/leaf is deleted.
3846 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3849 offset = btrfs_node_blockptr(path.nodes[level],
3852 /* Remove the ptr */
3853 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3857 * Remove the corresponding extent
3858 * return value is not concerned.
3860 btrfs_release_path(&path);
3861 ret = btrfs_free_extent(trans, root, offset,
3862 root->fs_info->nodesize, 0,
3863 root->root_key.objectid, level - 1, 0);
3864 cache = next_cache_extent(cache);
3867 /* Balance the btree using btrfs_search_slot() */
3868 cache = first_cache_extent(corrupt_blocks);
3870 corrupt = container_of(cache, struct btrfs_corrupt_block,
3872 memcpy(&key, &corrupt->key, sizeof(key));
3873 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3876 /* return will always >0 since it won't find the item */
3878 btrfs_release_path(&path);
3879 cache = next_cache_extent(cache);
3882 btrfs_commit_transaction(trans, root);
3883 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible steps:
 *  - a local corrupt_blocks cache tree is installed in fs_info for the
 *    duration of the call;
 *  - unless this is the tree reloc root, its root_record is fetched and
 *    found_root_item is set when the root item has refs;
 *  - orphan data extents of the root are moved to their inode records and
 *    flagged with I_ERR_FILE_EXTENT_ORPHAN;
 *  - the root block is checked, then the walk starts either at the root
 *    node or, for a root with zero refs and a non-zero drop_progress
 *    objectid, at the position found for drop_progress;
 *  - walk_down_tree() / walk_up_tree() traverse the tree;
 *  - corrupted tree blocks are listed and handed to repair_btree();
 *  - merge_root_recs() and check_inode_recs() process what was collected;
 *  - the corrupt block tree and the orphan extent list are released.
 * NOTE(review): loop headers, several branch conditions, the error
 * bookkeeping and the return value are not visible here.
 */
3887 static int check_fs_root(struct btrfs_root *root,
3888 struct cache_tree *root_cache,
3889 struct walk_control *wc)
3895 struct btrfs_path path;
3896 struct shared_node root_node;
3897 struct root_record *rec;
3898 struct btrfs_root_item *root_item = &root->root_item;
3899 struct cache_tree corrupt_blocks;
3900 struct orphan_data_extent *orphan;
3901 struct orphan_data_extent *tmp;
3902 enum btrfs_tree_block_status status;
3903 struct node_refs nrefs;
3906 * Reuse the corrupt_block cache tree to record corrupted tree block
3908 * Unlike the usage in extent tree check, here we do it in a per
3909 * fs/subvol tree base.
3911 cache_tree_init(&corrupt_blocks);
3912 root->fs_info->corrupt_blocks = &corrupt_blocks;
3914 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3915 rec = get_root_rec(root_cache, root->root_key.objectid);
3916 BUG_ON(IS_ERR(rec));
3917 if (btrfs_root_refs(root_item) > 0)
3918 rec->found_root_item = 1;
3921 btrfs_init_path(&path);
3922 memset(&root_node, 0, sizeof(root_node));
3923 cache_tree_init(&root_node.root_cache);
3924 cache_tree_init(&root_node.inode_cache);
3925 memset(&nrefs, 0, sizeof(nrefs));
3927 /* Move the orphan extent record to corresponding inode_record */
3928 list_for_each_entry_safe(orphan, tmp,
3929 &root->orphan_data_extents, list) {
3930 struct inode_record *inode;
3932 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3934 BUG_ON(IS_ERR(inode));
3935 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3936 list_move(&orphan->list, &inode->orphan_extents);
/* Start the walk control at the level of the root block. */
3939 level = btrfs_header_level(root->node);
3940 memset(wc->nodes, 0, sizeof(wc->nodes));
3941 wc->nodes[level] = &root_node;
3942 wc->active_node = level;
3943 wc->root_level = level;
3945 /* We may not have checked the root block, lets do that now */
3946 if (btrfs_is_leaf(root->node))
3947 status = btrfs_check_leaf(root, NULL, root->node);
3949 status = btrfs_check_node(root, NULL, root->node);
3950 if (status != BTRFS_TREE_BLOCK_CLEAN)
3953 if (btrfs_root_refs(root_item) > 0 ||
3954 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3955 path.nodes[level] = root->node;
3956 extent_buffer_get(root->node);
3957 path.slots[level] = 0;
3959 struct btrfs_key key;
3960 struct btrfs_disk_key found_key;
3962 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3963 level = root_item->drop_level;
3964 path.lowest_level = level;
3965 if (level > btrfs_header_level(root->node) ||
3966 level >= BTRFS_MAX_LEVEL) {
3967 error("ignoring invalid drop level: %u", level);
3970 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3973 btrfs_node_key(path.nodes[level], &found_key,
3975 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3976 sizeof(found_key)));
3980 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3986 wret = walk_up_tree(root, &path, wc, &level);
3993 btrfs_release_path(&path);
3995 if (!cache_tree_empty(&corrupt_blocks)) {
3996 struct cache_extent *cache;
3997 struct btrfs_corrupt_block *corrupt;
3999 printf("The following tree block(s) is corrupted in tree %llu:\n",
4000 root->root_key.objectid);
4001 cache = first_cache_extent(&corrupt_blocks);
4003 corrupt = container_of(cache,
4004 struct btrfs_corrupt_block,
4006 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4007 cache->start, corrupt->level,
4008 corrupt->key.objectid, corrupt->key.type,
4009 corrupt->key.offset);
4010 cache = next_cache_extent(cache);
4013 printf("Try to repair the btree for root %llu\n",
4014 root->root_key.objectid);
4015 ret = repair_btree(root, &corrupt_blocks);
4017 fprintf(stderr, "Failed to repair btree: %s\n",
4020 printf("Btree for root %llu is fixed\n",
4021 root->root_key.objectid);
4025 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4029 if (root_node.current) {
4030 root_node.current->checked = 1;
4031 maybe_free_inode_rec(&root_node.inode_cache,
4035 err = check_inode_recs(root, &root_node.inode_cache);
/* Undo the per-call state installed at the top of the function. */
4039 free_corrupt_blocks_tree(&corrupt_blocks);
4040 root->fs_info->corrupt_blocks = NULL;
4041 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Classify @objectid for the root item scan in check_fs_roots().
 * The two relocation tree ids are tested explicitly; every other id is
 * decided by is_fstree().
 * NOTE(review): the value returned for the relocation ids is not visible
 * here.
 */
4045 static int fs_root_objectid(u64 objectid)
4047 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4048 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4050 return is_fstree(objectid);
/*
 * Walk the tree root and check every fs/subvolume root found in it.
 *
 * Items of the tree root are scanned starting at a ROOT_ITEM key:
 *  - a ROOT_ITEM whose objectid passes fs_root_objectid() is read (without
 *    caching for the tree reloc root) and passed to check_fs_root();
 *  - ROOT_REF and ROOT_BACKREF items are passed to process_root_ref().
 * When the tree root node has changed since it was sampled, or
 * check_fs_root() returns -EAGAIN, the root records are freed and the path
 * released (presumably to restart the scan - the jump is not visible
 * here). Progress reporting is started and stopped through the global ctx.
 * NOTE(review): the loop header, the error bookkeeping and the return value
 * are not visible here.
 */
4053 static int check_fs_roots(struct btrfs_root *root,
4054 struct cache_tree *root_cache)
4056 struct btrfs_path path;
4057 struct btrfs_key key;
4058 struct walk_control wc;
4059 struct extent_buffer *leaf, *tree_node;
4060 struct btrfs_root *tmp_root;
4061 struct btrfs_root *tree_root = root->fs_info->tree_root;
4065 if (ctx.progress_enabled) {
4066 ctx.tp = TASK_FS_ROOTS;
4067 task_start(ctx.info);
4071 * Just in case we made any changes to the extent tree that weren't
4072 * reflected into the free space cache yet.
4075 reset_cached_block_groups(root->fs_info);
4076 memset(&wc, 0, sizeof(wc));
4077 cache_tree_init(&wc.shared);
4078 btrfs_init_path(&path);
4083 key.type = BTRFS_ROOT_ITEM_KEY;
4084 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the current tree root node so that a later change is noticed. */
4089 tree_node = tree_root->node;
4091 if (tree_node != tree_root->node) {
4092 free_root_recs_tree(root_cache);
4093 btrfs_release_path(&path);
4096 leaf = path.nodes[0];
4097 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4098 ret = btrfs_next_leaf(tree_root, &path);
4104 leaf = path.nodes[0];
4106 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4107 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4108 fs_root_objectid(key.objectid)) {
4109 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4110 tmp_root = btrfs_read_fs_root_no_cache(
4111 root->fs_info, &key);
4113 key.offset = (u64)-1;
4114 tmp_root = btrfs_read_fs_root(
4115 root->fs_info, &key);
4117 if (IS_ERR(tmp_root)) {
4121 ret = check_fs_root(tmp_root, root_cache, &wc);
4122 if (ret == -EAGAIN) {
4123 free_root_recs_tree(root_cache);
4124 btrfs_release_path(&path);
/* Only the uncached tree reloc root is freed here. */
4129 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4130 btrfs_free_fs_root(tmp_root);
4131 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4132 key.type == BTRFS_ROOT_BACKREF_KEY) {
4133 process_root_ref(leaf, path.slots[0], &key,
4140 btrfs_release_path(&path);
4142 free_extent_cache_tree(&wc.shared);
4143 if (!cache_tree_empty(&wc.shared))
4144 fprintf(stderr, "warning line %d\n", __LINE__);
4146 task_stop(ctx.info);
4152 * Find the DIR_ITEM/DIR_INDEX for the given key and check that it matches the
4153 * specified INODE_REF/INODE_EXTREF.
4155 * @root: the root of the fs/file tree
4156 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4157 * @key: the key of the DIR_ITEM/DIR_INDEX
4158 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4159 * distinguish root_dir from a normal dir/file
4160 * @name: the name in the INODE_REF/INODE_EXTREF
4161 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4162 * @mode: the st_mode of INODE_ITEM
4164 * Return 0 if no error occurred.
4165 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4166 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4168 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4169 * do not match for a normal dir/file.
/*
 * Search @root for @key (a DIR_ITEM or DIR_INDEX key) and compare every
 * entry packed in the found item with the inode ref described by @ref_key,
 * @name/@namelen and @mode. An entry is compared on its location key
 * (objectid == ref_key->objectid, type INODE_ITEM, offset 0), its file type
 * (against imode_to_type(@mode)) and its name.
 *
 * The path is local and is released before the function ends.
 *
 * NOTE(review): this listing has lost lines (braces, conditions, gotos,
 * the return), so the exact branch conditions for the root-dir and
 * missing-item cases are not visible here.
 */
4171 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4172 struct btrfs_key *key, u64 index, char *name,
4173 u32 namelen, u32 mode)
4175 struct btrfs_path path;
4176 struct extent_buffer *node;
4177 struct btrfs_dir_item *di;
4178 struct btrfs_key location;
4179 char namebuf[BTRFS_NAME_LEN] = {0};
4189 btrfs_init_path(&path);
4190 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4192 ret = DIR_ITEM_MISSING;
4196 /* Process root dir and goto out*/
4199 ret = ROOT_DIR_ERROR;
4201 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4203 ref_key->type == BTRFS_INODE_REF_KEY ?
4205 ref_key->objectid, ref_key->offset,
4206 key->type == BTRFS_DIR_ITEM_KEY ?
4207 "DIR_ITEM" : "DIR_INDEX");
4215 /* Process normal file/dir */
4217 ret = DIR_ITEM_MISSING;
4219 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4221 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4222 ref_key->objectid, ref_key->offset,
4223 key->type == BTRFS_DIR_ITEM_KEY ?
4224 "DIR_ITEM" : "DIR_INDEX",
4225 key->objectid, key->offset, namelen, name,
4226 imode_to_type(mode));
4230 /* Check whether inode_id/filetype/name match */
4231 node = path.nodes[0];
4232 slot = path.slots[0];
4233 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4234 total = btrfs_item_size_nr(node, slot);
/* One item may pack several dir entries; cur/total bound the walk. */
4235 while (cur < total) {
/*
 * Start each entry as a mismatch.
 * NOTE(review): the statement that clears ret for a matching entry is not
 * visible in this listing.
 */
4236 ret = DIR_ITEM_MISMATCH;
4237 name_len = btrfs_dir_name_len(node, di);
4238 data_len = btrfs_dir_data_len(node, di);
4240 btrfs_dir_item_key_to_cpu(node, di, &location);
4241 if (location.objectid != ref_key->objectid ||
4242 location.type != BTRFS_INODE_ITEM_KEY ||
4243 location.offset != 0)
4246 filetype = btrfs_dir_type(node, di);
4247 if (imode_to_type(mode) != filetype)
/*
 * A name that would run past the item, or that exceeds BTRFS_NAME_LEN, is
 * reported; the length actually read is then limited by min_t() to the
 * bytes left in the item after the entry header.
 */
4250 if (cur + sizeof(*di) + name_len > total ||
4251 name_len > BTRFS_NAME_LEN) {
4252 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4254 key->type == BTRFS_DIR_ITEM_KEY ?
4255 "DIR_ITEM" : "DIR_INDEX",
4256 key->objectid, key->offset, name_len);
4258 if (cur + sizeof(*di) > total)
4260 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes sit directly after the dir item header. */
4266 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4267 if (len != namelen || strncmp(namebuf, name, len))
/* Step over this entry: header + name + data. */
4273 len = sizeof(*di) + name_len + data_len;
4274 di = (struct btrfs_dir_item *)((char *)di + len);
4277 if (ret == DIR_ITEM_MISMATCH)
4279 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4281 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4282 ref_key->objectid, ref_key->offset,
4283 key->type == BTRFS_DIR_ITEM_KEY ?
4284 "DIR_ITEM" : "DIR_INDEX",
4285 key->objectid, key->offset, namelen, name,
4286 imode_to_type(mode));
4288 btrfs_release_path(&path);
4293 * Traverse the given INODE_REF and call find_dir_item() to find related
4294 * DIR_ITEM/DIR_INDEX.
4296 * @root: the root of the fs/file tree
4297 * @ref_key: the key of the INODE_REF
4298 * @refs: the count of INODE_REF
4299 * @mode: the st_mode of INODE_ITEM
4301 * Return 0 if no error occurred.
/*
 * Check the INODE_REF item at @slot of @node: read an entry's index and
 * name, flag a bad root-dir name, and ask find_dir_item() for the matching
 * DIR_INDEX (keyed by the entry index) and DIR_ITEM (keyed by the name
 * hash) in the parent directory ref_key->offset.
 *
 * NOTE(review): this listing has lost lines (the last parameter, the loop
 * construct, braces, the ref-count update and the return), so how @refs is
 * updated and what is returned are not visible here.
 */
4303 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4304 struct extent_buffer *node, int slot, u64 *refs,
4307 struct btrfs_key key;
4308 struct btrfs_inode_ref *ref;
4309 char namebuf[BTRFS_NAME_LEN] = {0};
4317 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4318 total = btrfs_item_size_nr(node, slot);
4321 /* Update inode ref count */
4324 index = btrfs_inode_ref_index(node, ref);
4325 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * A name that overruns the item or exceeds BTRFS_NAME_LEN is reported and
 * the read length is clamped to what is left in the item, capped at
 * BTRFS_NAME_LEN.
 */
4326 if (cur + sizeof(*ref) + name_len > total ||
4327 name_len > BTRFS_NAME_LEN) {
4328 warning("root %llu INODE_REF[%llu %llu] name too long",
4329 root->objectid, ref_key->objectid, ref_key->offset);
4331 if (total < cur + sizeof(*ref))
4333 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes sit directly after the inode ref header. */
4338 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4340 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length here is the on-disk name_len, not
 * the clamped len used for the read above - confirm this is intended.
 */
4341 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4342 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4343 root->objectid, ref_key->objectid, ref_key->offset,
4345 err |= ROOT_DIR_ERROR;
4348 /* Find related DIR_INDEX */
4349 key.objectid = ref_key->offset;
4350 key.type = BTRFS_DIR_INDEX_KEY;
4352 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4355 /* Find related dir_item */
4356 key.objectid = ref_key->offset;
4357 key.type = BTRFS_DIR_ITEM_KEY;
4358 key.offset = btrfs_name_hash(namebuf, len);
4359 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over this entry: header + on-disk name length. */
4362 len = sizeof(*ref) + name_len;
4363 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4373 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4374 * DIR_ITEM/DIR_INDEX.
4376 * @root: the root of the fs/file tree
4377 * @ref_key: the key of the INODE_EXTREF
4378 * @refs: the count of INODE_EXTREF
4379 * @mode: the st_mode of INODE_ITEM
4381 * Return 0 if no error occurred.
/*
 * Check the INODE_EXTREF item at @slot of @node: read an entry's name,
 * index and parent directory, flag a bad root-dir name, and ask
 * find_dir_item() for the matching DIR_INDEX (keyed by the entry index)
 * and DIR_ITEM (keyed by the name hash) in that parent.
 *
 * NOTE(review): this listing has lost lines (the last parameter, the loop
 * construct, braces, the ref-count update and the return), so how @refs is
 * updated and what is returned are not visible here.
 */
4383 static int check_inode_extref(struct btrfs_root *root,
4384 struct btrfs_key *ref_key,
4385 struct extent_buffer *node, int slot, u64 *refs,
4388 struct btrfs_key key;
4389 struct btrfs_inode_extref *extref;
4390 char namebuf[BTRFS_NAME_LEN] = {0};
4400 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4401 total = btrfs_item_size_nr(node, slot);
4404 /* update inode ref count */
4406 name_len = btrfs_inode_extref_name_len(node, extref);
4407 index = btrfs_inode_extref_index(node, extref);
4408 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Over-long names are read only up to BTRFS_NAME_LEN and reported.
 * NOTE(review): unlike check_inode_ref(), no visible test bounds the name
 * against the item size (total) - confirm against the full source.
 */
4409 if (name_len <= BTRFS_NAME_LEN) {
4412 len = BTRFS_NAME_LEN;
4413 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4414 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes sit directly after the extref header. */
4416 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4418 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length here is the on-disk name_len, not
 * the clamped len used for the read above - confirm this is intended.
 */
4419 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4420 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4421 root->objectid, ref_key->objectid, ref_key->offset,
4423 err |= ROOT_DIR_ERROR;
4426 /* find related dir_index */
4427 key.objectid = parent;
4428 key.type = BTRFS_DIR_INDEX_KEY;
4430 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4433 /* find related dir_item */
4434 key.objectid = parent;
4435 key.type = BTRFS_DIR_ITEM_KEY;
4436 key.offset = btrfs_name_hash(namebuf, len);
4437 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over this entry: header + on-disk name length. */
4440 len = sizeof(*extref) + name_len;
4441 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4451 * Find the INODE_REF/INODE_EXTREF for the given key and check that it matches
4452 * the specified DIR_ITEM/DIR_INDEX.
4454 * @root: the root of the fs/file tree
4455 * @key: the key of the INODE_REF/INODE_EXTREF
4456 * @name: the name in the INODE_REF/INODE_EXTREF
4457 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4458 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4460 * @ext_ref: the EXTENDED_IREF feature
4462 * Return 0 if no error occurred.
4463 * Return >0 for error bitmap
/*
 * Look for an inode ref that matches @name/@namelen (and @index, unless
 * @index is (u64)-1).
 *
 * First @key is searched as given and the entries of the found INODE_REF
 * item are compared. The second half repeats the search for an
 * INODE_EXTREF: it rewrites @key in place (type becomes
 * BTRFS_INODE_EXTREF_KEY, offset becomes the extref hash of the old offset
 * and the name), so the caller's key is modified. Extref entries must also
 * have parent equal to the original key->offset.
 *
 * NOTE(review): this listing has lost lines (braces, gotos, the ext_ref
 * test and the return), so the conditions joining the two halves and the
 * returned value are not visible here.
 */
4465 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4466 char *name, int namelen, u64 index,
4467 unsigned int ext_ref)
4469 struct btrfs_path path;
4470 struct btrfs_inode_ref *ref;
4471 struct btrfs_inode_extref *extref;
4472 struct extent_buffer *node;
4473 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4484 btrfs_init_path(&path);
4485 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4487 ret = INODE_REF_MISSING;
4491 node = path.nodes[0];
4492 slot = path.slots[0];
4494 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4495 total = btrfs_item_size_nr(node, slot);
4497 /* Iterate all entry of INODE_REF */
4498 while (cur < total) {
/* Each entry starts out as "missing" until it is shown to match. */
4499 ret = INODE_REF_MISSING;
4501 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4502 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 means the caller does not want the index compared. */
4503 if (index != (u64)-1 && index != ref_index)
4506 if (cur + sizeof(*ref) + ref_namelen > total ||
4507 ref_namelen > BTRFS_NAME_LEN) {
4508 warning("root %llu INODE %s[%llu %llu] name too long",
4510 key->type == BTRFS_INODE_REF_KEY ?
4512 key->objectid, key->offset);
4514 if (cur + sizeof(*ref) > total)
4516 len = min_t(u32, total - cur - sizeof(*ref),
4522 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4525 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over this entry: header + on-disk name length. */
4531 len = sizeof(*ref) + ref_namelen;
4532 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4537 /* Skip if not support EXTENDED_IREF feature */
/* Restart with a fresh path for the INODE_EXTREF lookup. */
4541 btrfs_release_path(&path);
4542 btrfs_init_path(&path);
/* Caller-visible: *key is rewritten into the INODE_EXTREF key here. */
4544 dir_id = key->offset;
4545 key->type = BTRFS_INODE_EXTREF_KEY;
4546 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4548 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4550 ret = INODE_REF_MISSING;
4554 node = path.nodes[0];
4555 slot = path.slots[0];
4557 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4559 total = btrfs_item_size_nr(node, slot);
4561 /* Iterate all entry of INODE_EXTREF */
4562 while (cur < total) {
4563 ret = INODE_REF_MISSING;
4565 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4566 ref_index = btrfs_inode_extref_index(node, extref);
4567 parent = btrfs_inode_extref_parent(node, extref);
4568 if (index != (u64)-1 && index != ref_index)
/* Entries for other parent directories can share the item. */
4571 if (parent != dir_id)
4574 if (ref_namelen <= BTRFS_NAME_LEN) {
4577 len = BTRFS_NAME_LEN;
4578 warning("root %llu INODE %s[%llu %llu] name too long",
4580 key->type == BTRFS_INODE_REF_KEY ?
4582 key->objectid, key->offset);
4584 read_extent_buffer(node, ref_namebuf,
4585 (unsigned long)(extref + 1), len);
4587 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over this entry: header + on-disk name length. */
4594 len = sizeof(*extref) + ref_namelen;
4595 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4600 btrfs_release_path(&path);
4605 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4606 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4608 * @root: the root of the fs/file tree
4609 * @key: the key of the DIR_ITEM/DIR_INDEX
4610 * @size: the st_size of the INODE_ITEM
4611 * @ext_ref: the EXTENDED_IREF feature
4613 * Return 0 if no error occurred.
/*
 * Check every entry packed in the DIR_ITEM/DIR_INDEX item at @slot of
 * @node. For each entry this adds its name length to *@size, verifies the
 * DIR_ITEM name hash against key->offset, looks up the INODE_ITEM named by
 * the entry's location (existence and file type) and calls
 * find_inode_ref() for the matching INODE_REF/INODE_EXTREF. Entries whose
 * location is a ROOT_ITEM are not checked against an inode.
 *
 * NOTE(review): this listing has lost lines (braces, gotos, some
 * conditions and the return), so the returned value is not visible here.
 */
4615 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4616 struct extent_buffer *node, int slot, u64 *size,
4617 unsigned int ext_ref)
4619 struct btrfs_dir_item *di;
4620 struct btrfs_inode_item *ii;
4621 struct btrfs_path path;
4622 struct btrfs_key location;
4623 char namebuf[BTRFS_NAME_LEN] = {0};
4636 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4637 * ignore index check.
4639 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4641 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4642 total = btrfs_item_size_nr(node, slot);
4644 while (cur < total) {
4645 data_len = btrfs_dir_data_len(node, di);
4647 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4648 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4649 "DIR_ITEM" : "DIR_INDEX",
4650 key->objectid, key->offset, data_len);
4652 name_len = btrfs_dir_name_len(node, di);
/*
 * A name that overruns the item or exceeds BTRFS_NAME_LEN is reported; the
 * length actually read is limited by min_t() to the bytes left in the item
 * after the entry header.
 */
4653 if (cur + sizeof(*di) + name_len > total ||
4654 name_len > BTRFS_NAME_LEN) {
4655 warning("root %llu %s[%llu %llu] name too long",
4657 key->type == BTRFS_DIR_ITEM_KEY ?
4658 "DIR_ITEM" : "DIR_INDEX",
4659 key->objectid, key->offset);
4661 if (cur + sizeof(*di) > total)
4663 len = min_t(u32, total - cur - sizeof(*di),
/* The on-disk (unclamped) name length is what gets accumulated. */
4668 (*size) += name_len;
/*
 * NOTE(review): namebuf holds exactly BTRFS_NAME_LEN bytes and is later
 * printed with %s; a name of full length leaves no terminating NUL, and
 * bytes from a longer previous entry are not cleared - confirm.
 */
4670 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4671 filetype = btrfs_dir_type(node, di);
/* Only DIR_ITEM keys carry the name hash in key->offset. */
4673 if (key->type == BTRFS_DIR_ITEM_KEY &&
4674 key->offset != btrfs_name_hash(namebuf, len)) {
4676 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4677 root->objectid, key->objectid, key->offset,
4678 namebuf, len, filetype, key->offset,
4679 btrfs_name_hash(namebuf, len));
4682 btrfs_init_path(&path);
4683 btrfs_dir_item_key_to_cpu(node, di, &location);
4685 /* Ignore related ROOT_ITEM check */
4686 if (location.type == BTRFS_ROOT_ITEM_KEY)
4689 /* Check relative INODE_ITEM(existence/filetype) */
4690 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4692 err |= INODE_ITEM_MISSING;
4693 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4694 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4695 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4696 key->offset, location.objectid, name_len,
4701 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4702 struct btrfs_inode_item);
4703 mode = btrfs_inode_mode(path.nodes[0], ii);
4705 if (imode_to_type(mode) != filetype) {
4706 err |= INODE_ITEM_MISMATCH;
4707 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4708 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4709 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4710 key->offset, name_len, namebuf, filetype);
4713 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as the INODE_REF key: (inode, INODE_REF, parent dir). */
4714 location.type = BTRFS_INODE_REF_KEY;
4715 location.offset = key->objectid;
4716 ret = find_inode_ref(root, &location, namebuf, len,
4719 if (ret & INODE_REF_MISSING)
4720 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4721 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4722 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4723 key->offset, name_len, namebuf, filetype);
4726 btrfs_release_path(&path);
/* Step over this entry: header + name + data. */
4727 len = sizeof(*di) + name_len + data_len;
4728 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with bytes left after one entry is reported. */
4731 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4732 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4733 root->objectid, key->objectid, key->offset);
4742 * Check file extent datasum/hole, update the size of the file extents,
4743 * check and update the last offset of the file extent.
4745 * @root: the root of fs/file tree.
4746 * @fkey: the key of the file extent.
4747 * @nodatasum: INODE_NODATASUM feature.
4748 * @size: the sum of all EXTENT_DATA items size for this inode.
4749 * @end: the offset of the last extent.
4751 * Return 0 if no error occurred.
/*
 * Check the EXTENT_DATA item at @slot of @node.
 *
 * Inline extents are checked for a non-zero length and, when not
 * compressed, for a length equal to the inline item length. Regular and
 * preallocated extents are checked for checksum coverage (depending on
 * @nodatasum and the extent type) and, unless no_holes is set, for
 * starting exactly at *@end. Both *@end and *@size are advanced by the
 * extent's byte count.
 *
 * NOTE(review): this listing has lost lines (braces, some declarations,
 * the compressed/uncompressed test and the returns), so the returned value
 * is not visible here.
 */
4753 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4754 struct extent_buffer *node, int slot,
4755 unsigned int nodatasum, u64 *size, u64 *end)
4757 struct btrfs_file_extent_item *fi;
4760 u64 extent_num_bytes;
4762 u64 csum_found; /* In byte size, sectorsize aligned */
4763 u64 search_start; /* Logical range start we search for csum */
4764 u64 search_len; /* Logical range len we search for csum */
4765 unsigned int extent_type;
4766 unsigned int is_hole;
4771 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4773 /* Check inline extent */
4774 extent_type = btrfs_file_extent_type(node, fi);
4775 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4776 struct btrfs_item *e = btrfs_item_nr(slot);
4777 u32 item_inline_len;
4779 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4780 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4781 compressed = btrfs_file_extent_compression(node, fi);
4782 if (extent_num_bytes == 0) {
4784 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4785 root->objectid, fkey->objectid, fkey->offset);
4786 err |= FILE_EXTENT_ERROR;
/* The two lengths are only compared for uncompressed inline data. */
4788 if (!compressed && extent_num_bytes != item_inline_len) {
4790 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4791 root->objectid, fkey->objectid, fkey->offset,
4792 extent_num_bytes, item_inline_len);
4793 err |= FILE_EXTENT_ERROR;
4795 *end += extent_num_bytes;
4796 *size += extent_num_bytes;
4800 /* Check extent type */
4801 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4802 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4803 err |= FILE_EXTENT_ERROR;
4804 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4805 root->objectid, fkey->objectid, fkey->offset);
4809 /* Check REG_EXTENT/PREALLOC_EXTENT */
4810 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4811 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4812 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4813 extent_offset = btrfs_file_extent_offset(node, fi);
4814 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor a disk length. */
4815 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4818 * Check EXTENT_DATA csum
4820 * For plain (uncompressed) extent, we should only check the range
4821 * we're referring to, as it's possible that part of prealloc extent
4822 * has been written, and has csum:
4824 * |<--- Original large preallocated extent A ---->|
4825 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4828 * For compressed extent, we should check the whole range.
4831 search_start = disk_bytenr + extent_offset;
4832 search_len = extent_num_bytes;
4834 search_start = disk_bytenr;
4835 search_len = disk_num_bytes;
/*
 * NOTE(review): csum_found is read below before ret is tested - confirm
 * that count_csum_range() always stores to it, including on failure.
 */
4837 ret = count_csum_range(root, search_start, search_len, &csum_found);
4838 if (csum_found > 0 && nodatasum) {
4839 err |= ODD_CSUM_ITEM;
4840 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4841 root->objectid, fkey->objectid, fkey->offset);
4842 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4843 !is_hole && (ret < 0 || csum_found < search_len)) {
4844 err |= CSUM_ITEM_MISSING;
4845 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4846 root->objectid, fkey->objectid, fkey->offset,
4847 csum_found, search_len);
4848 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4849 err |= ODD_CSUM_ITEM;
4850 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4851 root->objectid, fkey->objectid, fkey->offset, csum_found);
4854 /* Check EXTENT_DATA hole */
/* Without no_holes, each extent must begin where the previous one ended. */
4855 if (!no_holes && *end != fkey->offset) {
4856 err |= FILE_EXTENT_ERROR;
4857 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4858 root->objectid, fkey->objectid, fkey->offset);
4861 *end += extent_num_bytes;
4863 *size += extent_num_bytes;
4869 * Check INODE_ITEM and related ITEMs (the same inode number)
4870 * 1. check link count
4871 * 2. check inode ref/extref
4872 * 3. check dir item/index
4874 * @ext_ref: the EXTENDED_IREF feature
4876 * Return 0 if no error occurred.
4877 * Return >0 (as an error bitmap) on error or when the traversal is done
/*
 * Check the INODE_ITEM that @path points at together with the following
 * items that share its objectid.
 *
 * Reads size, nbytes, mode, nlink and the NODATASUM flag from the inode
 * item, then advances @path with btrfs_next_item() and dispatches each
 * item of the same inode by key type: INODE_REF, INODE_EXTREF,
 * DIR_ITEM/DIR_INDEX and EXTENT_DATA go to their check_*() helpers.
 * Afterwards the collected counters are compared with the inode item
 * (nlink vs refs, isize vs size for directories, nbytes vs extent_size).
 *
 * Side effect: @path is moved forward by this function.
 *
 * NOTE(review): this listing has lost lines (braces, several declarations,
 * the loop and switch headers, breaks, gotos and the return), so the exact
 * control flow and return value are not visible here.
 */
4879 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4880 unsigned int ext_ref)
4882 struct extent_buffer *node;
4883 struct btrfs_inode_item *ii;
4884 struct btrfs_key key;
4893 u64 extent_size = 0;
4895 unsigned int nodatasum;
4900 node = path->nodes[0];
4901 slot = path->slots[0];
4903 btrfs_item_key_to_cpu(node, &key, slot);
4904 inode_id = key.objectid;
/* Items under the orphan objectid are stepped over, not checked as inodes. */
4906 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4907 ret = btrfs_next_item(root, path);
4913 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4914 isize = btrfs_inode_size(node, ii);
4915 nbytes = btrfs_inode_nbytes(node, ii);
4916 mode = btrfs_inode_mode(node, ii);
4917 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4918 nlink = btrfs_inode_nlink(node, ii);
4919 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4922 ret = btrfs_next_item(root, path);
4924 /* out will fill 'err' using current statistics */
4926 } else if (ret > 0) {
4931 node = path->nodes[0];
4932 slot = path->slots[0];
4933 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
4934 if (key.objectid != inode_id)
4938 case BTRFS_INODE_REF_KEY:
4939 ret = check_inode_ref(root, &key, node, slot, &refs,
4943 case BTRFS_INODE_EXTREF_KEY:
/* NOTE(review): the key.type test repeats the case label; only !ext_ref matters. */
4944 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4945 warning("root %llu EXTREF[%llu %llu] isn't supported",
4946 root->objectid, key.objectid,
4948 ret = check_inode_extref(root, &key, node, slot, &refs,
4952 case BTRFS_DIR_ITEM_KEY:
4953 case BTRFS_DIR_INDEX_KEY:
/* NOTE(review): the message says DIR_INDEX although DIR_ITEM keys share this case. */
4955 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4956 root->objectid, inode_id,
4957 imode_to_type(mode), key.objectid,
4960 ret = check_dir_item(root, &key, node, slot, &size,
4964 case BTRFS_EXTENT_DATA_KEY:
4966 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4967 root->objectid, inode_id, key.objectid,
4970 ret = check_file_extent(root, &key, node, slot,
4971 nodatasum, &extent_size,
4975 case BTRFS_XATTR_ITEM_KEY:
4978 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4979 key.objectid, key.type, key.offset);
4984 /* verify INODE_ITEM nlink/isize/nbytes */
4987 err |= LINK_COUNT_ERROR;
4988 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4989 root->objectid, inode_id, nlink);
4993 * Just a warning, as dir inode nbytes is just an
4994 * instructive value.
4996 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4997 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4998 root->objectid, inode_id,
4999 root->fs_info->nodesize);
/* size was accumulated by check_dir_item() from the entry name lengths. */
5002 if (isize != size) {
5004 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5005 root->objectid, inode_id, isize, size);
5008 if (nlink != refs) {
5009 err |= LINK_COUNT_ERROR;
5010 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5011 root->objectid, inode_id, nlink, refs);
5012 } else if (!nlink) {
5016 if (!nbytes && !no_holes && extent_end < isize) {
5017 err |= NBYTES_ERROR;
5018 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5019 root->objectid, inode_id, isize);
5022 if (nbytes != extent_size) {
5023 err |= NBYTES_ERROR;
5024 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5025 root->objectid, inode_id, nbytes, extent_size);
/*
 * Check the first inode of @root: search for the INODE_ITEM with objectid
 * BTRFS_FIRST_FREE_OBJECTID and run check_inode_item() on it; a missing
 * item is reported as INODE_ITEM_MISSING.
 *
 * @ext_ref: forwarded unchanged to check_inode_item()
 *
 * NOTE(review): this listing has lost lines (braces, the rest of the
 * drop_progress comparison, result tests and the return), so the exact
 * skip condition and the returned value are not visible here.
 */
5032 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5034 struct btrfs_path path;
5035 struct btrfs_key key;
5039 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5040 key.type = BTRFS_INODE_ITEM_KEY;
5043 /* For root being dropped, we don't need to check first inode */
5044 if (btrfs_root_refs(&root->root_item) == 0 &&
5045 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5049 btrfs_init_path(&path);
5051 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5056 err |= INODE_ITEM_MISSING;
5057 error("first inode item of root %llu is missing",
/* Errors from the inode check are merged into the local bitmap. */
5061 err |= check_inode_item(root, &path, ext_ref);
5066 btrfs_release_path(&path);
5071 * Iterate all item on the tree and call check_inode_item() to check.
5073 * @root: the root of the tree to be checked.
5074 * @ext_ref: the EXTENDED_IREF feature
5076 * Return 0 if no error found.
5077 * Return <0 for error.
/*
 * Check one fs/subvolume tree: first check_fs_first_inode(), then a walk
 * of the tree with walk_down_tree_v2()/walk_up_tree_v2().
 *
 * The walk starts at the root node when the root still has references or
 * its drop_progress objectid is 0. Otherwise it starts at the
 * drop_progress key, at level root_item->drop_level.
 *
 * NOTE(review): this listing has lost lines (braces, the loop, result
 * tests and the return), so the way the results are combined and returned
 * is not visible here.
 */
5079 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5081 struct btrfs_path path;
5082 struct node_refs nrefs;
5083 struct btrfs_root_item *root_item = &root->root_item;
5089 * We need to manually check the first inode item(256)
5090 * As the following traversal function will only start from
5091 * the first inode item in the leaf, if inode item(256) is missing
5092 * we will just skip it forever.
5094 ret = check_fs_first_inode(root, ext_ref);
5098 memset(&nrefs, 0, sizeof(nrefs));
5099 level = btrfs_header_level(root->node);
5100 btrfs_init_path(&path);
5102 if (btrfs_root_refs(root_item) > 0 ||
5103 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Seed the path by hand with the root node and take a reference on it. */
5104 path.nodes[level] = root->node;
5105 path.slots[level] = 0;
5106 extent_buffer_get(root->node);
5108 struct btrfs_key key;
/* Start from the recorded drop position instead of the root node. */
5110 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5111 level = root_item->drop_level;
5112 path.lowest_level = level;
5113 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5120 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5123 /* if ret is negative, walk shall stop */
5129 ret = walk_up_tree_v2(root, &path, &level);
5131 /* Normal exit, reset ret to err */
5138 btrfs_release_path(&path);
5143 * Find the relative ref for root_ref and root_backref.
5145 * @root: the root of the root tree.
5146 * @ref_key: the key of the root ref.
5148 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF or ROOT_BACKREF at @slot of @node has a matching
 * item in the opposite direction.
 *
 * The counterpart key is built by swapping objectid and offset of
 * @ref_key and flipping the type. When it is found, dirid, sequence, name
 * length and name of both items must agree; otherwise ROOT_REF_MISMATCH is
 * recorded. A counterpart that cannot be found records ROOT_REF_MISSING.
 *
 * NOTE(review): this listing has lost lines (braces, declarations, result
 * tests, gotos and the return), so the returned value is not visible here.
 */
5150 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5151 struct extent_buffer *node, int slot)
5153 struct btrfs_path path;
5154 struct btrfs_key key;
5155 struct btrfs_root_ref *ref;
5156 struct btrfs_root_ref *backref;
5157 char ref_name[BTRFS_NAME_LEN] = {0};
5158 char backref_name[BTRFS_NAME_LEN] = {0};
5164 u32 backref_namelen;
5169 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5170 ref_dirid = btrfs_root_ref_dirid(node, ref);
5171 ref_seq = btrfs_root_ref_sequence(node, ref);
5172 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Over-long names are read only up to BTRFS_NAME_LEN and reported. */
5174 if (ref_namelen <= BTRFS_NAME_LEN) {
5177 len = BTRFS_NAME_LEN;
5178 warning("%s[%llu %llu] ref_name too long",
5179 ref_key->type == BTRFS_ROOT_REF_KEY ?
5180 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes sit directly after the root ref header. */
5183 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5185 /* Find relative root_ref */
/* REF + BACKREF - type turns a REF key type into BACKREF and vice versa. */
5186 key.objectid = ref_key->offset;
5187 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5188 key.offset = ref_key->objectid;
5190 btrfs_init_path(&path);
5191 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5193 err |= ROOT_REF_MISSING;
5194 error("%s[%llu %llu] couldn't find relative ref",
5195 ref_key->type == BTRFS_ROOT_REF_KEY ?
5196 "ROOT_REF" : "ROOT_BACKREF",
5197 ref_key->objectid, ref_key->offset);
5201 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5202 struct btrfs_root_ref);
5203 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5204 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5205 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5207 if (backref_namelen <= BTRFS_NAME_LEN) {
5208 len = backref_namelen;
5210 len = BTRFS_NAME_LEN;
5211 warning("%s[%llu %llu] ref_name too long",
5212 key.type == BTRFS_ROOT_REF_KEY ?
5213 "ROOT_REF" : "ROOT_BACKREF",
5214 key.objectid, key.offset);
5216 read_extent_buffer(path.nodes[0], backref_name,
5217 (unsigned long)(backref + 1), len);
/* The names are compared over len, the clamped counterpart length. */
5219 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5220 ref_namelen != backref_namelen ||
5221 strncmp(ref_name, backref_name, len)) {
5222 err |= ROOT_REF_MISMATCH;
5223 error("%s[%llu %llu] mismatch relative ref",
5224 ref_key->type == BTRFS_ROOT_REF_KEY ?
5225 "ROOT_REF" : "ROOT_BACKREF",
5226 ref_key->objectid, ref_key->offset);
5229 btrfs_release_path(&path);
5234 * Check all fs/file tree in low_memory mode.
5236 * 1. for fs tree root item, call check_fs_root_v2()
5237 * 2. for fs tree root ref/backref, call check_root_ref()
5239 * Return 0 if no error occurred.
/*
 * Walk the tree root starting at the ROOT_ITEM of BTRFS_FS_TREE_OBJECTID
 * and check what is found: each ROOT_ITEM whose objectid passes
 * fs_root_objectid() is read and given to check_fs_root_v2(), each
 * ROOT_REF/ROOT_BACKREF is given to check_root_ref(). Items are visited
 * one by one with btrfs_next_item().
 *
 * NOTE(review): this listing has lost lines (braces, the loop, result
 * handling, gotos and the return), so how errors are accumulated and
 * returned is not visible here.
 */
5241 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5243 struct btrfs_root *tree_root = fs_info->tree_root;
5244 struct btrfs_root *cur_root = NULL;
5245 struct btrfs_path path;
5246 struct btrfs_key key;
5247 struct extent_buffer *node;
5248 unsigned int ext_ref;
/* Whether the filesystem has the EXTENDED_IREF incompat feature set. */
5253 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5255 btrfs_init_path(&path);
5256 key.objectid = BTRFS_FS_TREE_OBJECTID;
5258 key.type = BTRFS_ROOT_ITEM_KEY;
5260 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5264 } else if (ret > 0) {
5270 node = path.nodes[0];
5271 slot = path.slots[0];
5272 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are tested for separately here. */
5273 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5275 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5276 fs_root_objectid(key.objectid)) {
/*
 * Reloc tree roots are read with btrfs_read_fs_root_no_cache() and
 * released with btrfs_free_fs_root() further down; every other root is
 * looked up through btrfs_read_fs_root() with offset (u64)-1.
 */
5277 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5278 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5281 key.offset = (u64)-1;
5282 cur_root = btrfs_read_fs_root(fs_info, &key);
5285 if (IS_ERR(cur_root)) {
5286 error("Fail to read fs/subvol tree: %lld",
5292 ret = check_fs_root_v2(cur_root, ext_ref);
5295 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5296 btrfs_free_fs_root(cur_root);
5297 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5298 key.type == BTRFS_ROOT_BACKREF_KEY) {
5299 ret = check_root_ref(tree_root, &key, node, slot);
5303 ret = btrfs_next_item(tree_root, &path);
5313 btrfs_release_path(&path);
/*
 * Walk rec->backrefs and compare every backref with the extent record:
 * it must have been found in the extent tree, tree backrefs must have been
 * referenced, data backrefs must have found_ref == num_refs and must agree
 * with the record on disk_bytenr and bytes. The per-backref counts are
 * summed into found and compared with rec->refs at the end. Problems are
 * written to stderr.
 *
 * @print_errs: NOTE(review): not referenced on any visible line;
 *              presumably it gates the fprintf() calls - confirm.
 *
 * maybe_free_extent_rec() treats a zero return as "nothing wrong".
 * NOTE(review): this listing has lost lines (braces, the error bookkeeping
 * and the return), so the non-zero value is not visible here.
 */
5317 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5319 struct list_head *cur = rec->backrefs.next;
5320 struct extent_backref *back;
5321 struct tree_backref *tback;
5322 struct data_backref *dback;
5326 while(cur != &rec->backrefs) {
5327 back = to_extent_backref(cur);
/* Backref seen while scanning trees but absent from the extent tree. */
5329 if (!back->found_extent_tree) {
5333 if (back->is_data) {
5334 dback = to_data_backref(back);
5335 fprintf(stderr, "Backref %llu %s %llu"
5336 " owner %llu offset %llu num_refs %lu"
5337 " not found in extent tree\n",
5338 (unsigned long long)rec->start,
5339 back->full_backref ?
5341 back->full_backref ?
5342 (unsigned long long)dback->parent:
5343 (unsigned long long)dback->root,
5344 (unsigned long long)dback->owner,
5345 (unsigned long long)dback->offset,
5346 (unsigned long)dback->num_refs);
5348 tback = to_tree_backref(back);
5349 fprintf(stderr, "Backref %llu parent %llu"
5350 " root %llu not found in extent tree\n",
5351 (unsigned long long)rec->start,
5352 (unsigned long long)tback->parent,
5353 (unsigned long long)tback->root);
/* Tree backref recorded, but no reference to it was found. */
5356 if (!back->is_data && !back->found_ref) {
5360 tback = to_tree_backref(back);
5361 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5362 (unsigned long long)rec->start,
5363 back->full_backref ? "parent" : "root",
5364 back->full_backref ?
5365 (unsigned long long)tback->parent :
5366 (unsigned long long)tback->root, back);
5368 if (back->is_data) {
5369 dback = to_data_backref(back);
/* Data backref: references found must equal the recorded count. */
5370 if (dback->found_ref != dback->num_refs) {
5374 fprintf(stderr, "Incorrect local backref count"
5375 " on %llu %s %llu owner %llu"
5376 " offset %llu found %u wanted %u back %p\n",
5377 (unsigned long long)rec->start,
5378 back->full_backref ?
5380 back->full_backref ?
5381 (unsigned long long)dback->parent:
5382 (unsigned long long)dback->root,
5383 (unsigned long long)dback->owner,
5384 (unsigned long long)dback->offset,
5385 dback->found_ref, dback->num_refs, back);
5387 if (dback->disk_bytenr != rec->start) {
5391 fprintf(stderr, "Backref disk bytenr does not"
5392 " match extent record, bytenr=%llu, "
5393 "ref bytenr=%llu\n",
5394 (unsigned long long)rec->start,
5395 (unsigned long long)dback->disk_bytenr);
5398 if (dback->bytes != rec->nr) {
5402 fprintf(stderr, "Backref bytes do not match "
5403 "extent backref, bytenr=%llu, ref "
5404 "bytes=%llu, backref bytes=%llu\n",
5405 (unsigned long long)rec->start,
5406 (unsigned long long)rec->nr,
5407 (unsigned long long)dback->bytes);
/*
 * Accumulate the global count; data backrefs add their found_ref.
 * NOTE(review): the amount added for a tree backref is not visible.
 */
5410 if (!back->is_data) {
5413 dback = to_data_backref(back);
5414 found += dback->found_ref;
5417 if (found != rec->refs) {
5421 fprintf(stderr, "Incorrect global backref count "
5422 "on %llu found %llu wanted %llu\n",
5423 (unsigned long long)rec->start,
5424 (unsigned long long)found,
5425 (unsigned long long)rec->refs);
/*
 * Drain the backref list of @rec.
 *
 * Loops for as long as rec->backrefs is non-empty; each pass takes the first
 * list node and converts it to its containing extent_backref.
 *
 * NOTE(review): the statements that unlink/release the entry and the return
 * value are not visible in this excerpt - confirm against the full file.
 */
5431 static int free_all_extent_backrefs(struct extent_record *rec)
5433 struct extent_backref *back;
5434 struct list_head *cur;
5435 while (!list_empty(&rec->backrefs)) {
5436 cur = rec->backrefs.next;
5437 back = to_extent_backref(cur);
/*
 * Tear down the extent records held in @extent_cache.
 *
 * Takes the first cached extent, recovers the extent_record that embeds it,
 * removes it from the tree and releases all of the record's backrefs.
 *
 * NOTE(review): the enclosing loop and the release of the record itself are
 * not visible in this excerpt.
 */
5444 static void free_extent_record_cache(struct cache_tree *extent_cache)
5446 struct cache_extent *cache;
5447 struct extent_record *rec;
5450 cache = first_cache_extent(extent_cache);
5453 rec = container_of(cache, struct extent_record, cache);
5454 remove_cache_extent(extent_cache, cache);
5455 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it remains to be verified
 * or reported.
 *
 * The record is dropped only when every condition holds: content and owner
 * reference were checked, the ref count from the extent item equals the
 * number of refs found and is non-zero, there are no duplicates,
 * all_backpointers_checked(rec, 0) yields 0, and none of bad_full_backref,
 * crossing_stripes or wrong_chunk_type is set.  The record is then removed
 * from the cache, its backrefs are freed and it is unlinked from rec->list.
 *
 * NOTE(review): freeing of @rec itself and the return value are not visible
 * in this excerpt.
 */
5460 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5461 struct extent_record *rec)
5463 if (rec->content_checked && rec->owner_ref_checked &&
5464 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5465 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5466 !rec->bad_full_backref && !rec->crossing_stripes &&
5467 !rec->wrong_chunk_type) {
5468 remove_cache_extent(extent_cache, &rec->cache);
5469 free_all_extent_backrefs(rec);
5470 list_del_init(&rec->list);
/*
 * Check whether tree block @buf is really referenced by the tree named in
 * its header owner field.
 *
 * Step 1: walk rec->backrefs; each entry is tested for found_ref and
 * full_backref (branch bodies not visible here - presumably such entries are
 * skipped), then the backref root is compared with btrfs_header_owner(buf).
 *
 * Step 2: read the owner's root (ROOT_ITEM key, offset (u64)-1), search it
 * for the first key of @buf with path.lowest_level = level + 1, and compare
 * the block pointer in the parent node at that level with buf->start.
 *
 * The final return is 0 when a reference was found and 1 otherwise.
 * NOTE(review): earlier return paths (e.g. after IS_ERR(ref_root) or a
 * failed search) are not visible in this excerpt.
 */
5476 static int check_owner_ref(struct btrfs_root *root,
5477 struct extent_record *rec,
5478 struct extent_buffer *buf)
5480 struct extent_backref *node;
5481 struct tree_backref *back;
5482 struct btrfs_root *ref_root;
5483 struct btrfs_key key;
5484 struct btrfs_path path;
5485 struct extent_buffer *parent;
5490 list_for_each_entry(node, &rec->backrefs, list) {
5493 if (!node->found_ref)
5495 if (node->full_backref)
5497 back = to_tree_backref(node);
5498 if (btrfs_header_owner(buf) == back->root)
5501 BUG_ON(rec->is_root);
5503 /* try to find the block by searching the corresponding fs tree */
5504 key.objectid = btrfs_header_owner(buf);
5505 key.type = BTRFS_ROOT_ITEM_KEY;
5506 key.offset = (u64)-1;
5508 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5509 if (IS_ERR(ref_root))
5512 level = btrfs_header_level(buf);
5514 btrfs_item_key_to_cpu(buf, &key, 0);
5516 btrfs_node_key_to_cpu(buf, &key, 0);
5518 btrfs_init_path(&path);
5519 path.lowest_level = level + 1;
5520 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5524 parent = path.nodes[level + 1];
5525 if (parent && buf->start == btrfs_node_blockptr(parent,
5526 path.slots[level + 1]))
5529 btrfs_release_path(&path);
5530 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.
 *
 * Walks rec->backrefs manually; for each entry it tests full_backref and
 * compares the backref root with the extent tree objectid.
 *
 * NOTE(review): the list advance, the branch bodies and the return
 * statements are not visible in this excerpt; presumably a non-zero result
 * means "this record belongs to the extent tree" - confirm.
 */
5533 static int is_extent_tree_record(struct extent_record *rec)
5535 struct list_head *cur = rec->backrefs.next;
5536 struct extent_backref *node;
5537 struct tree_backref *back;
5540 while(cur != &rec->backrefs) {
5541 node = to_extent_backref(cur);
5545 back = to_tree_backref(node);
5546 if (node->full_backref)
5548 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block as a corrupt extent.
 *
 * Looks up the extent record covering (start, len) in @extent_cache and
 * tests it with is_extent_tree_record().  On the visible success path the
 * record's parent key is converted to CPU byte order and handed to
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 *
 * NOTE(review): the start/len parameter line and the early-exit paths
 * (lookup miss, record fails is_extent_tree_record()) are not visible here.
 */
5555 static int record_bad_block_io(struct btrfs_fs_info *info,
5556 struct cache_tree *extent_cache,
5559 struct extent_record *rec;
5560 struct cache_extent *cache;
5561 struct btrfs_key key;
5563 cache = lookup_cache_extent(extent_cache, start, len);
5567 rec = container_of(cache, struct extent_record, cache);
5568 if (!is_extent_tree_record(rec))
5571 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5572 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr entries are read out and
 * written back in swapped positions.  The first node key is then pushed up
 * via btrfs_fixup_low_keys() (the guard around that call is not visible).
 *
 * Leaf: keys, data offsets and sizes of both items are fetched, both payloads
 * are copied into temporary malloc() buffers and written back at each
 * other's offset.  Item offsets and sizes are then exchanged and the keys
 * rewritten through btrfs_set_item_key_unsafe() with path->slots[0] pointed
 * at each slot in turn.
 *
 * NOTE(review): the write-back copies item1_data (allocated with item1_size)
 * using item2_size and item2_data using item1_size; if the two items differ
 * in size this reads beyond the smaller buffer - confirm intended.
 * NOTE(review): allocation-failure handling, buffer release and the return
 * value are not visible in this excerpt.
 */
5575 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5576 struct extent_buffer *buf, int slot)
5578 if (btrfs_header_level(buf)) {
5579 struct btrfs_key_ptr ptr1, ptr2;
5581 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5582 sizeof(struct btrfs_key_ptr));
5583 read_extent_buffer(buf, &ptr2,
5584 btrfs_node_key_ptr_offset(slot + 1),
5585 sizeof(struct btrfs_key_ptr));
5586 write_extent_buffer(buf, &ptr1,
5587 btrfs_node_key_ptr_offset(slot + 1),
5588 sizeof(struct btrfs_key_ptr));
5589 write_extent_buffer(buf, &ptr2,
5590 btrfs_node_key_ptr_offset(slot),
5591 sizeof(struct btrfs_key_ptr));
5593 struct btrfs_disk_key key;
5594 btrfs_node_key(buf, &key, 0);
5595 btrfs_fixup_low_keys(root, path, &key,
5596 btrfs_header_level(buf) + 1);
5599 struct btrfs_item *item1, *item2;
5600 struct btrfs_key k1, k2;
5601 char *item1_data, *item2_data;
5602 u32 item1_offset, item2_offset, item1_size, item2_size;
5604 item1 = btrfs_item_nr(slot);
5605 item2 = btrfs_item_nr(slot + 1);
5606 btrfs_item_key_to_cpu(buf, &k1, slot);
5607 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5608 item1_offset = btrfs_item_offset(buf, item1);
5609 item2_offset = btrfs_item_offset(buf, item2);
5610 item1_size = btrfs_item_size(buf, item1);
5611 item2_size = btrfs_item_size(buf, item2);
5613 item1_data = malloc(item1_size);
5616 item2_data = malloc(item2_size);
5622 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5623 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5625 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5626 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5630 btrfs_set_item_offset(buf, item1, item2_offset);
5631 btrfs_set_item_offset(buf, item2, item1_offset);
5632 btrfs_set_item_size(buf, item1, item2_size);
5633 btrfs_set_item_size(buf, item2, item1_size);
5635 path->slots[0] = slot;
5636 btrfs_set_item_key_unsafe(root, path, &k2);
5637 path->slots[0] = slot + 1;
5638 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair out-of-order keys in the block at path->nodes[path->lowest_level].
 *
 * Iterates over adjacent pairs (i, i + 1), reading the two keys with either
 * the node or the item accessor (the selecting condition is not visible).
 * The pair is compared with btrfs_comp_cpu_keys(); swap_values() is called
 * for a pair and the buffer is marked dirty afterwards.
 *
 * NOTE(review): the body of the "< 0" branch (presumably "already ordered,
 * skip"), error handling of swap_values() and the return value are not
 * visible in this excerpt.
 */
5643 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5645 struct extent_buffer *buf;
5646 struct btrfs_key k1, k2;
5648 int level = path->lowest_level;
5651 buf = path->nodes[level];
5652 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5654 btrfs_node_key_to_cpu(buf, &k1, i);
5655 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5657 btrfs_item_key_to_cpu(buf, &k1, i);
5658 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5660 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5662 ret = swap_values(root, path, buf, i);
5665 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The item's key type is tested against DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF; the action taken for any
 * other type is not visible here (presumably the function refuses).
 * For an accepted key a message is printed, the item headers after @slot are
 * moved down by one with memmove_extent_buffer(), the header item count is
 * decremented and the buffer is marked dirty.  The first item key is also
 * pushed to the parents with btrfs_fixup_low_keys() (guard not visible).
 *
 * NOTE(review): only the item header array is moved in the visible code;
 * return values are not visible in this excerpt.
 */
5671 static int delete_bogus_item(struct btrfs_root *root,
5672 struct btrfs_path *path,
5673 struct extent_buffer *buf, int slot)
5675 struct btrfs_key key;
5676 int nritems = btrfs_header_nritems(buf);
5678 btrfs_item_key_to_cpu(buf, &key, slot);
5680 /* These are all the keys we can deal with missing. */
5681 if (key.type != BTRFS_DIR_INDEX_KEY &&
5682 key.type != BTRFS_EXTENT_ITEM_KEY &&
5683 key.type != BTRFS_METADATA_ITEM_KEY &&
5684 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5685 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5688 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5689 (unsigned long long)key.objectid, key.type,
5690 (unsigned long long)key.offset, slot, buf->start);
5691 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5692 btrfs_item_nr_offset(slot + 1),
5693 sizeof(struct btrfs_item) *
5694 (nritems - slot - 1));
5695 btrfs_set_header_nritems(buf, nritems - 1);
5697 struct btrfs_disk_key disk_key;
5699 btrfs_item_key(buf, &disk_key, 0);
5700 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5702 btrfs_mark_buffer_dirty(buf);
/*
 * Repair the data offsets of the items in the leaf at path->nodes[0].
 *
 * Layout rule enforced by the loop: item 0 must end exactly at
 * BTRFS_LEAF_DATA_SIZE(root), and every later item must end where the
 * previous item's data begins.
 *
 * When an item ends beyond its expected position, delete_bogus_item() is
 * tried; the "can't fix" messages belong to paths whose guards are not
 * visible here.  When an item ends short of it, the gap becomes `shift`: the
 * item's data is moved up by that many bytes, the item offset is updated and
 * the buffer is marked dirty.
 *
 * NOTE(review): loop control after a deletion, the new-offset argument of
 * btrfs_set_item_offset() and the return value are not visible in this
 * excerpt.
 */
5706 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5708 struct extent_buffer *buf;
5712 /* We should only get this for leaves */
5713 BUG_ON(path->lowest_level);
5714 buf = path->nodes[0];
5716 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5717 unsigned int shift = 0, offset;
5719 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5720 BTRFS_LEAF_DATA_SIZE(root)) {
5721 if (btrfs_item_end_nr(buf, i) >
5722 BTRFS_LEAF_DATA_SIZE(root)) {
5723 ret = delete_bogus_item(root, path, buf, i);
5726 fprintf(stderr, "item is off the end of the "
5727 "leaf, can't fix\n");
5731 shift = BTRFS_LEAF_DATA_SIZE(root) -
5732 btrfs_item_end_nr(buf, i);
5733 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5734 btrfs_item_offset_nr(buf, i - 1)) {
5735 if (btrfs_item_end_nr(buf, i) >
5736 btrfs_item_offset_nr(buf, i - 1)) {
5737 ret = delete_bogus_item(root, path, buf, i);
5740 fprintf(stderr, "items overlap, can't fix\n");
5744 shift = btrfs_item_offset_nr(buf, i - 1) -
5745 btrfs_item_end_nr(buf, i);
5750 printf("Shifting item nr %d by %u bytes in block %llu\n",
5751 i, shift, (unsigned long long)buf->start);
5752 offset = btrfs_item_offset_nr(buf, i);
5753 memmove_extent_buffer(buf,
5754 btrfs_leaf_data(buf) + offset + shift,
5755 btrfs_leaf_data(buf) + offset,
5756 btrfs_item_size_nr(buf, i));
5757 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5759 btrfs_mark_buffer_dirty(buf);
5763 * We may have moved things, in which case we want to exit so we don't
5764 * write those changes out. Once we have proper abort functionality in
5765 * progs this can be changed to something nicer.
5772 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5773 * then just return -EIO.
/*
 * Try to repair tree block @buf for the failure class given in @status.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled; other statuses are tested for up front (result of that test
 * is not visible here).
 *
 * All roots referencing buf->start are collected with
 * btrfs_find_all_roots().  For each root the fs root is read, a transaction
 * is started and btrfs_search_slot() is run for the block's first key with
 * path.lowest_level set to the block's level and skip_check_block enabled
 * (last argument 1 - presumably requesting COW of the path; confirm).
 * fix_key_order() or fix_item_offset() is then applied according to @status
 * and the transaction is committed.
 *
 * NOTE(review): error branches, ulist release and the return value are not
 * visible in this excerpt.
 */
5775 static int try_to_fix_bad_block(struct btrfs_root *root,
5776 struct extent_buffer *buf,
5777 enum btrfs_tree_block_status status)
5779 struct btrfs_trans_handle *trans;
5780 struct ulist *roots;
5781 struct ulist_node *node;
5782 struct btrfs_root *search_root;
5783 struct btrfs_path path;
5784 struct ulist_iterator iter;
5785 struct btrfs_key root_key, key;
5788 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5789 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5792 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5796 btrfs_init_path(&path);
5797 ULIST_ITER_INIT(&iter);
5798 while ((node = ulist_next(roots, &iter))) {
5799 root_key.objectid = node->val;
5800 root_key.type = BTRFS_ROOT_ITEM_KEY;
5801 root_key.offset = (u64)-1;
5803 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5810 trans = btrfs_start_transaction(search_root, 0);
5811 if (IS_ERR(trans)) {
5812 ret = PTR_ERR(trans);
5816 path.lowest_level = btrfs_header_level(buf);
5817 path.skip_check_block = 1;
5818 if (path.lowest_level)
5819 btrfs_node_key_to_cpu(buf, &key, 0);
5821 btrfs_item_key_to_cpu(buf, &key, 0);
5822 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5825 btrfs_commit_transaction(trans, search_root);
5828 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5829 ret = fix_key_order(search_root, &path);
5830 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5831 ret = fix_item_offset(search_root, &path);
5833 btrfs_commit_transaction(trans, search_root);
5836 btrfs_release_path(&path);
5837 btrfs_commit_transaction(trans, search_root);
5840 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record covering (buf->start, buf->len) is looked up in @extent_cache
 * and gets the block's generation, the objectid of its first key (when the
 * block has items) and its level.  The block is then checked with
 * btrfs_check_leaf() or btrfs_check_node() against rec->parent_key.
 *
 * A non-clean status leads to try_to_fix_bad_block() (any guard on that call
 * is not visible); if the status is still not clean, "bad block" is printed.
 * The existing comment notes that callers are signalled to restart the scan
 * after a repair because blocks were COWed.
 *
 * For a clean block content_checked is set.  owner_ref_checked is set
 * directly when @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF; otherwise
 * check_owner_ref() is consulted first.  Finally maybe_free_extent_rec()
 * gets the chance to drop the record.
 *
 * NOTE(review): return values and the lookup-miss path are not visible in
 * this excerpt.
 */
5844 static int check_block(struct btrfs_root *root,
5845 struct cache_tree *extent_cache,
5846 struct extent_buffer *buf, u64 flags)
5848 struct extent_record *rec;
5849 struct cache_extent *cache;
5850 struct btrfs_key key;
5851 enum btrfs_tree_block_status status;
5855 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5858 rec = container_of(cache, struct extent_record, cache);
5859 rec->generation = btrfs_header_generation(buf);
5861 level = btrfs_header_level(buf);
5862 if (btrfs_header_nritems(buf) > 0) {
5865 btrfs_item_key_to_cpu(buf, &key, 0);
5867 btrfs_node_key_to_cpu(buf, &key, 0);
5869 rec->info_objectid = key.objectid;
5871 rec->info_level = level;
5873 if (btrfs_is_leaf(buf))
5874 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5876 status = btrfs_check_node(root, &rec->parent_key, buf);
5878 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5880 status = try_to_fix_bad_block(root, buf, status);
5881 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5883 fprintf(stderr, "bad block %llu\n",
5884 (unsigned long long)buf->start);
5887 * Signal to callers we need to start the scan over
5888 * again since we'll have cowed blocks.
5893 rec->content_checked = 1;
5894 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5895 rec->owner_ref_checked = 1;
5897 ret = check_owner_ref(root, rec, buf);
5899 rec->owner_ref_checked = 1;
5903 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching @parent or @root.
 *
 * Two matching modes are visible: one path tests !full_backref and then
 * compares back->parent with @parent, the other tests full_backref and then
 * compares back->root with @root.
 *
 * NOTE(review): the condition selecting between the two modes (presumably
 * "parent > 0"), the skip of data backrefs, the list advance and the return
 * statements are not visible in this excerpt.
 */
5907 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5908 u64 parent, u64 root)
5910 struct list_head *cur = rec->backrefs.next;
5911 struct extent_backref *node;
5912 struct tree_backref *back;
5914 while(cur != &rec->backrefs) {
5915 node = to_extent_backref(cur);
5919 back = to_tree_backref(node);
5921 if (!node->full_backref)
5923 if (parent == back->parent)
5926 if (node->full_backref)
5928 if (back->root == root)
/*
 * Allocate a tree backref and append it to rec->backrefs.
 *
 * The embedded extent_backref node is zeroed.  Two initialisations are
 * visible: one stores @parent and sets full_backref = 1, the other clears
 * full_backref (presumably storing @root - that line and the selecting
 * condition are not visible here).
 *
 * NOTE(review): the malloc() failure check and the return statement are not
 * visible in this excerpt.
 */
5935 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5936 u64 parent, u64 root)
5938 struct tree_backref *ref = malloc(sizeof(*ref));
5942 memset(&ref->node, 0, sizeof(ref->node));
5944 ref->parent = parent;
5945 ref->node.full_backref = 1;
5948 ref->node.full_backref = 0;
5950 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref matching the given identity.
 *
 * One path tests !full_backref and compares back->parent with @parent.  The
 * other tests full_backref and compares (root, owner, offset).  On that
 * second path, when found_ref is set for both the request and the existing
 * node, a mismatch in bytes or disk_bytenr is detected separately (the
 * consequence - presumably "keep searching" - is not visible).
 *
 * NOTE(review): one parameter line (found_ref), the mode-selecting
 * condition, the list advance and the return statements are not visible in
 * this excerpt.
 */
5955 static struct data_backref *find_data_backref(struct extent_record *rec,
5956 u64 parent, u64 root,
5957 u64 owner, u64 offset,
5959 u64 disk_bytenr, u64 bytes)
5961 struct list_head *cur = rec->backrefs.next;
5962 struct extent_backref *node;
5963 struct data_backref *back;
5965 while(cur != &rec->backrefs) {
5966 node = to_extent_backref(cur);
5970 back = to_data_backref(node);
5972 if (!node->full_backref)
5974 if (parent == back->parent)
5977 if (node->full_backref)
5979 if (back->root == root && back->owner == owner &&
5980 back->offset == offset) {
5981 if (found_ref && node->found_ref &&
5982 (back->bytes != bytes ||
5983 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed and flagged is_data.  Two initialisations are
 * visible: one stores @parent with full_backref = 1, the other stores
 * @offset with full_backref = 0 (the selecting condition and remaining field
 * assignments are not visible).  bytes is set from max_size and
 * rec->max_size is raised to max_size when the latter is larger.
 *
 * NOTE(review): the max_size parameter line, the malloc() failure check and
 * the return statement are not visible in this excerpt.
 */
5992 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5993 u64 parent, u64 root,
5994 u64 owner, u64 offset,
5997 struct data_backref *ref = malloc(sizeof(*ref));
6001 memset(&ref->node, 0, sizeof(ref->node));
6002 ref->node.is_data = 1;
6005 ref->parent = parent;
6008 ref->node.full_backref = 1;
6012 ref->offset = offset;
6013 ref->node.full_backref = 0;
6015 ref->bytes = max_size;
6018 list_add_tail(&ref->node.list, &rec->backrefs);
6019 if (max_size > rec->max_size)
6020 rec->max_size = max_size;
6024 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent's kind disagrees with the flags
 * of the block group found for rec->start.
 *
 * - Data extent (!rec->metadata): the block group must have the DATA flag.
 * - Metadata extent: the block group must have SYSTEM or METADATA.  If the
 *   record has backrefs, the first one is examined: a data backref on a tree
 *   block is flagged as wrong; otherwise a backref rooted in the chunk tree
 *   requires a SYSTEM block group and any other root requires METADATA.
 *
 * NOTE(review): the handling of a failed block group lookup and the early
 * returns between the cases are not visible in this excerpt.
 */
6025 static void check_extent_type(struct extent_record *rec)
6027 struct btrfs_block_group_cache *bg_cache;
6029 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6033 /* data extent, check chunk directly*/
6034 if (!rec->metadata) {
6035 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6036 rec->wrong_chunk_type = 1;
6040 /* metadata extent, check the obvious case first */
6041 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6042 BTRFS_BLOCK_GROUP_METADATA))) {
6043 rec->wrong_chunk_type = 1;
6048 * Check SYSTEM extent, as it's also marked as metadata, we can only
6049 * make sure it's a SYSTEM extent by its backref
6051 if (!list_empty(&rec->backrefs)) {
6052 struct extent_backref *node;
6053 struct tree_backref *tback;
6056 node = to_extent_backref(rec->backrefs.next);
6057 if (node->is_data) {
6058 /* tree block shouldn't have data backref */
6059 rec->wrong_chunk_type = 1;
6062 tback = container_of(node, struct tree_backref, node);
6064 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6065 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6067 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6068 if (!(bg_cache->flags & bg_type))
6069 rec->wrong_chunk_type = 1;
6074 * Allocate a new extent record, fill default values from @tmpl and insert into
6075 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6076 * the cache, otherwise it fails.
/*
 * Create an extent record from @tmpl and insert it into @extent_cache
 * without first checking for an existing entry.
 *
 * tmpl->max_size must be non-zero (BUG_ON).  Fields copied from the
 * template: start, max_size, found_rec, content_checked, owner_ref_checked,
 * metadata, is_root, refs, extent_item_refs, parent_generation, parent_key.
 * rec->nr becomes max(tmpl->nr, tmpl->max_size), while the cache entry size
 * is tmpl->nr.  Duplicate count and the bad_full_backref, crossing_stripes
 * and wrong_chunk_type flags start at 0, and flag_block_full_backref starts
 * at FLAG_UNSET.
 *
 * After insertion bytes_used grows by rec->nr, crossing_stripes is computed
 * with check_crossing_stripes() (guard not visible - presumably metadata
 * only) and check_extent_type() is run.
 *
 * NOTE(review): the malloc() failure check, handling of a failed insert and
 * the return value are not visible in this excerpt.
 */
6078 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6079 struct extent_record *tmpl)
6081 struct extent_record *rec;
6084 BUG_ON(tmpl->max_size == 0);
6085 rec = malloc(sizeof(*rec));
6088 rec->start = tmpl->start;
6089 rec->max_size = tmpl->max_size;
6090 rec->nr = max(tmpl->nr, tmpl->max_size);
6091 rec->found_rec = tmpl->found_rec;
6092 rec->content_checked = tmpl->content_checked;
6093 rec->owner_ref_checked = tmpl->owner_ref_checked;
6094 rec->num_duplicates = 0;
6095 rec->metadata = tmpl->metadata;
6096 rec->flag_block_full_backref = FLAG_UNSET;
6097 rec->bad_full_backref = 0;
6098 rec->crossing_stripes = 0;
6099 rec->wrong_chunk_type = 0;
6100 rec->is_root = tmpl->is_root;
6101 rec->refs = tmpl->refs;
6102 rec->extent_item_refs = tmpl->extent_item_refs;
6103 rec->parent_generation = tmpl->parent_generation;
6104 INIT_LIST_HEAD(&rec->backrefs);
6105 INIT_LIST_HEAD(&rec->dups);
6106 INIT_LIST_HEAD(&rec->list);
6107 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6108 rec->cache.start = tmpl->start;
6109 rec->cache.size = tmpl->nr;
6110 ret = insert_cache_extent(extent_cache, &rec->cache);
6115 bytes_used += rec->nr;
6118 rec->crossing_stripes = check_crossing_stripes(global_info,
6119 rec->start, global_info->nodesize);
6120 check_extent_type(rec);
6125 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6127 * - refs - if found, increase refs
6128 * - is_root - if found, set
6129 * - content_checked - if found, set
6130 * - owner_ref_checked - if found, set
6132 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering (tmpl->start, tmpl->nr), or
 * create a new record via add_extent_rec_nolookup() when none exists.
 *
 * Visible handling of an existing record:
 * - rec->nr may be reset to max(tmpl->nr, tmpl->max_size) (guard not
 *   visible).
 * - When the template comes from an extent item (tmpl->found_rec) and either
 *   the start differs or the record was already found, it is a duplicate.
 *   The record is queued on duplicate_extents if rec->list is empty, and a
 *   new record describing the duplicate (start, max_size, metadata,
 *   extent_item_refs) is appended to rec->dups with num_duplicates bumped.
 * - A non-duplicate template carrying extent_item_refs overwrites the
 *   record's value; a message is printed if one was already set.
 * - content_checked and owner_ref_checked are sticky, parent_key is always
 *   copied, parent_generation is taken when non-zero, and max_size only
 *   grows.
 * - crossing_stripes is recomputed (guard not visible), check_extent_type()
 *   is run and maybe_free_extent_rec() may drop the record.
 *
 * NOTE(review): the refs/is_root updates described in the comment above the
 * function, allocation-failure handling and the return statements are not
 * visible in this excerpt.
 */
6134 static int add_extent_rec(struct cache_tree *extent_cache,
6135 struct extent_record *tmpl)
6137 struct extent_record *rec;
6138 struct cache_extent *cache;
6142 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6144 rec = container_of(cache, struct extent_record, cache);
6148 rec->nr = max(tmpl->nr, tmpl->max_size);
6151 * We need to make sure to reset nr to whatever the extent
6152 * record says was the real size, this way we can compare it to
6155 if (tmpl->found_rec) {
6156 if (tmpl->start != rec->start || rec->found_rec) {
6157 struct extent_record *tmp;
6160 if (list_empty(&rec->list))
6161 list_add_tail(&rec->list,
6162 &duplicate_extents);
6165 * We have to do this song and dance in case we
6166 * find an extent record that falls inside of
6167 * our current extent record but does not have
6168 * the same objectid.
6170 tmp = malloc(sizeof(*tmp));
6173 tmp->start = tmpl->start;
6174 tmp->max_size = tmpl->max_size;
6177 tmp->metadata = tmpl->metadata;
6178 tmp->extent_item_refs = tmpl->extent_item_refs;
6179 INIT_LIST_HEAD(&tmp->list);
6180 list_add_tail(&tmp->list, &rec->dups);
6181 rec->num_duplicates++;
6188 if (tmpl->extent_item_refs && !dup) {
6189 if (rec->extent_item_refs) {
6190 fprintf(stderr, "block %llu rec "
6191 "extent_item_refs %llu, passed %llu\n",
6192 (unsigned long long)tmpl->start,
6193 (unsigned long long)
6194 rec->extent_item_refs,
6195 (unsigned long long)tmpl->extent_item_refs);
6197 rec->extent_item_refs = tmpl->extent_item_refs;
6201 if (tmpl->content_checked)
6202 rec->content_checked = 1;
6203 if (tmpl->owner_ref_checked)
6204 rec->owner_ref_checked = 1;
6205 memcpy(&rec->parent_key, &tmpl->parent_key,
6206 sizeof(tmpl->parent_key));
6207 if (tmpl->parent_generation)
6208 rec->parent_generation = tmpl->parent_generation;
6209 if (rec->max_size < tmpl->max_size)
6210 rec->max_size = tmpl->max_size;
6213 * A metadata extent can't cross stripe_len boundary, otherwise
6214 * kernel scrub won't be able to handle it.
6215 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6219 rec->crossing_stripes = check_crossing_stripes(
6220 global_info, rec->start,
6221 global_info->nodesize);
6222 check_extent_type(rec);
6223 maybe_free_extent_rec(extent_cache, rec);
6227 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a template starting at @bytenr is
 * inserted with add_extent_rec_nolookup() and the lookup is repeated.  The
 * record must start exactly at @bytenr (mismatch path not visible).  The
 * matching backref is located with find_tree_backref() or created with
 * alloc_tree_backref().
 *
 * Two marking paths are visible, each printing "Extent back ref already
 * exists" when the flag was already set: one sets node.found_ref, the other
 * sets node.found_extent_tree (presumably selected by @found_ref - the
 * condition is not visible).  check_extent_type() and
 * maybe_free_extent_rec() are run afterwards.
 *
 * NOTE(review): the remaining template fields, error handling and return
 * values are not visible in this excerpt.
 */
6232 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6233 u64 parent, u64 root, int found_ref)
6235 struct extent_record *rec;
6236 struct tree_backref *back;
6237 struct cache_extent *cache;
6240 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6242 struct extent_record tmpl;
6244 memset(&tmpl, 0, sizeof(tmpl));
6245 tmpl.start = bytenr;
6250 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6254 /* really a bug in the cache_extent implementation now */
6255 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6260 rec = container_of(cache, struct extent_record, cache);
6261 if (rec->start != bytenr) {
6263 * Several cause, from unaligned bytenr to over lapping extents
6268 back = find_tree_backref(rec, parent, root);
6270 back = alloc_tree_backref(rec, parent, root);
6276 if (back->node.found_ref) {
6277 fprintf(stderr, "Extent back ref already exists "
6278 "for %llu parent %llu root %llu \n",
6279 (unsigned long long)bytenr,
6280 (unsigned long long)parent,
6281 (unsigned long long)root);
6283 back->node.found_ref = 1;
6285 if (back->node.found_extent_tree) {
6286 fprintf(stderr, "Extent back ref already exists "
6287 "for %llu parent %llu root %llu \n",
6288 (unsigned long long)bytenr,
6289 (unsigned long long)parent,
6290 (unsigned long long)root);
6292 back->node.found_extent_tree = 1;
6294 check_extent_type(rec);
6295 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a template (start = @bytenr,
 * max_size = @max_size) is inserted with add_extent_rec_nolookup() and the
 * lookup is repeated.  rec->max_size is raised to @max_size if needed.  The
 * backref is located with find_data_backref() or created with
 * alloc_data_backref().
 *
 * Two marking paths are visible (selecting condition not visible -
 * presumably @found_ref):
 * - Ref seen in a file tree: num_refs must be 1 (BUG_ON).  If the backref
 *   was already found its bytes must equal @max_size (BUG_ON).  Then
 *   found_ref is flagged and counted, bytes and disk_bytenr are recorded,
 *   and the record is marked content/owner checked.
 * - Ref seen in the extent tree: "Extent back ref already exists" is printed
 *   if found_extent_tree was already set; num_refs is stored and
 *   found_extent_tree is set.
 *
 * maybe_free_extent_rec() is run at the end.
 * NOTE(review): error handling and return values are not visible in this
 * excerpt.
 */
6299 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6300 u64 parent, u64 root, u64 owner, u64 offset,
6301 u32 num_refs, int found_ref, u64 max_size)
6303 struct extent_record *rec;
6304 struct data_backref *back;
6305 struct cache_extent *cache;
6308 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6310 struct extent_record tmpl;
6312 memset(&tmpl, 0, sizeof(tmpl));
6313 tmpl.start = bytenr;
6315 tmpl.max_size = max_size;
6317 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6321 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6326 rec = container_of(cache, struct extent_record, cache);
6327 if (rec->max_size < max_size)
6328 rec->max_size = max_size;
6331 * If found_ref is set then max_size is the real size and must match the
6332 * existing refs. So if we have already found a ref then we need to
6333 * make sure that this ref matches the existing one, otherwise we need
6334 * to add a new backref so we can notice that the backrefs don't match
6335 * and we need to figure out who is telling the truth. This is to
6336 * account for that awful fsync bug I introduced where we'd end up with
6337 * a btrfs_file_extent_item that would have its length include multiple
6338 * prealloc extents or point inside of a prealloc extent.
6340 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6343 back = alloc_data_backref(rec, parent, root, owner, offset,
6349 BUG_ON(num_refs != 1);
6350 if (back->node.found_ref)
6351 BUG_ON(back->bytes != max_size);
6352 back->node.found_ref = 1;
6353 back->found_ref += 1;
6354 back->bytes = max_size;
6355 back->disk_bytenr = bytenr;
6357 rec->content_checked = 1;
6358 rec->owner_ref_checked = 1;
6360 if (back->node.found_extent_tree) {
6361 fprintf(stderr, "Extent back ref already exists "
6362 "for %llu parent %llu root %llu "
6363 "owner %llu offset %llu num_refs %lu\n",
6364 (unsigned long long)bytenr,
6365 (unsigned long long)parent,
6366 (unsigned long long)root,
6367 (unsigned long long)owner,
6368 (unsigned long long)offset,
6369 (unsigned long)num_refs);
6371 back->num_refs = num_refs;
6372 back->node.found_extent_tree = 1;
6374 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range (@bytenr, @size) for later processing.
 *
 * The range is first added to @seen and then to @pending.
 * NOTE(review): the check of the first add's result (presumably "already
 * seen, do not queue again") and the return value are not visible here.
 */
6378 static int add_pending(struct cache_tree *pending,
6379 struct cache_tree *seen, u64 bytenr, u32 size)
6382 ret = add_cache_extent(seen, bytenr, size);
6385 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store it in @bits.
 *
 * Visible selection order:
 * 1. The first entry of @reada, if any, is placed in bits[0].
 * 2. Otherwise @nodes is searched starting up to 32768 bytes before @last,
 *    falling back to the start of @nodes; entries are copied into @bits
 *    until it holds @bits_nr entries or the tree ends.
 * 3. @pending is searched from 0 and copied the same way.
 * When more than 8 slots are still free, further @pending entries following
 * bits[0] are appended for as long as each gap is at most 32768 bytes.
 *
 * NOTE(review): @last is u64 but is stored in `unsigned long node_start`;
 * where unsigned long is 32 bits this truncates - confirm.
 * NOTE(review): the last parameter line, the branch conditions between the
 * steps, the counter increments and the return statements are not visible in
 * this excerpt.
 */
6389 static int pick_next_pending(struct cache_tree *pending,
6390 struct cache_tree *reada,
6391 struct cache_tree *nodes,
6392 u64 last, struct block_info *bits, int bits_nr,
6395 unsigned long node_start = last;
6396 struct cache_extent *cache;
6399 cache = search_cache_extent(reada, 0);
6401 bits[0].start = cache->start;
6402 bits[0].size = cache->size;
6407 if (node_start > 32768)
6408 node_start -= 32768;
6410 cache = search_cache_extent(nodes, node_start);
6412 cache = search_cache_extent(nodes, 0);
6415 cache = search_cache_extent(pending, 0);
6420 bits[ret].start = cache->start;
6421 bits[ret].size = cache->size;
6422 cache = next_cache_extent(cache);
6424 } while (cache && ret < bits_nr);
6430 bits[ret].start = cache->start;
6431 bits[ret].size = cache->size;
6432 cache = next_cache_extent(cache);
6434 } while (cache && ret < bits_nr);
6436 if (bits_nr - ret > 8) {
6437 u64 lookup = bits[0].start + bits[0].size;
6438 struct cache_extent *next;
6439 next = search_cache_extent(pending, lookup);
6441 if (next->start - lookup > 32768)
6443 bits[ret].start = next->start;
6444 bits[ret].size = next->size;
6445 lookup = next->start + next->size;
6449 next = next_cache_extent(next);
/*
 * Per-entry destructor for the chunk cache: recovers the chunk_record that
 * embeds @cache and unlinks it from its `list` and `dextents` lists.
 * NOTE(review): the release of the record itself is not visible here.
 */
6457 static void free_chunk_record(struct cache_extent *cache)
6459 struct chunk_record *rec;
6461 rec = container_of(cache, struct chunk_record, cache);
6462 list_del_init(&rec->list);
6463 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by passing free_chunk_record() to
 * cache_tree_free_extents().
 */
6467 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6469 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor for the device rb-tree: recovers the device_record
 * that embeds @node.
 * NOTE(review): the release of the record is not visible in this excerpt.
 */
6472 static void free_device_record(struct rb_node *node)
6474 struct device_record *rec;
6476 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation pairing the name `device_cache` with
 * free_device_record().
 * NOTE(review): the macro is defined elsewhere; presumably it expands to the
 * function that frees the whole device rb-tree - confirm against its
 * definition.
 */
6480 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree: its cache entry goes into tree->tree and the
 * record is appended to tree->block_groups.
 * NOTE(review): the handling of a failed insert_cache_extent() and the
 * return value are not visible in this excerpt.
 */
6482 int insert_block_group_record(struct block_group_tree *tree,
6483 struct block_group_record *bg_rec)
6487 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6491 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry destructor for the block group tree: recovers the
 * block_group_record that embeds @cache and unlinks it from its list.
 * NOTE(review): the release of the record itself is not visible here.
 */
6495 static void free_block_group_record(struct cache_extent *cache)
6497 struct block_group_record *rec;
6499 rec = container_of(cache, struct block_group_record, cache);
6500 list_del_init(&rec->list);
/*
 * Release every entry of tree->tree by passing free_block_group_record() to
 * cache_tree_free_extents().
 */
6504 void free_block_group_tree(struct block_group_tree *tree)
6506 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree using insert_cache_extent2() (see the in-body
 * comment for why the special variant is needed).  The record is then queued
 * on both orphan lists of the tree, no_chunk_orphans and no_device_orphans.
 * NOTE(review): the handling of a failed insert and the return value are not
 * visible in this excerpt.
 */
6509 int insert_device_extent_record(struct device_extent_tree *tree,
6510 struct device_extent_record *de_rec)
6515 * Device extent is a bit different from the other extents, because
6516 * the extents which belong to the different devices may have the
6517 * same start and size, so we need use the special extent cache
6518 * search/insert functions.
6520 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6524 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6525 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry destructor for the device extent tree: recovers the
 * device_extent_record that embeds @cache and unlinks it from chunk_list and
 * device_list when it is still linked on them.
 * NOTE(review): the release of the record itself is not visible here.
 */
6529 static void free_device_extent_record(struct cache_extent *cache)
6531 struct device_extent_record *rec;
6533 rec = container_of(cache, struct device_extent_record, cache);
6534 if (!list_empty(&rec->chunk_list))
6535 list_del_init(&rec->chunk_list);
6536 if (!list_empty(&rec->device_list))
6537 list_del_init(&rec->device_list);
/*
 * Release every entry of tree->tree by passing free_device_extent_record()
 * to cache_tree_free_extents().
 */
6541 void free_device_extent_tree(struct device_extent_tree *tree)
6543 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Handle a v0-format extent ref item at @slot of @leaf (only built when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined).
 *
 * When the ref's objectid is below BTRFS_FIRST_FREE_OBJECTID it is recorded
 * as a tree backref.  Otherwise it is recorded as a data backref keyed by
 * (key.objectid, key.offset) with the v0 ref count and zeroed
 * root/owner/offset.
 *
 * NOTE(review): the trailing arguments of the add_tree_backref() call, the
 * return statement and the closing #endif are not visible in this excerpt.
 */
6546 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6547 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6548 struct extent_buffer *leaf, int slot)
6550 struct btrfs_extent_ref_v0 *ref0;
6551 struct btrfs_key key;
6554 btrfs_item_key_to_cpu(leaf, &key, slot);
6555 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6556 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6557 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6560 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6561 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is calloc()ed with room for the chunk's stripes
 * (btrfs_chunk_record_size(num_stripes)).  The cache entry covers
 * (key->offset, chunk length).  The key fields, the leaf generation and the
 * chunk's owner, stripe_len, type flags, io_width, io_align, sector_size,
 * num_stripes and sub_stripes are copied, followed by devid, offset and
 * dev_uuid for every stripe.
 *
 * On allocation failure "memory allocation failed" is printed.
 * NOTE(review): what happens after that message, the slot parameter line,
 * the dev_uuid copy length and the return statement are not visible in this
 * excerpt.
 */
6567 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6568 struct btrfs_key *key,
6571 struct btrfs_chunk *ptr;
6572 struct chunk_record *rec;
6575 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6576 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6578 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6580 fprintf(stderr, "memory allocation failed\n");
6584 INIT_LIST_HEAD(&rec->list);
6585 INIT_LIST_HEAD(&rec->dextents);
6588 rec->cache.start = key->offset;
6589 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6591 rec->generation = btrfs_header_generation(leaf);
6593 rec->objectid = key->objectid;
6594 rec->type = key->type;
6595 rec->offset = key->offset;
6597 rec->length = rec->cache.size;
6598 rec->owner = btrfs_chunk_owner(leaf, ptr);
6599 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6600 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6601 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6602 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6603 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6604 rec->num_stripes = num_stripes;
6605 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6607 for (i = 0; i < rec->num_stripes; ++i) {
6608 rec->stripes[i].devid =
6609 btrfs_stripe_devid_nr(leaf, ptr, i);
6610 rec->stripes[i].offset =
6611 btrfs_stripe_offset_nr(leaf, ptr, i);
6612 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6613 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid chunk
 * is reported with error("chunk(...) is not valid, ignore it").  A record is
 * then built with btrfs_new_chunk_record() and inserted into the cache;
 * "Chunk[...] existed." is printed on the path handling a failed insert.
 *
 * NOTE(review): the slot parameter line, the conditions guarding both
 * messages, cleanup of a rejected record and the return value are not
 * visible in this excerpt.
 */
6620 static int process_chunk_item(struct cache_tree *chunk_cache,
6621 struct btrfs_key *key, struct extent_buffer *eb,
6624 struct chunk_record *rec;
6625 struct btrfs_chunk *chunk;
6628 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6630 * Do extra check for this chunk item,
6632 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6633 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6634 * and owner<->key_type check.
6636 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6639 error("chunk(%llu, %llu) is not valid, ignore it",
6640 key->offset, btrfs_chunk_length(eb, chunk));
6643 rec = btrfs_new_chunk_record(eb, key, slot);
6644 ret = insert_cache_extent(chunk_cache, &rec->cache);
6646 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6647 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree using device_record_compare.
 *
 * The key fields, the leaf generation and the device's id, total bytes and
 * bytes used are copied.  "Device[...] existed." is printed on the path
 * handling a failed rb_insert().
 *
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * later from btrfs_device_id(); the first assignment looks redundant.
 * NOTE(review): what follows the "memory allocation failed" message, cleanup
 * of a rejected record and the return value are not visible in this excerpt.
 */
6654 static int process_device_item(struct rb_root *dev_cache,
6655 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6657 struct btrfs_dev_item *ptr;
6658 struct device_record *rec;
6661 ptr = btrfs_item_ptr(eb,
6662 slot, struct btrfs_dev_item);
6664 rec = malloc(sizeof(*rec));
6666 fprintf(stderr, "memory allocation failed\n");
6670 rec->devid = key->offset;
6671 rec->generation = btrfs_header_generation(eb);
6673 rec->objectid = key->objectid;
6674 rec->type = key->type;
6675 rec->offset = key->offset;
6677 rec->devid = btrfs_device_id(eb, ptr);
6678 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6679 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6681 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6683 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * @slot of @leaf.
 *
 * The cache entry covers (key->objectid, key->offset).  The key fields, the
 * leaf generation and the block group flags are copied and the list head is
 * initialised.
 *
 * NOTE(review): the slot parameter line, what follows the "memory allocation
 * failed" message and the return statement are not visible in this excerpt.
 */
6690 struct block_group_record *
6691 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6694 struct btrfs_block_group_item *ptr;
6695 struct block_group_record *rec;
6697 rec = calloc(1, sizeof(*rec));
6699 fprintf(stderr, "memory allocation failed\n");
6703 rec->cache.start = key->objectid;
6704 rec->cache.size = key->offset;
6706 rec->generation = btrfs_header_generation(leaf);
6708 rec->objectid = key->objectid;
6709 rec->type = key->type;
6710 rec->offset = key->offset;
6712 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6713 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6715 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block group record for the item at @slot of @eb and insert it into
 * @block_group_cache; "Block Group[...] existed." is printed on the path
 * handling a failed insert.
 * NOTE(review): the guard on the message, cleanup of a rejected record and
 * the return value are not visible in this excerpt.
 */
6720 static int process_block_group_item(struct block_group_tree *block_group_cache,
6721 struct btrfs_key *key,
6722 struct extent_buffer *eb, int slot)
6724 struct block_group_record *rec;
6727 rec = btrfs_new_block_group_record(eb, key, slot);
6728 ret = insert_block_group_record(block_group_cache, rec);
6730 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6731 rec->objectid, rec->offset);
/*
 * Build a zero-initialised device_extent_record from the dev extent item at
 * @slot of @leaf.
 *
 * The cache entry is keyed by objectid = key->objectid and
 * start = key->offset, with size taken from the dev extent length.  The key
 * fields, the leaf generation, the chunk objectid and the length are copied
 * and both list heads are initialised.
 *
 * NOTE(review): what follows the "memory allocation failed" message, the
 * target of the chunk offset assignment and the return statement are not
 * visible in this excerpt.
 */
6738 struct device_extent_record *
6739 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6740 struct btrfs_key *key, int slot)
6742 struct device_extent_record *rec;
6743 struct btrfs_dev_extent *ptr;
6745 rec = calloc(1, sizeof(*rec));
6747 fprintf(stderr, "memory allocation failed\n");
6751 rec->cache.objectid = key->objectid;
6752 rec->cache.start = key->offset;
6754 rec->generation = btrfs_header_generation(leaf);
6756 rec->objectid = key->objectid;
6757 rec->type = key->type;
6758 rec->offset = key->offset;
6760 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6761 rec->chunk_objecteid =
6762 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6764 btrfs_dev_extent_chunk_offset(leaf, ptr);
6765 rec->length = btrfs_dev_extent_length(leaf, ptr);
6766 rec->cache.size = rec->length;
6768 INIT_LIST_HEAD(&rec->chunk_list);
6769 INIT_LIST_HEAD(&rec->device_list);
/*
 * Record one dev extent item in @dev_extent_cache.
 *
 * A device_extent_record is built from the item at @slot of @eb and handed to
 * insert_device_extent_record(). The message about an already existing device
 * extent prints objectid, offset and length of the new record; it sits on the
 * failure path of the insertion (presumably a duplicate - TODO confirm).
 */
6775 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6776 struct btrfs_key *key, struct extent_buffer *eb,
6779 struct device_extent_record *rec;
6782 rec = btrfs_new_device_extent_record(eb, key, slot);
6783 ret = insert_device_extent_record(dev_extent_cache, rec);
6786 "Device extent[%llu, %llu, %llu] existed.\n",
6787 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item (EXTENT_ITEM or METADATA_ITEM) at @slot of @eb and
 * feed it into @extent_cache.
 *
 * @root:         used only to reach fs_info for nodesize and sectorsize
 * @extent_cache: cache tree that receives the extent record and its backrefs
 * @eb, @slot:    location of the item
 *
 * Steps visible here:
 *  - the length is nodesize for METADATA_ITEM keys and key.offset otherwise
 *  - a bytenr that is not sectorsize aligned is reported and ignored
 *  - items smaller than struct btrfs_extent_item are treated as the v0 format
 *    when BTRFS_COMPAT_EXTENT_TREE_V0 is defined
 *  - metadata extents must be exactly nodesize long and data extents must be
 *    sectorsize aligned, otherwise they are reported and ignored
 *  - an extent record is added, then every inline ref between the end of the
 *    extent item header and the end of the item is turned into a tree or
 *    data backref; an unknown ref type is reported as a corrupt extent record
 */
6794 static int process_extent_item(struct btrfs_root *root,
6795 struct cache_tree *extent_cache,
6796 struct extent_buffer *eb, int slot)
6798 struct btrfs_extent_item *ei;
6799 struct btrfs_extent_inline_ref *iref;
6800 struct btrfs_extent_data_ref *dref;
6801 struct btrfs_shared_data_ref *sref;
6802 struct btrfs_key key;
6803 struct extent_record tmpl;
6808 u32 item_size = btrfs_item_size_nr(eb, slot);
6814 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys do not carry the length, so nodesize is used */
6816 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6818 num_bytes = root->fs_info->nodesize;
6820 num_bytes = key.offset;
6823 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6824 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6825 key.objectid, root->fs_info->sectorsize);
/* An item shorter than the current extent item is handled as v0 */
6828 if (item_size < sizeof(*ei)) {
6829 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6830 struct btrfs_extent_item_v0 *ei0;
6831 BUG_ON(item_size != sizeof(*ei0));
6832 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6833 refs = btrfs_extent_refs_v0(eb, ei0);
6837 memset(&tmpl, 0, sizeof(tmpl));
6838 tmpl.start = key.objectid;
6839 tmpl.nr = num_bytes;
6840 tmpl.extent_item_refs = refs;
6841 tmpl.metadata = metadata;
6843 tmpl.max_size = num_bytes;
6845 return add_extent_rec(extent_cache, &tmpl);
6848 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6849 refs = btrfs_extent_refs(eb, ei);
6850 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6854 if (metadata && num_bytes != root->fs_info->nodesize) {
6855 error("ignore invalid metadata extent, length %llu does not equal to %u",
6856 num_bytes, root->fs_info->nodesize);
6859 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6860 error("ignore invalid data extent, length %llu is not aligned to %u",
6861 num_bytes, root->fs_info->sectorsize);
6865 memset(&tmpl, 0, sizeof(tmpl));
6866 tmpl.start = key.objectid;
6867 tmpl.nr = num_bytes;
6868 tmpl.extent_item_refs = refs;
6869 tmpl.metadata = metadata;
6871 tmpl.max_size = num_bytes;
6872 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; tree blocks stored under an
 * EXTENT_ITEM key have a btrfs_tree_block_info in between that is skipped.
 */
6874 ptr = (unsigned long)(ei + 1);
6875 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6876 key.type == BTRFS_EXTENT_ITEM_KEY)
6877 ptr += sizeof(struct btrfs_tree_block_info);
6879 end = (unsigned long)ei + item_size;
6881 iref = (struct btrfs_extent_inline_ref *)ptr;
6882 type = btrfs_extent_inline_ref_type(eb, iref);
6883 offset = btrfs_extent_inline_ref_offset(eb, iref);
6885 case BTRFS_TREE_BLOCK_REF_KEY:
6886 ret = add_tree_backref(extent_cache, key.objectid,
6890 "add_tree_backref failed (extent items tree block): %s",
6893 case BTRFS_SHARED_BLOCK_REF_KEY:
6894 ret = add_tree_backref(extent_cache, key.objectid,
6898 "add_tree_backref failed (extent items shared block): %s",
6901 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the offset field of the inline ref */
6902 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6903 add_data_backref(extent_cache, key.objectid, 0,
6904 btrfs_extent_data_ref_root(eb, dref),
6905 btrfs_extent_data_ref_objectid(eb,
6907 btrfs_extent_data_ref_offset(eb, dref),
6908 btrfs_extent_data_ref_count(eb, dref),
6911 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the inline ref header */
6912 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6913 add_data_backref(extent_cache, key.objectid, offset,
6915 btrfs_shared_data_ref_count(eb, sref),
6919 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6920 key.objectid, key.type, num_bytes);
6923 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range starting at @offset and spanning @bytes inside block
 * group @cache is described by a free space entry, after carving out any
 * super block stripes that overlap it.
 *
 * For every super block mirror the super offset is mapped back to logical
 * addresses with btrfs_rmap_block(). A stripe overlapping the front or the
 * back of the range trims it; a stripe in the middle splits it, in which case
 * the left part is checked by a recursive call and the loop continues with
 * the right part.
 *
 * The remaining range is looked up with btrfs_find_free_space(). A missing
 * entry, or an entry whose offset or length differs from the wanted range, is
 * reported on stderr. A matching entry is unlinked from the free space ctl of
 * the block group.
 */
6930 static int check_cache_range(struct btrfs_root *root,
6931 struct btrfs_block_group_cache *cache,
6932 u64 offset, u64 bytes)
6934 struct btrfs_free_space *entry;
6940 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6941 bytenr = btrfs_sb_offset(i);
6942 ret = btrfs_rmap_block(root->fs_info,
6943 cache->key.objectid, bytenr, 0,
6944 &logical, &nr, &stripe_len);
6949 if (logical[nr] + stripe_len <= offset)
6951 if (offset + bytes <= logical[nr])
6953 if (logical[nr] == offset) {
6954 if (stripe_len >= bytes) {
6958 bytes -= stripe_len;
6959 offset += stripe_len;
6960 } else if (logical[nr] < offset) {
6961 if (logical[nr] + stripe_len >=
6966 bytes = (offset + bytes) -
6967 (logical[nr] + stripe_len);
6968 offset = logical[nr] + stripe_len;
6971 * Could be tricky, the super may land in the
6972 * middle of the area we're checking. First
6973 * check the easiest case, it's at the end.
6975 if (logical[nr] + stripe_len >=
6977 bytes = logical[nr] - offset;
6981 /* Check the left side */
6982 ret = check_cache_range(root, cache,
6984 logical[nr] - offset);
6990 /* Now we continue with the right side */
6991 bytes = (offset + bytes) -
6992 (logical[nr] + stripe_len);
6993 offset = logical[nr] + stripe_len;
7000 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7002 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7003 offset, offset+bytes);
7007 if (entry->offset != offset) {
7008 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7013 if (entry->bytes != bytes) {
7014 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7015 bytes, entry->bytes, offset);
7019 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the extent
 * tree.
 *
 * The extent tree is walked from the start of the block group (but not below
 * BTRFS_SUPER_INFO_OFFSET) up to its end. The variable last tracks the end of
 * the previous extent: EXTENT_ITEM keys advance it by key.offset and other
 * accepted keys by nodesize. Every gap between last and the next extent, and
 * the gap between the final extent and the end of the block group, is passed
 * to check_cache_range().
 *
 * After the walk, entries still present in the free_space_offset tree are
 * reported on stderr as leftovers.
 */
7024 static int verify_space_cache(struct btrfs_root *root,
7025 struct btrfs_block_group_cache *cache)
7027 struct btrfs_path path;
7028 struct extent_buffer *leaf;
7029 struct btrfs_key key;
/* All lookups below go to the extent tree, whatever root was passed in */
7033 root = root->fs_info->extent_root;
7035 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7037 btrfs_init_path(&path);
7038 key.objectid = last;
7040 key.type = BTRFS_EXTENT_ITEM_KEY;
7041 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7046 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7047 ret = btrfs_next_leaf(root, &path);
7055 leaf = path.nodes[0];
7056 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once the walk has left the block group */
7057 if (key.objectid >= cache->key.offset + cache->key.objectid)
7059 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7060 key.type != BTRFS_METADATA_ITEM_KEY) {
/* No gap before this extent: only move last past it */
7065 if (last == key.objectid) {
7066 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7067 last = key.objectid + key.offset;
7069 last = key.objectid + root->fs_info->nodesize;
7074 ret = check_cache_range(root, cache, last,
7075 key.objectid - last);
7078 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7079 last = key.objectid + key.offset;
7081 last = key.objectid + root->fs_info->nodesize;
/* Free space between the last extent and the end of the block group */
7085 if (last < cache->key.objectid + cache->key.offset)
7086 ret = check_cache_range(root, cache, last,
7087 cache->key.objectid +
7088 cache->key.offset - last);
7091 btrfs_release_path(&path);
7094 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7095 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * When the super block cache generation is set (not -1) and differs from the
 * super block generation, a message announces that the space cache will be
 * invalidated (presumably the check is skipped then - TODO confirm).
 *
 * Otherwise block groups are visited in order, starting right after the
 * primary super block. For each one the free space ctl is created if missing
 * and any previously loaded free space is dropped. The free space is then
 * loaded either from the free space tree, with the super stripes excluded
 * around the load, when the FREE_SPACE_TREE compat_ro bit is set, or from the
 * free space cache otherwise. The result is checked by verify_space_cache().
 *
 * Progress reporting is started when enabled in ctx and stopped at the end.
 * Returns -EINVAL when any error was counted, 0 otherwise.
 */
7103 static int check_space_cache(struct btrfs_root *root)
7105 struct btrfs_block_group_cache *cache;
7106 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7110 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7111 btrfs_super_generation(root->fs_info->super_copy) !=
7112 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7113 printf("cache and super generation don't match, space cache "
7114 "will be invalidated\n");
7118 if (ctx.progress_enabled) {
7119 ctx.tp = TASK_FREE_SPACE;
7120 task_start(ctx.info);
7124 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup continues right behind this block group */
7128 start = cache->key.objectid + cache->key.offset;
7129 if (!cache->free_space_ctl) {
7130 if (btrfs_init_free_space_ctl(cache,
7131 root->fs_info->sectorsize)) {
7136 btrfs_remove_free_space_cache(cache);
7139 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7140 ret = exclude_super_stripes(root, cache);
7142 fprintf(stderr, "could not exclude super stripes: %s\n",
7147 ret = load_free_space_tree(root->fs_info, cache);
7148 free_excluded_extents(root, cache);
7150 fprintf(stderr, "could not load free space tree: %s\n",
7157 ret = load_free_space_cache(root->fs_info, cache);
7162 ret = verify_space_cache(root, cache);
7164 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7165 cache->key.objectid);
7170 task_stop(ctx.info);
7172 return error ? -EINVAL : 0;
/*
 * Read the data covered by one csum item and compare its checksums with the
 * values stored in the item.
 *
 * @root:        used to reach fs_info
 * @bytenr:      logical start of the data range
 * @num_bytes:   length of the range; the function bails out early when it is
 *               not a multiple of sectorsize
 * @leaf_offset: offset of the csum item payload inside @eb
 * @eb:          leaf that holds the csum item
 *
 * The data is read into a buffer of @num_bytes bytes in as large pieces as
 * read_extent_data() delivers. Each sectorsize block is checksummed with
 * btrfs_csum_data() and btrfs_csum_final() and compared with the expected
 * value read from @eb at leaf_offset + block index * csum_size. A mismatch is
 * printed with the mirror number, and while further copies exist according
 * to btrfs_num_copies() another mirror is tried.
 */
7175 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7176 u64 num_bytes, unsigned long leaf_offset,
7177 struct extent_buffer *eb) {
7179 struct btrfs_fs_info *fs_info = root->fs_info;
7181 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7183 unsigned long csum_offset;
7187 u64 data_checked = 0;
7193 if (num_bytes % fs_info->sectorsize)
7196 data = malloc(num_bytes);
7200 while (offset < num_bytes) {
7203 read_len = num_bytes - offset;
7204 /* read as much space once a time */
7205 ret = read_extent_data(fs_info, data + offset,
7206 bytenr + offset, &read_len, mirror);
7210 /* verify every 4k data's checksum */
7211 while (data_checked < read_len) {
7213 tmp = offset + data_checked;
7215 csum = btrfs_csum_data((char *)data + tmp,
7216 csum, fs_info->sectorsize);
7217 btrfs_csum_final(csum, (u8 *)&csum);
7219 csum_offset = leaf_offset +
7220 tmp / fs_info->sectorsize * csum_size;
7221 read_extent_buffer(eb, (char *)&csum_expected,
7222 csum_offset, csum_size);
7223 /* try another mirror */
7224 if (csum != csum_expected) {
7225 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7226 mirror, bytenr + tmp,
7227 csum, csum_expected);
7228 num_copies = btrfs_num_copies(root->fs_info,
7230 if (mirror < num_copies - 1) {
7235 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range starting at @bytenr and spanning num_bytes is
 * backed by extent items in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back to the
 * previous item, and further back while the key type sorts after
 * EXTENT_ITEM, so that an extent starting at or before @bytenr is found.
 * From there the leaf items are walked forward. Each EXTENT_ITEM overlapping
 * the range shrinks it from the front or from the back; an extent lying in
 * the middle makes the function recurse for the right part and keep the left
 * part for itself.
 *
 * When bytes remain uncovered and no other error occurred, a message about
 * missing extents for the csum range is printed on stderr.
 */
7244 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7247 struct btrfs_path path;
7248 struct extent_buffer *leaf;
7249 struct btrfs_key key;
7252 btrfs_init_path(&path);
7253 key.objectid = bytenr;
7254 key.type = BTRFS_EXTENT_ITEM_KEY;
7255 key.offset = (u64)-1;
7258 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7261 fprintf(stderr, "Error looking up extent record %d\n", ret);
7262 btrfs_release_path(&path);
7265 if (path.slots[0] > 0) {
7268 ret = btrfs_prev_leaf(root, &path);
7271 } else if (ret > 0) {
7278 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7281 * Block group items come before extent items if they have the same
7282 * bytenr, so walk back one more just in case. Dear future traveller,
7283 * first congrats on mastering time travel. Now if it's not too much
7284 * trouble could you go back to 2006 and tell Chris to make the
7285 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7286 * EXTENT_ITEM_KEY please?
7288 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7289 if (path.slots[0] > 0) {
7292 ret = btrfs_prev_leaf(root, &path);
7295 } else if (ret > 0) {
7300 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7304 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7305 ret = btrfs_next_leaf(root, &path);
7307 fprintf(stderr, "Error going to next leaf "
7309 btrfs_release_path(&path);
7315 leaf = path.nodes[0];
7316 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7317 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7321 if (key.objectid + key.offset < bytenr) {
7325 if (key.objectid > bytenr + num_bytes)
7328 if (key.objectid == bytenr) {
7329 if (key.offset >= num_bytes) {
7333 num_bytes -= key.offset;
7334 bytenr += key.offset;
7335 } else if (key.objectid < bytenr) {
7336 if (key.objectid + key.offset >= bytenr + num_bytes) {
7340 num_bytes = (bytenr + num_bytes) -
7341 (key.objectid + key.offset);
7342 bytenr = key.objectid + key.offset;
7344 if (key.objectid + key.offset < bytenr + num_bytes) {
7345 u64 new_start = key.objectid + key.offset;
7346 u64 new_bytes = bytenr + num_bytes - new_start;
7349 * Weird case, the extent is in the middle of
7350 * our range, we'll have to search one side
7351 * and then the other. Not sure if this happens
7352 * in real life, but no harm in coding it up
7353 * anyway just in case.
7355 btrfs_release_path(&path);
7356 ret = check_extent_exists(root, new_start,
7359 fprintf(stderr, "Right section didn't "
7363 num_bytes = key.objectid - bytenr;
7366 num_bytes = key.objectid - bytenr;
7373 if (num_bytes && !ret) {
7374 fprintf(stderr, "There are no extents for csum range "
7375 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7379 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that every checksummed range is backed by an
 * extent record.
 *
 * The csum root is taken from fs_info; a root node that is not uptodate is
 * reported as a missing csum tree. The tree is then walked from the first
 * EXTENT_CSUM item. For each item the covered data length is
 * (item size / csum size) * sectorsize. When the global check_data_csum is
 * set, the data itself is verified with check_extent_csums(); otherwise that
 * step is skipped.
 *
 * Adjacent csum items are merged into one running range described by offset
 * and num_bytes. When the next item does not start at offset + num_bytes, the
 * accumulated range is checked with check_extent_exists() and a new range
 * starts at the current key offset.
 */
7383 static int check_csums(struct btrfs_root *root)
7385 struct btrfs_path path;
7386 struct extent_buffer *leaf;
7387 struct btrfs_key key;
7388 u64 offset = 0, num_bytes = 0;
7389 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7393 unsigned long leaf_offset;
7395 root = root->fs_info->csum_root;
7396 if (!extent_buffer_uptodate(root->node)) {
7397 fprintf(stderr, "No valid csum tree found\n");
7401 btrfs_init_path(&path);
7402 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7403 key.type = BTRFS_EXTENT_CSUM_KEY;
7405 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7407 fprintf(stderr, "Error searching csum tree %d\n", ret);
7408 btrfs_release_path(&path);
7412 if (ret > 0 && path.slots[0])
7417 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7418 ret = btrfs_next_leaf(root, &path);
7420 fprintf(stderr, "Error going to next leaf "
7427 leaf = path.nodes[0];
7429 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7430 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* One checksum of csum_size bytes covers one sector of data */
7435 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7436 csum_size) * root->fs_info->sectorsize;
7437 if (!check_data_csum)
7438 goto skip_csum_check;
7439 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7440 ret = check_extent_csums(root, key.offset, data_len,
7446 offset = key.offset;
/* A hole between csum items ends the current contiguous range */
7447 } else if (key.offset != offset + num_bytes) {
7448 ret = check_extent_exists(root, offset, num_bytes);
7450 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7451 "there is no extent record\n",
7452 offset, offset+num_bytes);
7455 offset = key.offset;
7458 num_bytes += data_len;
7462 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in the order objectid, type,
 * offset; a later field is only looked at when the earlier ones are equal.
 *
 * NOTE(review): presumably returns nonzero when @key sorts strictly before
 * @drop_key and zero otherwise - confirm against the return statements.
 */
7466 static int is_dropped_key(struct btrfs_key *key,
7467 struct btrfs_key *drop_key) {
7468 if (key->objectid < drop_key->objectid)
7470 else if (key->objectid == drop_key->objectid) {
7471 if (key->type < drop_key->type)
7473 else if (key->type == drop_key->type) {
7474 if (key->offset < drop_key->offset)
7482 * Here are the rules for FULL_BACKREF.
7484 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7485 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7487 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7488 * if it happened after the relocation occurred since we'll have dropped the
7489 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7490 * have no real way to know for sure.
7492 * We process the blocks one root at a time, and we start from the lowest root
7493 * objectid and go to the highest. So we can just lookup the owner backref for
7494 * the record and if we don't find it then we know it doesn't exist and we have
7497 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7498 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7499 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether the tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and store the result in *flags.
 *
 * @extent_cache: cache holding the extent record of @buf; the record must
 *                have been added before this call
 * @buf:          tree block being examined
 * @ri:           root item record of the root currently being walked
 *
 * Checks performed in order: roots with an objectid below
 * BTRFS_FIRST_FREE_OBJECTID, the root node itself (buf->start equals
 * ri->bytenr), the RELOC header flag, the header owner matching
 * ri->objectid, and finally a lookup of the tree backref owned by the header
 * owner. Depending on which check matches, the flag is either left clear or
 * set in *flags (the branch targets are not spelled out here - TODO confirm
 * which check leads to which outcome).
 *
 * In both outcomes a previously recorded flag_block_full_backref that
 * disagrees with the new result marks the record with bad_full_backref.
 */
7501 static int calc_extent_flag(struct cache_tree *extent_cache,
7502 struct extent_buffer *buf,
7503 struct root_item_record *ri,
7506 struct extent_record *rec;
7507 struct cache_extent *cache;
7508 struct tree_backref *tback;
7511 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7512 /* we have added this extent before */
7516 rec = container_of(cache, struct extent_record, cache);
7519 * Except file/reloc tree, we can not have
7522 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7527 if (buf->start == ri->bytenr)
7530 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7533 owner = btrfs_header_owner(buf);
7534 if (owner == ri->objectid)
7537 tback = find_tree_backref(rec, 0, owner);
7542 if (rec->flag_block_full_backref != FLAG_UNSET &&
7543 rec->flag_block_full_backref != 0)
7544 rec->bad_full_backref = 1;
7547 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7548 if (rec->flag_block_full_backref != FLAG_UNSET &&
7549 rec->flag_block_full_backref != 1)
7550 rec->bad_full_backref = 1;
/*
 * Print a one line diagnostic on stderr saying that @key_type was found in
 * the root @rootid. The key type and the root id are rendered by
 * print_key_type() and print_objectid().
 */
7554 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7556 fprintf(stderr, "Invalid key type(");
7557 print_key_type(stderr, 0, key_type);
7558 fprintf(stderr, ") found in root(");
7559 print_objectid(stderr, rootid, 0);
7560 fprintf(stderr, ")\n");
7564 * Check if the key is valid with its extent buffer.
7566 * This is an early check in case an invalid key exists in an extent buffer
7567 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check whether items of @key_type are allowed in the tree @rootid.
 *
 * Rules visible here:
 *  - DEV_ITEM and CHUNK_ITEM only in the chunk tree
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM only in the extent tree
 *  - ROOT_ITEM only in the root tree
 *  - DEV_EXTENT only in the dev tree
 * A violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): the case label BTRFS_CSUM_TREE_OBJECTID is an objectid
 * constant, while every other label is a key type and the comment next to it
 * talks about csum items. BTRFS_EXTENT_CSUM_KEY may have been intended; the
 * root id condition of that case should be reviewed together with the label.
 */
7570 static int check_type_with_root(u64 rootid, u8 key_type)
7573 /* Only valid in chunk tree */
7574 case BTRFS_DEV_ITEM_KEY:
7575 case BTRFS_CHUNK_ITEM_KEY:
7576 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7579 /* valid in csum and log tree */
7580 case BTRFS_CSUM_TREE_OBJECTID:
7581 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7585 case BTRFS_EXTENT_ITEM_KEY:
7586 case BTRFS_METADATA_ITEM_KEY:
7587 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7588 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7591 case BTRFS_ROOT_ITEM_KEY:
7592 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7595 case BTRFS_DEV_EXTENT_KEY:
7596 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7602 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next pending tree block, read it and record everything it
 * references.
 *
 * @root:              used to reach fs_info
 * @bits:              scratch array filled by pick_next_pending()
 * @last:              in/out, start of the block processed most recently
 * @pending, @nodes:   cache trees of blocks still to be visited; blocks
 *                     referenced from a node are queued on one of the two
 * @seen:              passed to add_pending() together with the queue
 * @reada:             blocks for which readahead was already issued
 * @extent_cache:      extent records and backrefs collected so far
 * @chunk_cache, @dev_cache, @block_group_cache, @dev_extent_cache:
 *                     receive the chunk, device, block group and dev extent
 *                     items found in leaves
 * @ri:                root item record of the root being walked
 *
 * Outline:
 *  1. Pick candidates, issue readahead for them and take bits[0] as the
 *     block to process; it is removed from pending, reada and nodes.
 *  2. Read the block using the parent generation stored in its extent
 *     record when one exists. A block that is not uptodate is recorded via
 *     record_bad_block_io().
 *  3. Determine the FULL_BACKREF flag, from the extent tree unless
 *     init_extent_tree is set, with calc_extent_flag() as the alternative,
 *     and note inconsistencies in rec->bad_full_backref.
 *  4. Run check_block() on the buffer.
 *  5. For a leaf, dispatch every item by key type to the matching
 *     process or add helper and account data bytes for regular file
 *     extents. For a node, add an extent record and a tree backref for
 *     every child pointer and queue the child.
 *  6. Update the global btree byte counters and release the buffer.
 */
7606 static int run_next_block(struct btrfs_root *root,
7607 struct block_info *bits,
7610 struct cache_tree *pending,
7611 struct cache_tree *seen,
7612 struct cache_tree *reada,
7613 struct cache_tree *nodes,
7614 struct cache_tree *extent_cache,
7615 struct cache_tree *chunk_cache,
7616 struct rb_root *dev_cache,
7617 struct block_group_tree *block_group_cache,
7618 struct device_extent_tree *dev_extent_cache,
7619 struct root_item_record *ri)
7621 struct btrfs_fs_info *fs_info = root->fs_info;
7622 struct extent_buffer *buf;
7623 struct extent_record *rec = NULL;
7634 struct btrfs_key key;
7635 struct cache_extent *cache;
7638 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7639 bits_nr, &reada_bits);
7644 for(i = 0; i < nritems; i++) {
7645 ret = add_cache_extent(reada, bits[i].start,
7650 /* fixme, get the parent transid */
7651 readahead_tree_block(fs_info, bits[i].start, 0);
7654 *last = bits[0].start;
7655 bytenr = bits[0].start;
7656 size = bits[0].size;
/* The chosen block leaves every queue it may still sit on */
7658 cache = lookup_cache_extent(pending, bytenr, size);
7660 remove_cache_extent(pending, cache);
7663 cache = lookup_cache_extent(reada, bytenr, size);
7665 remove_cache_extent(reada, cache);
7668 cache = lookup_cache_extent(nodes, bytenr, size);
7670 remove_cache_extent(nodes, cache);
7673 cache = lookup_cache_extent(extent_cache, bytenr, size);
7675 rec = container_of(cache, struct extent_record, cache);
7676 gen = rec->parent_generation;
7679 /* fixme, get the real parent transid */
7680 buf = read_tree_block(root->fs_info, bytenr, gen);
7681 if (!extent_buffer_uptodate(buf)) {
7682 record_bad_block_io(root->fs_info,
7683 extent_cache, bytenr, size);
7687 nritems = btrfs_header_nritems(buf);
7690 if (!init_extent_tree) {
7691 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7692 btrfs_header_level(buf), 1, NULL,
7695 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7697 fprintf(stderr, "Couldn't calc extent flags\n");
7698 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7703 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7705 fprintf(stderr, "Couldn't calc extent flags\n");
7706 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7710 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7712 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7713 ri->objectid == btrfs_header_owner(buf)) {
7715 * Ok we got to this block from it's original owner and
7716 * we have FULL_BACKREF set. Relocation can leave
7717 * converted blocks over so this is altogether possible,
7718 * however it's not possible if the generation > the
7719 * last snapshot, so check for this case.
7721 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7722 btrfs_header_generation(buf) > ri->last_snapshot) {
7723 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7724 rec->bad_full_backref = 1;
7729 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7730 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7731 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7732 rec->bad_full_backref = 1;
7736 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7737 rec->flag_block_full_backref = 1;
7741 rec->flag_block_full_backref = 0;
7743 owner = btrfs_header_owner(buf);
7746 ret = check_block(root, extent_cache, buf, flags);
7750 if (btrfs_is_leaf(buf)) {
7751 btree_space_waste += btrfs_leaf_free_space(root, buf);
7752 for (i = 0; i < nritems; i++) {
7753 struct btrfs_file_extent_item *fi;
7754 btrfs_item_key_to_cpu(buf, &key, i);
7756 * Check key type against the leaf owner.
7757 * Could filter quite a lot of early error if
7760 if (check_type_with_root(btrfs_header_owner(buf),
7762 fprintf(stderr, "ignoring invalid key\n");
7765 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7766 process_extent_item(root, extent_cache, buf,
7770 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7771 process_extent_item(root, extent_cache, buf,
7775 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7777 btrfs_item_size_nr(buf, i);
7780 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7781 process_chunk_item(chunk_cache, &key, buf, i);
7784 if (key.type == BTRFS_DEV_ITEM_KEY) {
7785 process_device_item(dev_cache, &key, buf, i);
7788 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7789 process_block_group_item(block_group_cache,
7793 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7794 process_device_extent_item(dev_extent_cache,
7799 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7800 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7801 process_extent_ref_v0(extent_cache, buf, i);
7808 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7809 ret = add_tree_backref(extent_cache,
7810 key.objectid, 0, key.offset, 0);
7813 "add_tree_backref failed (leaf tree block): %s",
7817 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7818 ret = add_tree_backref(extent_cache,
7819 key.objectid, key.offset, 0, 0);
7822 "add_tree_backref failed (leaf shared block): %s",
7826 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7827 struct btrfs_extent_data_ref *ref;
7828 ref = btrfs_item_ptr(buf, i,
7829 struct btrfs_extent_data_ref);
7830 add_data_backref(extent_cache,
7832 btrfs_extent_data_ref_root(buf, ref),
7833 btrfs_extent_data_ref_objectid(buf,
7835 btrfs_extent_data_ref_offset(buf, ref),
7836 btrfs_extent_data_ref_count(buf, ref),
7837 0, root->fs_info->sectorsize);
7840 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7841 struct btrfs_shared_data_ref *ref;
7842 ref = btrfs_item_ptr(buf, i,
7843 struct btrfs_shared_data_ref);
7844 add_data_backref(extent_cache,
7845 key.objectid, key.offset, 0, 0, 0,
7846 btrfs_shared_data_ref_count(buf, ref),
7847 0, root->fs_info->sectorsize);
7850 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7851 struct bad_item *bad;
7853 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7857 bad = malloc(sizeof(struct bad_item));
7860 INIT_LIST_HEAD(&bad->list);
7861 memcpy(&bad->key, &key,
7862 sizeof(struct btrfs_key));
7863 bad->root_id = owner;
7864 list_add_tail(&bad->list, &delete_items);
7867 if (key.type != BTRFS_EXTENT_DATA_KEY)
7869 fi = btrfs_item_ptr(buf, i,
7870 struct btrfs_file_extent_item);
7871 if (btrfs_file_extent_type(buf, fi) ==
7872 BTRFS_FILE_EXTENT_INLINE)
7874 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7877 data_bytes_allocated +=
7878 btrfs_file_extent_disk_num_bytes(buf, fi);
7879 if (data_bytes_allocated < root->fs_info->sectorsize) {
7882 data_bytes_referenced +=
7883 btrfs_file_extent_num_bytes(buf, fi);
7884 add_data_backref(extent_cache,
7885 btrfs_file_extent_disk_bytenr(buf, fi),
7886 parent, owner, key.objectid, key.offset -
7887 btrfs_file_extent_offset(buf, fi), 1, 1,
7888 btrfs_file_extent_disk_num_bytes(buf, fi));
7892 struct btrfs_key first_key;
7894 first_key.objectid = 0;
7897 btrfs_item_key_to_cpu(buf, &first_key, 0);
7898 level = btrfs_header_level(buf);
7899 for (i = 0; i < nritems; i++) {
7900 struct extent_record tmpl;
7902 ptr = btrfs_node_blockptr(buf, i);
7903 size = root->fs_info->nodesize;
7904 btrfs_node_key_to_cpu(buf, &key, i);
7906 if ((level == ri->drop_level)
7907 && is_dropped_key(&key, &ri->drop_key)) {
7912 memset(&tmpl, 0, sizeof(tmpl));
7913 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7914 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7919 tmpl.max_size = size;
7920 ret = add_extent_rec(extent_cache, &tmpl);
7924 ret = add_tree_backref(extent_cache, ptr, parent,
7928 "add_tree_backref failed (non-leaf block): %s",
7934 add_pending(nodes, seen, ptr, size);
7936 add_pending(pending, seen, ptr, size);
7939 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7940 nritems) * sizeof(struct btrfs_key_ptr);
7942 total_btree_bytes += buf->len;
7943 if (fs_root_objectid(btrfs_header_owner(buf)))
7944 total_fs_tree_bytes += buf->len;
7945 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7946 total_extent_tree_bytes += buf->len;
7948 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for processing and record it in
 * @extent_cache.
 *
 * A root with a header level above zero is queued on @nodes, a leaf root on
 * @pending. An extent record covering the buffer is added, followed by a tree
 * backref. For the reloc tree objectid, or when the header backref revision
 * is older than BTRFS_MIXED_BACKREF_REV, the backref uses buf->start as its
 * parent; otherwise it is keyed by the root objectid with a parent of zero.
 */
7952 static int add_root_to_pending(struct extent_buffer *buf,
7953 struct cache_tree *extent_cache,
7954 struct cache_tree *pending,
7955 struct cache_tree *seen,
7956 struct cache_tree *nodes,
7959 struct extent_record tmpl;
7962 if (btrfs_header_level(buf) > 0)
7963 add_pending(nodes, seen, buf->start, buf->len);
7965 add_pending(pending, seen, buf->start, buf->len);
7967 memset(&tmpl, 0, sizeof(tmpl));
7968 tmpl.start = buf->start;
7973 tmpl.max_size = buf->len;
7974 add_extent_rec(extent_cache, &tmpl);
7976 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7977 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7978 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7981 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7986 /* as we fix the tree, we might be deleting blocks that
7987 * we're tracking for repair. This hook makes sure we
7988 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in sync when an extent reference is dropped
 * during repair.
 *
 * The extent record for the range is looked up in
 * root->fs_info->fsck_extent_cache. An @owner at or above
 * BTRFS_FIRST_FREE_OBJECTID selects the data backref path, anything below the
 * tree backref path.
 *
 * Data backrefs have refs_to_drop subtracted from their found_ref and
 * num_refs counters and from the matching counters of the record; the found
 * bits are cleared once a counter reaches zero. Tree backrefs simply have
 * their found bits cleared and the record counters adjusted. Finally
 * maybe_free_extent_rec() gets a chance to release the record.
 *
 * NOTE(review): in both paths the backref is unlinked when found_extent_tree
 * is clear while found_ref is still set. Confirm that this condition is the
 * intended one and not meant to require both bits to be clear.
 */
7990 static int free_extent_hook(struct btrfs_trans_handle *trans,
7991 struct btrfs_root *root,
7992 u64 bytenr, u64 num_bytes, u64 parent,
7993 u64 root_objectid, u64 owner, u64 offset,
7996 struct extent_record *rec;
7997 struct cache_extent *cache;
7999 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8001 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8002 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8006 rec = container_of(cache, struct extent_record, cache);
8008 struct data_backref *back;
8009 back = find_data_backref(rec, parent, root_objectid, owner,
8010 offset, 1, bytenr, num_bytes);
8013 if (back->node.found_ref) {
8014 back->found_ref -= refs_to_drop;
8016 rec->refs -= refs_to_drop;
8018 if (back->node.found_extent_tree) {
8019 back->num_refs -= refs_to_drop;
8020 if (rec->extent_item_refs)
8021 rec->extent_item_refs -= refs_to_drop;
8023 if (back->found_ref == 0)
8024 back->node.found_ref = 0;
8025 if (back->num_refs == 0)
8026 back->node.found_extent_tree = 0;
8028 if (!back->node.found_extent_tree && back->node.found_ref) {
8029 list_del(&back->node.list);
8033 struct tree_backref *back;
8034 back = find_tree_backref(rec, parent, root_objectid);
8037 if (back->node.found_ref) {
8040 back->node.found_ref = 0;
8042 if (back->node.found_extent_tree) {
8043 if (rec->extent_item_refs)
8044 rec->extent_item_refs--;
8045 back->node.found_extent_tree = 0;
8047 if (!back->node.found_extent_tree && back->node.found_ref) {
8048 list_del(&back->node.list);
8052 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete every extent tree item that belongs to the extent at bytenr.
 *
 * The search starts at the highest possible key for that bytenr (offset -1)
 * and works backwards. Items whose objectid differs from bytenr end the
 * walk. Items of a type other than the extent item, metadata item and
 * backref key types listed below are skipped by moving the search key just
 * below them. Matching items are announced on stderr and removed with
 * btrfs_del_item().
 *
 * After an EXTENT_ITEM or METADATA_ITEM was deleted the block group
 * accounting is updated via btrfs_update_block_group(); the length is
 * key.offset for extent items and nodesize for metadata items.
 */
8057 static int delete_extent_records(struct btrfs_trans_handle *trans,
8058 struct btrfs_root *root,
8059 struct btrfs_path *path,
8062 struct btrfs_key key;
8063 struct btrfs_key found_key;
8064 struct extent_buffer *leaf;
8069 key.objectid = bytenr;
8071 key.offset = (u64)-1;
8074 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8081 if (path->slots[0] == 0)
8087 leaf = path->nodes[0];
8088 slot = path->slots[0];
8090 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8091 if (found_key.objectid != bytenr)
8094 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8095 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8096 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8097 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8098 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8099 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8100 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8101 btrfs_release_path(path);
/* Step the search key to just below the unrelated item */
8102 if (found_key.type == 0) {
8103 if (found_key.offset == 0)
8105 key.offset = found_key.offset - 1;
8106 key.type = found_key.type;
8108 key.type = found_key.type - 1;
8109 key.offset = (u64)-1;
8113 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8114 found_key.objectid, found_key.type, found_key.offset);
8116 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8119 btrfs_release_path(path);
8121 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8122 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8123 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8124 found_key.offset : root->fs_info->nodesize;
8126 ret = btrfs_update_block_group(trans, root, bytenr,
8133 btrfs_release_path(path);
8138 * for a single backref, this will allocate a new extent
8139 * and add the backref to it.
/*
 * Write one backref of @rec into the extent tree, creating the extent item
 * first when needed.
 *
 * @trans, @info: transaction and filesystem the change is made in
 * @path:         scratch path, released before returning
 * @rec:          extent record being repaired
 * @back:         the backref to write; is_data selects the data or tree path
 * @allocated:    NOTE(review): presumably tells whether the extent item was
 *                already created by an earlier call - confirm
 * @flags:        extra extent flags ORed into the item of a tree block
 *
 * When the item is created here it is inserted as an EXTENT_ITEM keyed by
 * (rec->start, rec->max_size) with zero refs and the generation of the
 * record. Data extents get BTRFS_EXTENT_FLAG_DATA. Tree blocks get a
 * btrfs_tree_block_info holding the level and a key built from
 * rec->info_objectid, plus BTRFS_EXTENT_FLAG_TREE_BLOCK. The block group
 * accounting is then updated for the new extent.
 *
 * Afterwards references are added with btrfs_inc_extent_ref(): one call per
 * found_ref for data backrefs and a single call for tree backrefs. A full
 * backref uses the parent stored in the backref. Each addition is logged on
 * stderr.
 *
 * NOTE(review): the argument spelled with a copyright sign in the
 * btrfs_set_disk_key and btrfs_set_tree_block_key calls looks like a
 * mis-encoded address-of copy_key, and the declaration of copy_key ends with
 * a doubled semicolon.
 */
8141 static int record_extent(struct btrfs_trans_handle *trans,
8142 struct btrfs_fs_info *info,
8143 struct btrfs_path *path,
8144 struct extent_record *rec,
8145 struct extent_backref *back,
8146 int allocated, u64 flags)
8149 struct btrfs_root *extent_root = info->extent_root;
8150 struct extent_buffer *leaf;
8151 struct btrfs_key ins_key;
8152 struct btrfs_extent_item *ei;
8153 struct data_backref *dback;
8154 struct btrfs_tree_block_info *bi;
8157 rec->max_size = max_t(u64, rec->max_size,
8161 u32 item_size = sizeof(*ei);
8164 item_size += sizeof(*bi);
8166 ins_key.objectid = rec->start;
8167 ins_key.offset = rec->max_size;
8168 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8170 ret = btrfs_insert_empty_item(trans, extent_root, path,
8171 &ins_key, item_size);
8175 leaf = path->nodes[0];
8176 ei = btrfs_item_ptr(leaf, path->slots[0],
8177 struct btrfs_extent_item);
/* The ref count starts at zero and is raised by the inc_extent_ref calls */
8179 btrfs_set_extent_refs(leaf, ei, 0);
8180 btrfs_set_extent_generation(leaf, ei, rec->generation);
8182 if (back->is_data) {
8183 btrfs_set_extent_flags(leaf, ei,
8184 BTRFS_EXTENT_FLAG_DATA);
8186 struct btrfs_disk_key copy_key;;
8188 bi = (struct btrfs_tree_block_info *)(ei + 1);
8189 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8192 btrfs_set_disk_key_objectid(©_key,
8193 rec->info_objectid);
8194 btrfs_set_disk_key_type(©_key, 0);
8195 btrfs_set_disk_key_offset(©_key, 0);
8197 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8198 btrfs_set_tree_block_key(leaf, bi, ©_key);
8200 btrfs_set_extent_flags(leaf, ei,
8201 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8204 btrfs_mark_buffer_dirty(leaf);
8205 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8206 rec->max_size, 1, 0);
8209 btrfs_release_path(path);
8212 if (back->is_data) {
8216 dback = to_data_backref(back);
8217 if (back->full_backref)
8218 parent = dback->parent;
8222 for (i = 0; i < dback->found_ref; i++) {
8223 /* if parent != 0, we're doing a full backref
8224 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8225 * just makes the backref allocator create a data
8228 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8229 rec->start, rec->max_size,
8233 BTRFS_FIRST_FREE_OBJECTID :
8239 fprintf(stderr, "adding new data backref"
8240 " on %llu %s %llu owner %llu"
8241 " offset %llu found %d\n",
8242 (unsigned long long)rec->start,
8243 back->full_backref ?
8245 back->full_backref ?
8246 (unsigned long long)parent :
8247 (unsigned long long)dback->root,
8248 (unsigned long long)dback->owner,
8249 (unsigned long long)dback->offset,
8253 struct tree_backref *tback;
8255 tback = to_tree_backref(back);
8256 if (back->full_backref)
8257 parent = tback->parent;
8261 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8262 rec->start, rec->max_size,
8263 parent, tback->root, 0, 0);
8264 fprintf(stderr, "adding new tree backref on "
8265 "start %llu len %llu parent %llu root %llu\n",
8266 rec->start, rec->max_size, parent, tback->root);
8269 btrfs_release_path(path);
/*
 * Scan the list 'entries' for an extent_entry whose bytenr and bytes both
 * equal the requested values.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * verify_backrefs() tests the result with '!entry', so presumably the match
 * is returned and NULL means "not found" -- confirm against the full file.
 */
8273 static struct extent_entry *find_entry(struct list_head *entries,
8274 u64 bytenr, u64 bytes)
8276 struct extent_entry *entry = NULL;
8278 list_for_each_entry(entry, entries, list) {
/* Both the start and the length have to match. */
8279 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the entry in 'entries' that the most backrefs agree on, comparing
 * entries by their 'count' field.  An entry whose 'broken' counter equals its
 * 'count' is not trusted, and a tie in 'count' with the current best means
 * the search has to continue.
 *
 * NOTE(review): several statements (the skip/reset actions and the final
 * return) are not visible in this listing.  verify_backrefs() prints an
 * "evenly split" failure after calling this, so presumably NULL is returned
 * when no entry clearly wins -- confirm against the full file.
 */
8286 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8288 struct extent_entry *entry, *best = NULL, *prev = NULL;
8290 list_for_each_entry(entry, entries, list) {
8292 * If there are as many broken entries as entries then we know
8293 * not to trust this particular entry.
8295 if (entry->broken == entry->count)
8299 * Special case, when there are only two entries and 'best' is
8309 * If our current entry == best then we can't be sure our best
8310 * is really the best, so we need to keep searching.
8312 if (best && best->count == entry->count) {
8318 /* Prev == entry, not good enough, have to keep searching */
8319 if (!prev->broken && prev->count == entry->count)
/* Prefer whichever of 'prev' and 'entry' has the higher count. */
8323 best = (prev->count > entry->count) ? prev : entry;
8324 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it matches the agreed extent 'entry'.
 *
 * @info:  filesystem being repaired
 * @path:  caller-supplied path, released again before the final return
 * @dback: data backref whose file extent (root/owner/offset) disagrees
 * @entry: the bytenr/bytes pair the file extent should point to
 *
 * The file extent is first located with a read-only search, then looked up
 * again inside a transaction with COW so it can be modified.  The disk
 * bytenr and extent offset are adjusted, disk_num_bytes is set to
 * entry->bytes, and the transaction is committed.
 *
 * Returns 'ret' if it is non-zero, otherwise the result of the commit.
 * NOTE(review): the error-path statements are not visible in this listing.
 */
8332 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8333 struct data_backref *dback, struct extent_entry *entry)
8335 struct btrfs_trans_handle *trans;
8336 struct btrfs_root *root;
8337 struct btrfs_file_extent_item *fi;
8338 struct extent_buffer *leaf;
8339 struct btrfs_key key;
/* Look up the fs tree that owns the reference. */
8343 key.objectid = dback->root;
8344 key.type = BTRFS_ROOT_ITEM_KEY;
8345 key.offset = (u64)-1;
8346 root = btrfs_read_fs_root(info, &key);
8348 fprintf(stderr, "Couldn't find root for our ref\n");
8353 * The backref points to the original offset of the extent if it was
8354 * split, so we need to search down to the offset we have and then walk
8355 * forward until we find the backref we're looking for.
8357 key.objectid = dback->owner;
8358 key.type = BTRFS_EXTENT_DATA_KEY;
8359 key.offset = dback->offset;
/* Read-only lookup: no transaction handle and no COW. */
8360 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8362 fprintf(stderr, "Error looking up ref %d\n", ret);
8367 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8368 ret = btrfs_next_leaf(root, path);
8370 fprintf(stderr, "Couldn't find our ref, next\n");
8374 leaf = path->nodes[0];
8375 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8376 if (key.objectid != dback->owner ||
8377 key.type != BTRFS_EXTENT_DATA_KEY) {
8378 fprintf(stderr, "Couldn't find our ref, search\n");
8381 fi = btrfs_item_ptr(leaf, path->slots[0],
8382 struct btrfs_file_extent_item);
8383 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8384 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* The wanted item is the one whose disk bytenr and length match the backref. */
8386 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8391 btrfs_release_path(path);
8393 trans = btrfs_start_transaction(root, 1);
8395 return PTR_ERR(trans);
8398 * Ok we have the key of the file extent we want to fix, now we can cow
8399 * down to the thing and fix it.
8401 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8403 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8404 key.objectid, key.type, key.offset, ret);
8408 fprintf(stderr, "Well that's odd, we just found this key "
8409 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8414 leaf = path->nodes[0];
8415 fi = btrfs_item_ptr(leaf, path->slots[0],
8416 struct btrfs_file_extent_item);
/* A compressed extent with a different start is reported, not repaired. */
8418 if (btrfs_file_extent_compression(leaf, fi) &&
8419 dback->disk_bytenr != entry->bytenr) {
8420 fprintf(stderr, "Ref doesn't match the record start and is "
8421 "compressed, please take a btrfs-image of this file "
8422 "system and send it to a btrfs developer so they can "
8423 "complete this functionality for bytenr %Lu\n",
8424 dback->disk_bytenr);
/*
 * Three cases follow: a backref flagged broken only gets a new disk bytenr;
 * a ref starting after or before the entry also has its extent offset
 * rewritten.
 */
8429 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8430 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8431 } else if (dback->disk_bytenr > entry->bytenr) {
8432 u64 off_diff, offset;
8434 off_diff = dback->disk_bytenr - entry->bytenr;
8435 offset = btrfs_file_extent_offset(leaf, fi);
8436 if (dback->disk_bytenr + offset +
8437 btrfs_file_extent_num_bytes(leaf, fi) >
8438 entry->bytenr + entry->bytes) {
8439 fprintf(stderr, "Ref is past the entry end, please "
8440 "take a btrfs-image of this file system and "
8441 "send it to a btrfs developer, ref %Lu\n",
8442 dback->disk_bytenr);
8447 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8448 btrfs_set_file_extent_offset(leaf, fi, offset);
8449 } else if (dback->disk_bytenr < entry->bytenr) {
8452 offset = btrfs_file_extent_offset(leaf, fi);
8453 if (dback->disk_bytenr + offset < entry->bytenr) {
8454 fprintf(stderr, "Ref is before the entry start, please"
8455 " take a btrfs-image of this file system and "
8456 "send it to a btrfs developer, ref %Lu\n",
8457 dback->disk_bytenr);
/* Re-express the offset relative to the entry's start. */
8462 offset += dback->disk_bytenr;
8463 offset -= entry->bytenr;
8464 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8465 btrfs_set_file_extent_offset(leaf, fi, offset);
/* The on-disk length is always set to the agreed length. */
8468 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8471 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8472 * only do this if we aren't using compression, otherwise it's a
8475 if (!btrfs_file_extent_compression(leaf, fi))
8476 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8478 printf("ram bytes may be wrong?\n");
8479 btrfs_mark_buffer_dirty(leaf);
8481 err = btrfs_commit_transaction(trans, root);
8482 btrfs_release_path(path);
/* An earlier error takes precedence over the commit result. */
8483 return ret ? ret : err;
/*
 * Make sure the data backrefs of 'rec' agree on the extent's bytenr and size.
 *
 * Backrefs that are full backrefs, not data, or have found_ref == 0 are
 * ignored.  The rest are grouped into extent_entry buckets keyed by
 * (disk_bytenr, bytes).  When they disagree, find_most_right_entry() picks
 * the winner, with the extent record's own (start, nr) consulted when the
 * backrefs alone cannot decide, and repair_ref() is called for every backref
 * that differs from the winner.
 *
 * NOTE(review): the return statements are not visible in this listing.  The
 * comment near the end says the cache has to be dropped and rescanned after
 * a repair, and check_extent_refs() special-cases -EAGAIN, so presumably
 * -EAGAIN is returned after a repair -- confirm against the full file.
 */
8486 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8487 struct extent_record *rec)
8489 struct extent_backref *back;
8490 struct data_backref *dback;
8491 struct extent_entry *entry, *best = NULL;
8494 int broken_entries = 0;
8499 * Metadata is easy and the backrefs should always agree on bytenr and
8500 * size, if not we've got bigger issues.
/* Pass 1: bucket the usable data backrefs by (disk_bytenr, bytes). */
8505 list_for_each_entry(back, &rec->backrefs, list) {
8506 if (back->full_backref || !back->is_data)
8509 dback = to_data_backref(back);
8512 * We only pay attention to backrefs that we found a real
8515 if (dback->found_ref == 0)
8519 * For now we only catch when the bytes don't match, not the
8520 * bytenr. We can easily do this at the same time, but I want
8521 * to have a fs image to test on before we just add repair
8522 * functionality willy-nilly so we know we won't screw up the
8526 entry = find_entry(&entries, dback->disk_bytenr,
/* A new zeroed bucket is created and appended for this pair. */
8529 entry = malloc(sizeof(struct extent_entry));
8534 memset(entry, 0, sizeof(*entry));
8535 entry->bytenr = dback->disk_bytenr;
8536 entry->bytes = dback->bytes;
8537 list_add_tail(&entry->list, &entries);
8542 * If we only have on entry we may think the entries agree when
8543 * in reality they don't so we have to do some extra checking.
8545 if (dback->disk_bytenr != rec->start ||
8546 dback->bytes != rec->nr || back->broken)
8557 /* Yay all the backrefs agree, carry on good sir */
8558 if (nr_entries <= 1 && !mismatch)
8561 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8562 "%Lu\n", rec->start);
8565 * First we want to see if the backrefs can agree amongst themselves who
8566 * is right, so figure out which one of the entries has the highest
8569 best = find_most_right_entry(&entries);
8572 * Ok so we may have an even split between what the backrefs think, so
8573 * this is where we use the extent ref to see what it thinks.
8576 entry = find_entry(&entries, rec->start, rec->nr);
8577 if (!entry && (!broken_entries || !rec->found_rec)) {
8578 fprintf(stderr, "Backrefs don't agree with each other "
8579 "and extent record doesn't agree with anybody,"
8580 " so we can't fix bytenr %Lu bytes %Lu\n",
8581 rec->start, rec->nr);
8584 } else if (!entry) {
8586 * Ok our backrefs were broken, we'll assume this is the
8587 * correct value and add an entry for this range.
8589 entry = malloc(sizeof(struct extent_entry));
8594 memset(entry, 0, sizeof(*entry));
8595 entry->bytenr = rec->start;
8596 entry->bytes = rec->nr;
8597 list_add_tail(&entry->list, &entries);
/* Vote again now that the extent record's own range is represented. */
8601 best = find_most_right_entry(&entries);
8603 fprintf(stderr, "Backrefs and extent record evenly "
8604 "split on who is right, this is going to "
8605 "require user input to fix bytenr %Lu bytes "
8606 "%Lu\n", rec->start, rec->nr);
8613 * I don't think this can happen currently as we'll abort() if we catch
8614 * this case higher up, but in case somebody removes that we still can't
8615 * deal with it properly here yet, so just bail out of that's the case.
8617 if (best->bytenr != rec->start) {
8618 fprintf(stderr, "Extent start and backref starts don't match, "
8619 "please use btrfs-image on this file system and send "
8620 "it to a btrfs developer so they can make fsck fix "
8621 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8622 rec->start, rec->nr);
8628 * Ok great we all agreed on an extent record, let's go find the real
8629 * references and fix up the ones that don't match.
/* Pass 2: rewrite every usable backref that disagrees with 'best'. */
8631 list_for_each_entry(back, &rec->backrefs, list) {
8632 if (back->full_backref || !back->is_data)
8635 dback = to_data_backref(back);
8638 * Still ignoring backrefs that don't have a real ref attached
8641 if (dback->found_ref == 0)
8644 if (dback->bytes == best->bytes &&
8645 dback->disk_bytenr == best->bytenr)
8648 ret = repair_ref(info, path, dback, best);
8654 * Ok we messed with the actual refs, which means we need to drop our
8655 * entire cache and go back and rescan. I know this is a huge pain and
8656 * adds a lot of extra work, but it's the only way to be safe. Once all
8657 * the backrefs agree we may not need to do anything to the extent
/* Tear down the temporary bucket list. */
8662 while (!list_empty(&entries)) {
8663 entry = list_entry(entries.next, struct extent_entry, list);
8664 list_del_init(&entry->list);
/*
 * Sort out a record that was queued on duplicate_extents.
 *
 * Per the comment below, the function returns early when 'rec' has found_rec
 * set or has more than one duplicate.  Otherwise the single duplicate
 * ('good') replaces 'rec' in 'extent_cache': it takes over rec's refs and
 * backrefs, and any other overlapping cached record found through
 * lookup_cache_extent() is folded into it, either as a further duplicate
 * (found_rec set, or it has duplicates of its own) or by merging its refs
 * and backrefs.
 *
 * Returns 0 if 'good' ends up with duplicates, 1 otherwise.
 * NOTE(review): the value returned by the early exit is not visible in this
 * listing.
 */
8670 static int process_duplicates(struct cache_tree *extent_cache,
8671 struct extent_record *rec)
8673 struct extent_record *good, *tmp;
8674 struct cache_extent *cache;
8678 * If we found a extent record for this extent then return, or if we
8679 * have more than one duplicate we are likely going to need to delete
8682 if (rec->found_rec || rec->num_duplicates > 1)
8685 /* Shouldn't happen but just in case */
8686 BUG_ON(!rec->num_duplicates);
8689 * So this happens if we end up with a backref that doesn't match the
8690 * actual extent entry. So either the backref is bad or the extent
8691 * entry is bad. Either way we want to have the extent_record actually
8692 * reflect what we found in the extent_tree, so we need to take the
8693 * duplicate out and use that as the extent_record since the only way we
8694 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8696 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate and reset its bookkeeping. */
8698 good = to_extent_record(rec->dups.next);
8699 list_del_init(&good->list);
8700 INIT_LIST_HEAD(&good->backrefs);
8701 INIT_LIST_HEAD(&good->dups);
8702 good->cache.start = good->start;
8703 good->cache.size = good->nr;
8704 good->content_checked = 0;
8705 good->owner_ref_checked = 0;
8706 good->num_duplicates = 0;
/* The ref count and backrefs collected on 'rec' now belong to 'good'. */
8707 good->refs = rec->refs;
8708 list_splice_init(&rec->backrefs, &good->backrefs);
8710 cache = lookup_cache_extent(extent_cache, good->start,
8714 tmp = container_of(cache, struct extent_record, cache);
8717 * If we find another overlapping extent and it's found_rec is
8718 * set then it's a duplicate and we need to try and delete
8721 if (tmp->found_rec || tmp->num_duplicates > 0) {
/* Queue 'good' for duplicate deletion again, at most once. */
8722 if (list_empty(&good->list))
8723 list_add_tail(&good->list,
8724 &duplicate_extents);
/* 'tmp' and all of its own duplicates become duplicates of 'good'. */
8725 good->num_duplicates += tmp->num_duplicates + 1;
8726 list_splice_init(&tmp->dups, &good->dups);
8727 list_del_init(&tmp->list);
8728 list_add_tail(&tmp->list, &good->dups);
8729 remove_cache_extent(extent_cache, &tmp->cache);
8734 * Ok we have another non extent item backed extent rec, so lets
8735 * just add it to this extent and carry on like we did above.
8737 good->refs += tmp->refs;
8738 list_splice_init(&tmp->backrefs, &good->backrefs);
8739 remove_cache_extent(extent_cache, &tmp->cache);
8742 ret = insert_cache_extent(extent_cache, &good->cache);
8745 return good->num_duplicates ? 0 : 1;
/*
 * Remove the extent items of the duplicates of 'rec' from the extent tree.
 *
 * One record is kept as the 'good' one; the others are collected on a local
 * delete_list.  Inside a single transaction on the extent root, each listed
 * record that has found_rec set is looked up by the key
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and deleted with btrfs_del_item().
 * Afterwards both delete_list and rec->dups are emptied.
 *
 * Returns 'ret' if it is non-zero, otherwise nr_del.
 * NOTE(review): how 'good' is updated and where nr_del is incremented are
 * not visible in this listing; the caller's comment describes the return
 * value as the number of entries deleted.
 */
8748 static int delete_duplicate_records(struct btrfs_root *root,
8749 struct extent_record *rec)
8751 struct btrfs_trans_handle *trans;
8752 LIST_HEAD(delete_list);
8753 struct btrfs_path path;
8754 struct extent_record *tmp, *good, *n;
8757 struct btrfs_key key;
8759 btrfs_init_path(&path);
8762 /* Find the record that covers all of the duplicates. */
8763 list_for_each_entry(tmp, &rec->dups, list) {
8764 if (good->start < tmp->start)
8766 if (good->nr > tmp->nr)
/* Partial overlap between the two records is reported, not handled. */
8769 if (tmp->start + tmp->nr < good->start + good->nr) {
8770 fprintf(stderr, "Ok we have overlapping extents that "
8771 "aren't completely covered by each other, this "
8772 "is going to require more careful thought. "
8773 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8774 tmp->start, tmp->nr, good->start, good->nr);
8781 list_add_tail(&rec->list, &delete_list);
8783 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8786 list_move_tail(&tmp->list, &delete_list);
/* All deletions go through the extent root, whatever root was passed in. */
8789 root = root->fs_info->extent_root;
8790 trans = btrfs_start_transaction(root, 1);
8791 if (IS_ERR(trans)) {
8792 ret = PTR_ERR(trans);
8796 list_for_each_entry(tmp, &delete_list, list) {
8797 if (tmp->found_rec == 0)
8799 key.objectid = tmp->start;
8800 key.type = BTRFS_EXTENT_ITEM_KEY;
8801 key.offset = tmp->nr;
8803 /* Shouldn't happen but just in case */
8804 if (tmp->metadata) {
8805 fprintf(stderr, "Well this shouldn't happen, extent "
8806 "record overlaps but is metadata? "
8807 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/* Search with ins_len -1 and cow 1, i.e. in preparation for a deletion. */
8811 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8817 ret = btrfs_del_item(trans, root, &path);
8820 btrfs_release_path(&path);
8823 err = btrfs_commit_transaction(trans, root);
8827 while (!list_empty(&delete_list)) {
8828 tmp = to_extent_record(delete_list.next);
8829 list_del_init(&tmp->list);
8835 while (!list_empty(&rec->dups)) {
8836 tmp = to_extent_record(rec->dups.next);
8837 list_del_init(&tmp->list);
8841 btrfs_release_path(&path);
/* The duplicate count is cleared only when nothing failed and nothing was deleted. */
8843 if (!ret && !nr_del)
8844 rec->num_duplicates = 0;
8846 return ret ? ret : nr_del;
/*
 * Try to locate the file extents behind data backrefs of 'rec' that were not
 * found during the scan (found_ref == 0; full backrefs are skipped).
 *
 * For each such backref the owning root is read and the key
 * (owner, BTRFS_EXTENT_DATA_KEY, offset) is searched.  When the item exists,
 * its disk bytenr and disk length are read.  Unless 'extent_cache' already
 * holds a record for that bytenr that rules it out (see the comment in the
 * loop), the backref gets found_ref bumped and takes over those values so
 * that verify_backrefs() can judge it.
 *
 * A missing root (-ENOENT) just skips the backref; any other root read error
 * is returned.  NOTE(review): the remaining return statements, and the
 * statement that marks the backref as not trustworthy, are not visible in
 * this listing.
 */
8849 static int find_possible_backrefs(struct btrfs_fs_info *info,
8850 struct btrfs_path *path,
8851 struct cache_tree *extent_cache,
8852 struct extent_record *rec)
8854 struct btrfs_root *root;
8855 struct extent_backref *back;
8856 struct data_backref *dback;
8857 struct cache_extent *cache;
8858 struct btrfs_file_extent_item *fi;
8859 struct btrfs_key key;
8863 list_for_each_entry(back, &rec->backrefs, list) {
8864 /* Don't care about full backrefs (poor unloved backrefs) */
8865 if (back->full_backref || !back->is_data)
8868 dback = to_data_backref(back);
8870 /* We found this one, we don't need to do a lookup */
8871 if (dback->found_ref)
8874 key.objectid = dback->root;
8875 key.type = BTRFS_ROOT_ITEM_KEY;
8876 key.offset = (u64)-1;
8878 root = btrfs_read_fs_root(info, &key);
8880 /* No root, definitely a bad ref, skip */
8881 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8883 /* Other err, exit */
8885 return PTR_ERR(root);
8887 key.objectid = dback->owner;
8888 key.type = BTRFS_EXTENT_DATA_KEY;
8889 key.offset = dback->offset;
/* Read-only lookup of the file extent the backref describes. */
8890 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8892 btrfs_release_path(path);
8895 /* Didn't find it, we can carry on */
8900 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8901 struct btrfs_file_extent_item);
8902 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8903 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8904 btrfs_release_path(path);
/* See whether the cache already has a record covering that bytenr. */
8905 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8907 struct extent_record *tmp;
8908 tmp = container_of(cache, struct extent_record, cache);
8911 * If we found an extent record for the bytenr for this
8912 * particular backref then we can't add it to our
8913 * current extent record. We only want to add backrefs
8914 * that don't have a corresponding extent item in the
8915 * extent tree since they likely belong to this record
8916 * and we need to fix it if it doesn't match bytenrs.
/* Adopt what the file extent item actually says. */
8922 dback->found_ref += 1;
8923 dback->disk_bytenr = bytenr;
8924 dback->bytes = bytes;
8927 * Set this so the verify backref code knows not to trust the
8928 * values in this backref.
8937 * Record orphan data ref into corresponding root.
8939 * Return 0 if the extent item contains data ref and recorded.
8940 * Return 1 if the extent item contains no useful data ref
8941 * On that case, it may contains only shared_dataref or metadata backref
8942 * or the file extent exists(this should be handled by the extent bytenr
8944 * Return <0 if something goes wrong.
8946 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8947 struct extent_record *rec)
8949 struct btrfs_key key;
8950 struct btrfs_root *dest_root;
8951 struct extent_backref *back;
8952 struct data_backref *dback;
8953 struct orphan_data_extent *orphan;
8954 struct btrfs_path path;
8955 int recorded_data_ref = 0;
8960 btrfs_init_path(&path);
8961 list_for_each_entry(back, &rec->backrefs, list) {
8962 if (back->full_backref || !back->is_data ||
8963 !back->found_extent_tree)
8965 dback = to_data_backref(back);
8966 if (dback->found_ref)
8968 key.objectid = dback->root;
8969 key.type = BTRFS_ROOT_ITEM_KEY;
8970 key.offset = (u64)-1;
8972 dest_root = btrfs_read_fs_root(fs_info, &key);
8974 /* For non-exist root we just skip it */
8975 if (IS_ERR(dest_root) || !dest_root)
8978 key.objectid = dback->owner;
8979 key.type = BTRFS_EXTENT_DATA_KEY;
8980 key.offset = dback->offset;
8982 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8983 btrfs_release_path(&path);
8985 * For ret < 0, it's OK since the fs-tree may be corrupted,
8986 * we need to record it for inode/file extent rebuild.
8987 * For ret > 0, we record it only for file extent rebuild.
8988 * For ret == 0, the file extent exists but only bytenr
8989 * mismatch, let the original bytenr fix routine to handle,
8995 orphan = malloc(sizeof(*orphan));
9000 INIT_LIST_HEAD(&orphan->list);
9001 orphan->root = dback->root;
9002 orphan->objectid = dback->owner;
9003 orphan->offset = dback->offset;
9004 orphan->disk_bytenr = rec->cache.start;
9005 orphan->disk_len = rec->cache.size;
9006 list_add(&dest_root->orphan_data_extents, &orphan->list);
9007 recorded_data_ref = 1;
9010 btrfs_release_path(&path);
9012 return !recorded_data_ref;
9018 * when an incorrect extent item is found, this will delete
9019 * all of the existing entries for it and recreate them
9020 * based on what the tree scan found.
/*
 * Steps visible below: for data records whose ref counts disagree, look for
 * missing backrefs (find_possible_backrefs) and reconcile them
 * (verify_backrefs); then, in one transaction on the extent root, delete the
 * existing extent records, look the block up in info->corrupt_blocks (refs
 * are not re-added to a corrupt block), and call record_extent() for each
 * backref that has found_ref set.
 *
 * NOTE(review): error handling and the return statement are not visible in
 * this listing.
 */
9022 static int fixup_extent_refs(struct btrfs_fs_info *info,
9023 struct cache_tree *extent_cache,
9024 struct extent_record *rec)
9026 struct btrfs_trans_handle *trans = NULL;
9028 struct btrfs_path path;
9029 struct list_head *cur = rec->backrefs.next;
9030 struct cache_extent *cache;
9031 struct extent_backref *back;
/* Carry the record's full-backref state into the flags passed to record_extent(). */
9035 if (rec->flag_block_full_backref)
9036 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9038 btrfs_init_path(&path);
9039 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9041 * Sometimes the backrefs themselves are so broken they don't
9042 * get attached to any meaningful rec, so first go back and
9043 * check any of our backrefs that we couldn't find and throw
9044 * them into the list if we find the backref so that
9045 * verify_backrefs can figure out what to do.
9047 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9052 /* step one, make sure all of the backrefs agree */
9053 ret = verify_backrefs(info, &path, rec);
9057 trans = btrfs_start_transaction(info->extent_root, 1);
9058 if (IS_ERR(trans)) {
9059 ret = PTR_ERR(trans);
9063 /* step two, delete all the existing records */
9064 ret = delete_extent_records(trans, info->extent_root, &path,
9070 /* was this block corrupt? If so, don't add references to it */
9071 cache = lookup_cache_extent(info->corrupt_blocks,
9072 rec->start, rec->max_size);
9078 /* step three, recreate all the refs we did find */
9079 while(cur != &rec->backrefs) {
9080 back = to_extent_backref(cur);
9084 * if we didn't find any references, don't create a
9087 if (!back->found_ref)
9090 rec->bad_full_backref = 0;
9091 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9099 int err = btrfs_commit_transaction(trans, info->extent_root);
9105 fprintf(stderr, "Repaired extent references for %llu\n",
9106 (unsigned long long)rec->start);
9108 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF in the on-disk extent item of
 * 'rec' so that it matches rec->flag_block_full_backref.
 *
 * The item is looked up as (start, BTRFS_METADATA_ITEM_KEY, info_level) for
 * metadata records and as (start, BTRFS_EXTENT_ITEM_KEY, max_size)
 * otherwise.  The transaction is committed on every visible path, including
 * the two lookup-failure paths.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
9112 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9113 struct extent_record *rec)
9115 struct btrfs_trans_handle *trans;
9116 struct btrfs_root *root = fs_info->extent_root;
9117 struct btrfs_path path;
9118 struct btrfs_extent_item *ei;
9119 struct btrfs_key key;
9123 key.objectid = rec->start;
9124 if (rec->metadata) {
9125 key.type = BTRFS_METADATA_ITEM_KEY;
9126 key.offset = rec->info_level;
9128 key.type = BTRFS_EXTENT_ITEM_KEY;
9129 key.offset = rec->max_size;
9132 trans = btrfs_start_transaction(root, 0);
9134 return PTR_ERR(trans);
9136 btrfs_init_path(&path);
/* cow = 1: the leaf is going to be modified. */
9137 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9139 btrfs_release_path(&path);
9140 btrfs_commit_transaction(trans, root);
9143 fprintf(stderr, "Didn't find extent for %llu\n",
9144 (unsigned long long)rec->start);
9145 btrfs_release_path(&path);
9146 btrfs_commit_transaction(trans, root);
/* Read the current flags, flip only the full-backref bit, write them back. */
9150 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9151 struct btrfs_extent_item);
9152 flags = btrfs_extent_flags(path.nodes[0], ei);
9153 if (rec->flag_block_full_backref) {
9154 fprintf(stderr, "setting full backref on %llu\n",
9155 (unsigned long long)key.objectid);
9156 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9158 fprintf(stderr, "clearing full backref on %llu\n",
9159 (unsigned long long)key.objectid);
9160 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9162 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9163 btrfs_mark_buffer_dirty(path.nodes[0]);
9164 btrfs_release_path(&path);
9165 ret = btrfs_commit_transaction(trans, root);
9167 fprintf(stderr, "Repaired extent flags for %llu\n",
9168 (unsigned long long)rec->start);
9173 /*
 * Remove the pointer to one corrupt block from its parent node.
 * Right now we only prune from the extent allocation tree.
 *
 * The extent root is searched for corrupt->key with lowest_level set to
 * corrupt->level + 1, so the search stops at the parent of the corrupt
 * block.  The slot the search landed on is checked first; if its block
 * pointer is not corrupt->cache.start, every slot of that node is scanned.
 * The pointer is then removed with btrfs_del_ptr().
 *
 * NOTE(review): the return statements and the handling of a block that
 * cannot be found are not visible in this listing.
 */
9174 static int prune_one_block(struct btrfs_trans_handle *trans,
9175 struct btrfs_fs_info *info,
9176 struct btrfs_corrupt_block *corrupt)
9179 struct btrfs_path path;
9180 struct extent_buffer *eb;
9184 int level = corrupt->level + 1;
9186 btrfs_init_path(&path);
9188 /* we want to stop at the parent to our busted block */
9189 path.lowest_level = level;
9191 ret = btrfs_search_slot(trans, info->extent_root,
9192 &corrupt->key, &path, -1, 1);
9197 eb = path.nodes[level];
9204 * hopefully the search gave us the block we want to prune,
9205 * lets try that first
9207 slot = path.slots[level];
9208 found = btrfs_node_blockptr(eb, slot);
9209 if (found == corrupt->cache.start)
9212 nritems = btrfs_header_nritems(eb);
9214 /* the search failed, lets scan this node and hope we find it */
9215 for (slot = 0; slot < nritems; slot++) {
9216 found = btrfs_node_blockptr(eb, slot);
9217 if (found == corrupt->cache.start)
9221 * we couldn't find the bad block. TODO, search all the nodes for pointers
9224 if (eb == info->extent_root->node) {
9229 btrfs_release_path(&path);
9234 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9235 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9238 btrfs_release_path(&path);
/*
 * Hand the entries of info->corrupt_blocks to prune_one_block() and remove
 * them from that cache.  The work happens in a transaction on the extent
 * root, which is committed at the end.
 *
 * The result of prune_one_block() is not checked.
 * NOTE(review): the loop construct and the return for the "nothing to prune"
 * case are not visible in this listing.
 */
9242 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9244 struct btrfs_trans_handle *trans = NULL;
9245 struct cache_extent *cache;
9246 struct btrfs_corrupt_block *corrupt;
/* Always take the first remaining entry; it is removed further down. */
9249 cache = search_cache_extent(info->corrupt_blocks, 0);
9253 trans = btrfs_start_transaction(info->extent_root, 1);
9255 return PTR_ERR(trans);
9257 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9258 prune_one_block(trans, info, corrupt);
9259 remove_cache_extent(info->corrupt_blocks, cache);
9262 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges recorded in fs_info->free_space_cache and
 * then step through the block groups by address, advancing 'start' to the
 * end of each block group that was looked up.
 *
 * NOTE(review): the loop constructs and whatever is done to each block group
 * are not visible in this listing -- presumably its cached state is reset,
 * as the function name suggests; confirm against the full file.
 */
9266 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9268 struct btrfs_block_group_cache *cache;
9273 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9274 &start, &end, EXTENT_DIRTY);
9277 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9282 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue right after this block group (objectid + offset of its key). */
9287 start = cache->key.objectid + cache->key.offset;
/*
 * Walk the extent records in 'extent_cache', report the problems found
 * (multiple extent items, ref count mismatch, backpointer mismatch, failed
 * owner ref check, bad full backref, metadata crossing a stripe boundary,
 * chunk type mismatch) and, in repair mode, try to fix them with
 * fixup_extent_refs() and fixup_extent_flags().
 *
 * Before the main pass the problem extents and the corrupt blocks are marked
 * in fs_info->excluded_extents so that nothing is allocated from them,
 * corrupt blocks are pruned, and queued duplicate extent items are deleted.
 * The last part of the function runs btrfs_fix_block_accounting() in its own
 * transaction on the extent root.
 *
 * NOTE(review): many control-flow lines, including the returns, are not
 * visible in this listing.
 */
9291 static int check_extent_refs(struct btrfs_root *root,
9292 struct cache_tree *extent_cache)
9294 struct extent_record *rec;
9295 struct cache_extent *cache;
9301 * if we're doing a repair, we have to make sure
9302 * we don't allocate from the problem extents.
9303 * In the worst case, this will be all the
/* Exclude the range of every record currently in the cache. */
9306 cache = search_cache_extent(extent_cache, 0);
9308 rec = container_of(cache, struct extent_record, cache);
9309 set_extent_dirty(root->fs_info->excluded_extents,
9311 rec->start + rec->max_size - 1);
9312 cache = next_cache_extent(cache);
9315 /* pin down all the corrupted blocks too */
9316 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9318 set_extent_dirty(root->fs_info->excluded_extents,
9320 cache->start + cache->size - 1);
9321 cache = next_cache_extent(cache);
9323 prune_corrupt_blocks(root->fs_info);
9324 reset_cached_block_groups(root->fs_info);
9327 reset_cached_block_groups(root->fs_info);
9330 * We need to delete any duplicate entries we find first otherwise we
9331 * could mess up the extent tree when we have backrefs that actually
9332 * belong to a different extent item and not the weird duplicate one.
9334 while (repair && !list_empty(&duplicate_extents)) {
9335 rec = to_extent_record(duplicate_extents.next);
9336 list_del_init(&rec->list);
9338 /* Sometimes we can find a backref before we find an actual
9339 * extent, so we need to process it a little bit to see if there
9340 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9341 * if this is a backref screwup. If we need to delete stuff
9342 * process_duplicates() will return 0, otherwise it will return
9345 if (process_duplicates(extent_cache, rec))
9347 ret = delete_duplicate_records(root, rec);
9351 * delete_duplicate_records will return the number of entries
9352 * deleted, so if it's greater than 0 then we know we actually
9353 * did something and we need to remove.
9366 cache = search_cache_extent(extent_cache, 0);
9369 rec = container_of(cache, struct extent_record, cache);
9370 if (rec->num_duplicates) {
9371 fprintf(stderr, "extent item %llu has multiple extent "
9372 "items\n", (unsigned long long)rec->start);
9376 if (rec->refs != rec->extent_item_refs) {
9377 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9378 (unsigned long long)rec->start,
9379 (unsigned long long)rec->nr);
9380 fprintf(stderr, "extent item %llu, found %llu\n",
9381 (unsigned long long)rec->extent_item_refs,
9382 (unsigned long long)rec->refs);
9383 ret = record_orphan_data_extents(root->fs_info, rec);
9389 if (all_backpointers_checked(rec, 1)) {
9390 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9391 (unsigned long long)rec->start,
9392 (unsigned long long)rec->nr);
9396 if (!rec->owner_ref_checked) {
9397 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9398 (unsigned long long)rec->start,
9399 (unsigned long long)rec->nr);
9404 if (repair && fix) {
9405 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9411 if (rec->bad_full_backref) {
9412 fprintf(stderr, "bad full backref, on [%llu]\n",
9413 (unsigned long long)rec->start);
9415 ret = fixup_extent_flags(root->fs_info, rec);
9423 * Although it's not a extent ref's problem, we reuse this
9424 * routine for error reporting.
9425 * No repair function yet.
9427 if (rec->crossing_stripes) {
9429 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9430 rec->start, rec->start + rec->max_size);
9434 if (rec->wrong_chunk_type) {
9436 "bad extent [%llu, %llu), type mismatch with chunk\n",
9437 rec->start, rec->start + rec->max_size);
9441 remove_cache_extent(extent_cache, cache);
9442 free_all_extent_backrefs(rec);
9443 if (!init_extent_tree && repair && (!cur_err || fix))
9444 clear_extent_dirty(root->fs_info->excluded_extents,
9446 rec->start + rec->max_size - 1);
9451 if (ret && ret != -EAGAIN) {
9452 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9455 struct btrfs_trans_handle *trans;
9457 root = root->fs_info->extent_root;
9458 trans = btrfs_start_transaction(root, 1);
9459 if (IS_ERR(trans)) {
9460 ret = PTR_ERR(trans);
9464 ret = btrfs_fix_block_accounting(trans, root);
9467 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute the stripe length of a chunk from its total length.
 *
 * @type:        chunk type flags (BTRFS_BLOCK_GROUP_* bits)
 * @length:      length of the chunk
 * @num_stripes: number of stripes in the chunk
 *
 * RAID0 divides the length by num_stripes, RAID10 divides twice the length
 * by num_stripes, RAID5 divides by (num_stripes - 1) and RAID6 by
 * (num_stripes - 2).  Any other type uses the length unchanged.
 *
 * NOTE(review): nothing here guards the divisor; a num_stripes of 0 (or 1
 * for RAID5, 2 for RAID6) would divide by zero unless callers validate the
 * chunk first -- confirm.  The declaration of stripe_size and the return
 * statement are not visible in this listing.
 */
9476 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9480 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9481 stripe_size = length;
9482 stripe_size /= num_stripes;
9483 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9484 stripe_size = length * 2;
9485 stripe_size /= num_stripes;
9486 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9487 stripe_size = length;
9488 stripe_size /= (num_stripes - 1);
9489 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9490 stripe_size = length;
9491 stripe_size /= (num_stripes - 2);
9493 stripe_size = length;
9499 * Check the chunk with its block group/dev list ref:
9500 * Return 0 if all refs seem valid.
9501 * Return 1 if only part of the refs seem valid and a later check is needed to
9502 * rebuild them, e.g. a missing block group that requires searching the extent tree.
9503 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against its block group item and its device
 * extents.
 *
 * The block group is looked up in 'block_group_cache' and compared with the
 * chunk's offset, length and type flags (the type comparison is subject to a
 * condition that is not visible here); a matching block group is taken off
 * its list and linked as chunk_rec->bg_rec.  Then, for every stripe, a
 * device extent of calc_stripe_length() bytes is looked up by
 * (devid, offset) and must point back to this chunk; matching device extents
 * are moved onto chunk_rec->dextents.
 *
 * NOTE(review): the last parameter, the code that sets the return value, and
 * the conditions guarding the messages are not visible in this listing.
 */
9505 static int check_chunk_refs(struct chunk_record *chunk_rec,
9506 struct block_group_tree *block_group_cache,
9507 struct device_extent_tree *dev_extent_cache,
9510 struct cache_extent *block_group_item;
9511 struct block_group_record *block_group_rec;
9512 struct cache_extent *dev_extent_item;
9513 struct device_extent_record *dev_extent_rec;
9517 int metadump_v2 = 0;
9521 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9524 if (block_group_item) {
9525 block_group_rec = container_of(block_group_item,
9526 struct block_group_record,
/* A block group key stores the chunk offset as objectid and the length as offset. */
9528 if (chunk_rec->length != block_group_rec->offset ||
9529 chunk_rec->offset != block_group_rec->objectid ||
9531 chunk_rec->type_flags != block_group_rec->flags)) {
9534 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9535 chunk_rec->objectid,
9540 chunk_rec->type_flags,
9541 block_group_rec->objectid,
9542 block_group_rec->type,
9543 block_group_rec->offset,
9544 block_group_rec->offset,
9545 block_group_rec->objectid,
9546 block_group_rec->flags);
9549 list_del_init(&block_group_rec->list);
9550 chunk_rec->bg_rec = block_group_rec;
9555 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9556 chunk_rec->objectid,
9561 chunk_rec->type_flags);
/* Every stripe is expected to be backed by a device extent of this length. */
9568 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9569 chunk_rec->num_stripes);
9570 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9571 devid = chunk_rec->stripes[i].devid;
9572 offset = chunk_rec->stripes[i].offset;
9573 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9574 devid, offset, length);
9575 if (dev_extent_item) {
9576 dev_extent_rec = container_of(dev_extent_item,
9577 struct device_extent_record,
9579 if (dev_extent_rec->objectid != devid ||
9580 dev_extent_rec->offset != offset ||
9581 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9582 dev_extent_rec->length != length) {
9585 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9586 chunk_rec->objectid,
9589 chunk_rec->stripes[i].devid,
9590 chunk_rec->stripes[i].offset,
9591 dev_extent_rec->objectid,
9592 dev_extent_rec->offset,
9593 dev_extent_rec->length);
9596 list_move(&dev_extent_rec->chunk_list,
9597 &chunk_rec->dextents);
9602 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9603 chunk_rec->objectid,
9606 chunk_rec->stripes[i].devid,
9607 chunk_rec->stripes[i].offset);
9614 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Walk every chunk record in @chunk_cache, validate each one with
 * check_chunk_refs() and sort the records onto the caller-supplied lists.
 *
 * @chunk_cache:       cache tree holding the chunk records to check
 * @block_group_cache: block group records, handed to check_chunk_refs()
 * @dev_extent_cache:  device extent records, handed to check_chunk_refs()
 * @good:              optional list; receives chunks whose check returned 0
 * @bad:               list for the remaining case; the condition guarding
 *                     that insertion is not visible in this listing
 * @rebuild:           optional list; receives chunks whose check returned > 0
 * @silent:            forwarded unchanged to check_chunk_refs()
 *
 * After the chunk walk, the records still linked on
 * block_group_cache->block_groups and dev_extent_cache->no_chunk_orphans are
 * iterated; the visible format strings describe them as lacking a chunk.
 *
 * NOTE(review): this listing skips source lines (the embedded line numbers
 * have gaps), so the return value and the guards around the messages cannot
 * be confirmed from here.
 */
9615 int check_chunks(struct cache_tree *chunk_cache,
9616 struct block_group_tree *block_group_cache,
9617 struct device_extent_tree *dev_extent_cache,
9618 struct list_head *good, struct list_head *bad,
9619 struct list_head *rebuild, int silent)
9621 struct cache_extent *chunk_item;
9622 struct chunk_record *chunk_rec;
9623 struct block_group_record *bg_rec;
9624 struct device_extent_record *dext_rec;
9628 chunk_item = first_cache_extent(chunk_cache);
9629 while (chunk_item) {
9630 chunk_rec = container_of(chunk_item, struct chunk_record,
9632 err = check_chunk_refs(chunk_rec, block_group_cache,
9633 dev_extent_cache, silent);
/* Classify the chunk by the sign of the check result. */
9636 if (err == 0 && good)
9637 list_add_tail(&chunk_rec->list, good);
9638 if (err > 0 && rebuild)
9639 list_add_tail(&chunk_rec->list, rebuild);
9641 list_add_tail(&chunk_rec->list, bad);
9642 chunk_item = next_cache_extent(chunk_item);
/* Block group records that are still on the block_groups list. */
9645 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9648 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Device extent records that are still on the no_chunk_orphans list. */
9656 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9660 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Add up the length of the device extent records that belong to
 * dev_rec->devid and compare the sum with dev_rec->byte_used.
 *
 * @dev_rec:    device record supplying devid and byte_used
 * @dext_cache: device extent cache that is searched
 *
 * The walk starts at search_cache_extent2(&dext_cache->tree, devid, 0) and
 * advances with next_cache_extent(). Each counted record is unlinked from
 * its device_list. A record whose objectid differs from the devid is tested
 * for, presumably to end the walk (the guarded statement is not visible in
 * this listing).
 *
 * A mismatch between the sum and byte_used has a dedicated message. The
 * return values are not visible here.
 */
9671 static int check_device_used(struct device_record *dev_rec,
9672 struct device_extent_tree *dext_cache)
9674 struct cache_extent *cache;
9675 struct device_extent_record *dev_extent_rec;
9678 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9680 dev_extent_rec = container_of(cache,
9681 struct device_extent_record,
9683 if (dev_extent_rec->objectid != dev_rec->devid)
/* Record belongs to this device: unlink it and count its length. */
9686 list_del_init(&dev_extent_rec->device_list);
9687 total_byte += dev_extent_rec->length;
9688 cache = next_cache_extent(cache);
9691 if (total_byte != dev_rec->byte_used) {
9693 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9694 total_byte, dev_rec->byte_used, dev_rec->objectid,
9695 dev_rec->type, dev_rec->offset);
9702 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record in @dev_cache, visiting
 * the rb-tree in rb_first()/rb_next() order, then iterate the records on
 * dev_extent_cache->no_device_orphans, for which a message naming the
 * device extent is emitted.
 *
 * @dev_cache:        rb-tree of struct device_record, linked through ->node
 * @dev_extent_cache: device extent cache handed to check_device_used()
 *
 * NOTE(review): how the per-device results are combined and what is
 * returned is not visible in this listing.
 */
9703 static int check_devices(struct rb_root *dev_cache,
9704 struct device_extent_tree *dev_extent_cache)
9706 struct rb_node *dev_node;
9707 struct device_record *dev_rec;
9708 struct device_extent_record *dext_rec;
9712 dev_node = rb_first(dev_cache);
9714 dev_rec = container_of(dev_node, struct device_record, node);
9715 err = check_device_used(dev_rec, dev_extent_cache);
9719 dev_node = rb_next(dev_node);
/* Device extent records still on the no_device_orphans list. */
9721 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9724 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9725 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record with malloc(), fill it from the arguments and
 * append it to the tail of @head.
 *
 * @head:          list that receives the new record
 * @objectid:      stored in ri_rec->objectid
 * @bytenr:        stored in ri_rec->bytenr
 * @last_snapshot: stored in ri_rec->last_snapshot
 * @level:         stored in ri_rec->level
 * @drop_level:    stored in ri_rec->drop_level
 * @drop_key:      copied into ri_rec->drop_key
 *
 * NOTE(review): check_chunks_and_extents() passes NULL for @drop_key, so the
 * memcpy() is presumably guarded by a NULL test on a line missing from this
 * listing. The malloc() failure handling and the return value are likewise
 * not visible. Confirm both against the full source.
 */
9732 static int add_root_item_to_list(struct list_head *head,
9733 u64 objectid, u64 bytenr, u64 last_snapshot,
9734 u8 level, u8 drop_level,
9735 struct btrfs_key *drop_key)
9738 struct root_item_record *ri_rec;
9739 ri_rec = malloc(sizeof(*ri_rec));
9742 ri_rec->bytenr = bytenr;
9743 ri_rec->objectid = objectid;
9744 ri_rec->level = level;
9745 ri_rec->drop_level = drop_level;
9746 ri_rec->last_snapshot = last_snapshot;
9748 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9749 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list: repeatedly take the first root_item_record and unlink it
 * with list_del_init() until the list is empty.
 *
 * NOTE(review): the statement releasing the record memory is not visible in
 * this listing; presumably the record is freed after being unlinked.
 */
9754 static void free_root_item_list(struct list_head *list)
9756 struct root_item_record *ri_rec;
9758 while (!list_empty(list)) {
9759 ri_rec = list_first_entry(list, struct root_item_record,
9761 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_record entries queued on @list.
 *
 * For each record the tree block at rec->bytenr is read with
 * read_tree_block(), registered through add_root_to_pending() under
 * rec->objectid, and run_next_block() is called with the record. The buffer
 * is then released and the record unlinked from @list. After the list is
 * drained, run_next_block() is called again with a NULL record.
 *
 * @list: records to process
 * @root: supplies fs_info for reading blocks and is handed to run_next_block()
 * @bits: handed to run_next_block() together with bits_nr
 * @pending, @seen, @reada, @nodes, @extent_cache, @chunk_cache, @dev_cache,
 * @block_group_cache, @dev_extent_cache: caches forwarded to the helpers
 *
 * NOTE(review): the loop conditions around both run_next_block() calls, the
 * error paths and the return value are on lines missing from this listing.
 */
9766 static int deal_root_from_list(struct list_head *list,
9767 struct btrfs_root *root,
9768 struct block_info *bits,
9770 struct cache_tree *pending,
9771 struct cache_tree *seen,
9772 struct cache_tree *reada,
9773 struct cache_tree *nodes,
9774 struct cache_tree *extent_cache,
9775 struct cache_tree *chunk_cache,
9776 struct rb_root *dev_cache,
9777 struct block_group_tree *block_group_cache,
9778 struct device_extent_tree *dev_extent_cache)
9783 while (!list_empty(list)) {
9784 struct root_item_record *rec;
9785 struct extent_buffer *buf;
9786 rec = list_entry(list->next,
9787 struct root_item_record, list);
/* Read the root block of this record; an unreadable buffer is released. */
9789 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9790 if (!extent_buffer_uptodate(buf)) {
9791 free_extent_buffer(buf);
9795 ret = add_root_to_pending(buf, extent_cache, pending,
9796 seen, nodes, rec->objectid);
9800 * To rebuild extent tree, we need deal with snapshot
9801 * one by one, otherwise we deal with node firstly which
9802 * can maximize readahead.
9805 ret = run_next_block(root, bits, bits_nr, &last,
9806 pending, seen, reada, nodes,
9807 extent_cache, chunk_cache,
9808 dev_cache, block_group_cache,
9809 dev_extent_cache, rec);
9813 free_extent_buffer(buf);
9814 list_del(&rec->list);
/* List drained: run the block processing once more without a record. */
9820 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9821 reada, nodes, extent_cache, chunk_cache,
9822 dev_cache, block_group_cache,
9823 dev_extent_cache, NULL);
/*
 * Top-level driver of the chunk / extent / device cross-check.
 *
 * Visible steps, in order:
 *  - initialise the local caches and lists and publish some of them, plus
 *    free_extent_hook, through root->fs_info;
 *  - allocate the block_info array and, when ctx.progress_enabled is set,
 *    start the progress task with ctx.tp = TASK_EXTENTS;
 *  - queue the tree root and the chunk root on normal_trees;
 *  - scan the tree root for ROOT_ITEM entries, queueing each on
 *    normal_trees when the objectid of its drop_progress key is 0 and on
 *    dropping_trees otherwise;
 *  - process both lists with deal_root_from_list();
 *  - run check_chunks(), check_extent_refs() and check_devices();
 *  - stop the task and release everything.
 *
 * @root: only root->fs_info is used in the visible lines, apart from @root
 *        itself being handed to the helpers.
 *
 * NOTE(review): labels, error checks after each call and the return
 * statement are on lines missing from this listing. Two teardown sequences
 * are visible; which paths reach each of them cannot be confirmed here.
 */
9833 static int check_chunks_and_extents(struct btrfs_root *root)
9835 struct rb_root dev_cache;
9836 struct cache_tree chunk_cache;
9837 struct block_group_tree block_group_cache;
9838 struct device_extent_tree dev_extent_cache;
9839 struct cache_tree extent_cache;
9840 struct cache_tree seen;
9841 struct cache_tree pending;
9842 struct cache_tree reada;
9843 struct cache_tree nodes;
9844 struct extent_io_tree excluded_extents;
9845 struct cache_tree corrupt_blocks;
9846 struct btrfs_path path;
9847 struct btrfs_key key;
9848 struct btrfs_key found_key;
9850 struct block_info *bits;
9852 struct extent_buffer *leaf;
9854 struct btrfs_root_item ri;
9855 struct list_head dropping_trees;
9856 struct list_head normal_trees;
9857 struct btrfs_root *root1;
/* Start with every cache, tree and list empty. */
9861 dev_cache = RB_ROOT;
9862 cache_tree_init(&chunk_cache);
9863 block_group_tree_init(&block_group_cache);
9864 device_extent_tree_init(&dev_extent_cache);
9866 cache_tree_init(&extent_cache);
9867 cache_tree_init(&seen);
9868 cache_tree_init(&pending);
9869 cache_tree_init(&nodes);
9870 cache_tree_init(&reada);
9871 cache_tree_init(&corrupt_blocks);
9872 extent_io_tree_init(&excluded_extents);
9873 INIT_LIST_HEAD(&dropping_trees);
9874 INIT_LIST_HEAD(&normal_trees);
/* Expose the local caches and the free-extent hook through fs_info. */
9877 root->fs_info->excluded_extents = &excluded_extents;
9878 root->fs_info->fsck_extent_cache = &extent_cache;
9879 root->fs_info->free_extent_hook = free_extent_hook;
9880 root->fs_info->corrupt_blocks = &corrupt_blocks;
9884 bits = malloc(bits_nr * sizeof(struct block_info));
9890 if (ctx.progress_enabled) {
9891 ctx.tp = TASK_EXTENTS;
9892 task_start(ctx.info);
/* The tree root and the chunk root are queued before any other root. */
9896 root1 = root->fs_info->tree_root;
9897 level = btrfs_header_level(root1->node);
9898 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9899 root1->node->start, 0, level, 0, NULL);
9902 root1 = root->fs_info->chunk_root;
9903 level = btrfs_header_level(root1->node);
9904 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9905 root1->node->start, 0, level, 0, NULL);
/* Scan the tree root for ROOT_ITEM entries. */
9908 btrfs_init_path(&path);
9911 key.type = BTRFS_ROOT_ITEM_KEY;
9912 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9917 leaf = path.nodes[0];
9918 slot = path.slots[0];
9919 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9920 ret = btrfs_next_leaf(root, &path);
9923 leaf = path.nodes[0];
9924 slot = path.slots[0];
9926 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9927 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9928 unsigned long offset;
9931 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9932 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9933 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero objectid in drop_progress selects the normal_trees list. */
9934 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9935 level = btrfs_root_level(&ri);
9936 ret = add_root_item_to_list(&normal_trees,
9938 btrfs_root_bytenr(&ri),
9939 last_snapshot, level,
9944 level = btrfs_root_level(&ri);
9945 objectid = found_key.objectid;
9946 btrfs_disk_key_to_cpu(&found_key,
9948 ret = add_root_item_to_list(&dropping_trees,
9950 btrfs_root_bytenr(&ri),
9951 last_snapshot, level,
9952 ri.drop_level, &found_key);
9959 btrfs_release_path(&path);
9962 * check_block can return -EAGAIN if it fixes something, please keep
9963 * this in mind when dealing with return values from these functions, if
9964 * we get -EAGAIN we want to fall through and restart the loop.
9966 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9967 &seen, &reada, &nodes, &extent_cache,
9968 &chunk_cache, &dev_cache, &block_group_cache,
9975 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9976 &pending, &seen, &reada, &nodes,
9977 &extent_cache, &chunk_cache, &dev_cache,
9978 &block_group_cache, &dev_extent_cache);
/* Cross-check the collected records; all three list pointers are NULL. */
9985 ret = check_chunks(&chunk_cache, &block_group_cache,
9986 &dev_extent_cache, NULL, NULL, NULL, 0);
9993 ret = check_extent_refs(root, &extent_cache);
10000 ret = check_devices(&dev_cache, &dev_extent_cache);
/* First visible teardown sequence: also clears the fs_info pointers. */
10005 task_stop(ctx.info);
10007 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10008 extent_io_tree_cleanup(&excluded_extents);
10009 root->fs_info->fsck_extent_cache = NULL;
10010 root->fs_info->free_extent_hook = NULL;
10011 root->fs_info->corrupt_blocks = NULL;
10012 root->fs_info->excluded_extents = NULL;
10015 free_chunk_cache_tree(&chunk_cache);
10016 free_device_cache_tree(&dev_cache);
10017 free_block_group_tree(&block_group_cache);
10018 free_device_extent_tree(&dev_extent_cache);
10019 free_extent_cache_tree(&seen);
10020 free_extent_cache_tree(&pending);
10021 free_extent_cache_tree(&reada);
10022 free_extent_cache_tree(&nodes);
10023 free_root_item_list(&normal_trees);
10024 free_root_item_list(&dropping_trees);
/*
 * Second visible teardown sequence: additionally releases the extent
 * records with free_extent_record_cache() and leaves the fs_info pointers
 * set.
 */
10027 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10028 free_extent_cache_tree(&seen);
10029 free_extent_cache_tree(&pending);
10030 free_extent_cache_tree(&reada);
10031 free_extent_cache_tree(&nodes);
10032 free_chunk_cache_tree(&chunk_cache);
10033 free_block_group_tree(&block_group_cache);
10034 free_device_cache_tree(&dev_cache);
10035 free_device_extent_tree(&dev_extent_cache);
10036 free_extent_record_cache(&extent_cache);
10037 free_root_item_list(&normal_trees);
10038 free_root_item_list(&dropping_trees);
10039 extent_io_tree_cleanup(&excluded_extents);
10044 * Check backrefs of a tree block given by @bytenr or @eb.
10046 * @root: the root containing the @bytenr or @eb
10047 * @eb: tree block extent buffer, can be NULL
10048 * @bytenr: bytenr of the tree block to search
10049 * @level: tree level of the tree block
10050 * @owner: owner of the tree block
10052 * Return >0 for any error found and output error message
10053 * Return 0 for no error found
/*
 * Implementation notes for check_tree_block_ref().
 *
 * Lookup: the extent tree is searched with key (bytenr, type, -1), where the
 * type is METADATA_ITEM when the SKINNY_METADATA incompat flag is set and
 * EXTENT_ITEM otherwise, followed by btrfs_previous_extent_item().
 *
 * Item checks (BACKREF_MISMATCH): TREE_BLOCK flag, header generation versus
 * extent generation, @level versus the level stored in the item, and a
 * reference count of 1 when @owner is not an fs tree.
 *
 * Backref search: the inline refs are walked first. A TREE_BLOCK_REF matches
 * when its offset equals root->objectid or @owner. A SHARED_BLOCK_REF is
 * followed by a recursive call on the parent block at level + 1, unless the
 * block was recognised as the root node of a tree reloc root. If nothing
 * matched inline, a keyed TREE_BLOCK_REF item with offset root->objectid is
 * searched. A missing backref sets BACKREF_MISSING and is reported only when
 * @eb is non-NULL.
 *
 * NOTE(review): the mismatch branches store BACKREF_MISMATCH with a plain
 * assignment while the missing-backref paths use |=, so earlier bits in err
 * are overwritten there. Confirm whether that is intended.
 * NOTE(review): several guards and all return statements are on lines
 * missing from this listing.
 */
10055 static int check_tree_block_ref(struct btrfs_root *root,
10056 struct extent_buffer *eb, u64 bytenr,
10057 int level, u64 owner)
10059 struct btrfs_key key;
10060 struct btrfs_root *extent_root = root->fs_info->extent_root;
10061 struct btrfs_path path;
10062 struct btrfs_extent_item *ei;
10063 struct btrfs_extent_inline_ref *iref;
10064 struct extent_buffer *leaf;
10070 u32 nodesize = root->fs_info->nodesize;
10073 int tree_reloc_root = 0;
/* The block is the root node of a tree reloc root. */
10078 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10079 btrfs_header_bytenr(root->node) == bytenr)
10080 tree_reloc_root = 1;
10082 btrfs_init_path(&path);
10083 key.objectid = bytenr;
10084 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10085 key.type = BTRFS_METADATA_ITEM_KEY;
10087 key.type = BTRFS_EXTENT_ITEM_KEY;
10088 key.offset = (u64)-1;
10090 /* Search for the backref in extent tree */
10091 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10093 err |= BACKREF_MISSING;
10096 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10098 err |= BACKREF_MISSING;
10102 leaf = path.nodes[0];
10103 slot = path.slots[0];
10104 btrfs_item_key_to_cpu(leaf, &key, slot);
10106 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM keeps the level in key.offset; otherwise read the info. */
10108 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10109 skinny_level = (int)key.offset;
10110 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10112 struct btrfs_tree_block_info *info;
10114 info = (struct btrfs_tree_block_info *)(ei + 1);
10115 skinny_level = btrfs_tree_block_level(leaf, info);
10116 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10123 if (!(btrfs_extent_flags(leaf, ei) &
10124 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10126 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10127 key.objectid, nodesize,
10128 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10129 err = BACKREF_MISMATCH;
10131 header_gen = btrfs_header_generation(eb);
10132 extent_gen = btrfs_extent_generation(leaf, ei);
10133 if (header_gen != extent_gen) {
10135 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10136 key.objectid, nodesize, header_gen,
10138 err = BACKREF_MISMATCH;
10140 if (level != skinny_level) {
10142 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10143 key.objectid, nodesize, level, skinny_level);
10144 err = BACKREF_MISMATCH;
10146 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10148 "extent[%llu %u] is referred by other roots than %llu",
10149 key.objectid, nodesize, root->objectid);
10150 err = BACKREF_MISMATCH;
10155 * Iterate the extent/metadata item to find the exact backref
10157 item_size = btrfs_item_size_nr(leaf, slot);
10158 ptr = (unsigned long)iref;
10159 end = (unsigned long)ei + item_size;
10160 while (ptr < end) {
10161 iref = (struct btrfs_extent_inline_ref *)ptr;
10162 type = btrfs_extent_inline_ref_type(leaf, iref);
10163 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10165 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10166 (offset == root->objectid || offset == owner)) {
10168 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10170 * Backref of tree reloc root points to itself, no need
10171 * to check backref any more.
10173 if (tree_reloc_root)
10176 /* Check if the backref points to valid referencer */
10177 found_ref = !check_tree_block_ref(root, NULL,
10178 offset, level + 1, owner);
10183 ptr += btrfs_extent_inline_ref_size(type);
10187 * Inlined extent item doesn't have what we need, check
10188 * TREE_BLOCK_REF_KEY
10191 btrfs_release_path(&path);
10192 key.objectid = bytenr;
10193 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10194 key.offset = root->objectid;
10196 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10201 err |= BACKREF_MISSING;
10203 btrfs_release_path(&path);
10204 if (eb && (err & BACKREF_MISSING))
10205 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10206 bytenr, nodesize, owner, level);
10211 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10213 * Return >0 for any error found and output error message
10214 * Return 0 for no error found
/*
 * Implementation notes for check_extent_data_item().
 *
 * @root: tree owning @eb; root->objectid and root->fs_info are used
 * @eb:   leaf containing the file extent item
 * @slot: slot of the EXTENT_DATA item inside @eb
 *
 * Steps visible in this listing:
 *  - inline extents and extents with disk_bytenr == 0 are not checked;
 *  - disk_num_bytes and num_bytes are tested against the sector size
 *    (BYTES_UNALIGNED), and the global counters data_bytes_allocated and
 *    data_bytes_referenced are increased;
 *  - the extent item (disk_bytenr, EXTENT_ITEM, disk_num_bytes) is looked
 *    up and must carry BTRFS_EXTENT_FLAG_DATA (BACKREF_MISMATCH);
 *  - the inline refs are walked: an EXTENT_DATA_REF matches when its root
 *    equals the header owner of @eb or root->objectid, and a
 *    SHARED_DATA_REF is validated through check_tree_block_ref();
 *  - if nothing matched, a keyed EXTENT_DATA_REF item and then a keyed
 *    SHARED_DATA_REF item (offset eb->start) are searched;
 *  - no match at all sets BACKREF_MISSING and reports the data extent.
 *
 * NOTE(review): the keyed EXTENT_DATA_REF lookup hashes fi_key.offset as is,
 * while a comment in check_extent_data_backref() states that the backref
 * offset is file_offset - file_extent_offset. Confirm that extents with a
 * non-zero extent offset are handled.
 * NOTE(review): whether the counter updates sit in else branches, and all
 * return statements, are on lines missing from this listing.
 */
10216 static int check_extent_data_item(struct btrfs_root *root,
10217 struct extent_buffer *eb, int slot)
10219 struct btrfs_file_extent_item *fi;
10220 struct btrfs_path path;
10221 struct btrfs_root *extent_root = root->fs_info->extent_root;
10222 struct btrfs_key fi_key;
10223 struct btrfs_key dbref_key;
10224 struct extent_buffer *leaf;
10225 struct btrfs_extent_item *ei;
10226 struct btrfs_extent_inline_ref *iref;
10227 struct btrfs_extent_data_ref *dref;
10230 u64 disk_num_bytes;
10231 u64 extent_num_bytes;
10238 int found_dbackref = 0;
10242 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10243 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10245 /* Nothing to check for hole and inline data extents */
10246 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10247 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10250 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10251 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10252 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10254 /* Check unaligned disk_num_bytes and num_bytes */
10255 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10257 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10258 fi_key.objectid, fi_key.offset, disk_num_bytes,
10259 root->fs_info->sectorsize);
10260 err |= BYTES_UNALIGNED;
10262 data_bytes_allocated += disk_num_bytes;
10264 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10266 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10267 fi_key.objectid, fi_key.offset, extent_num_bytes,
10268 root->fs_info->sectorsize);
10269 err |= BYTES_UNALIGNED;
10271 data_bytes_referenced += extent_num_bytes;
10273 owner = btrfs_header_owner(eb);
10275 /* Check the extent item of the file extent in extent tree */
10276 btrfs_init_path(&path);
10277 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10278 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10279 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10281 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10285 leaf = path.nodes[0];
10286 slot = path.slots[0];
10287 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10289 extent_flags = btrfs_extent_flags(leaf, ei);
10291 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10293 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10294 disk_bytenr, disk_num_bytes,
10295 BTRFS_EXTENT_FLAG_DATA);
10296 err |= BACKREF_MISMATCH;
10299 /* Check data backref inside that extent item */
10300 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10301 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10302 ptr = (unsigned long)iref;
10303 end = (unsigned long)ei + item_size;
10304 while (ptr < end) {
10305 iref = (struct btrfs_extent_inline_ref *)ptr;
10306 type = btrfs_extent_inline_ref_type(leaf, iref);
10307 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10309 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10310 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10311 if (ref_root == owner || ref_root == root->objectid)
10312 found_dbackref = 1;
10313 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10314 found_dbackref = !check_tree_block_ref(root, NULL,
10315 btrfs_extent_inline_ref_offset(leaf, iref),
10319 if (found_dbackref)
10321 ptr += btrfs_extent_inline_ref_size(type);
10324 if (!found_dbackref) {
10325 btrfs_release_path(&path);
10327 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10328 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10329 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10330 dbref_key.offset = hash_extent_data_ref(root->objectid,
10331 fi_key.objectid, fi_key.offset);
10333 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10334 &dbref_key, &path, 0, 0);
10336 found_dbackref = 1;
10340 btrfs_release_path(&path);
10343 * Neither inlined nor EXTENT_DATA_REF found, try
10344 * SHARED_DATA_REF as last chance.
10346 dbref_key.objectid = disk_bytenr;
10347 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10348 dbref_key.offset = eb->start;
10350 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10351 &dbref_key, &path, 0, 0);
10353 found_dbackref = 1;
10359 if (!found_dbackref)
10360 err |= BACKREF_MISSING;
10361 btrfs_release_path(&path);
10362 if (err & BACKREF_MISSING) {
10363 error("data extent[%llu %llu] backref lost",
10364 disk_bytenr, disk_num_bytes);
10370 * Get the real tree block level, e.g. for a shared block
10371 * Return >= 0 as tree level
10372 * Return <0 for error
/*
 * Implementation notes for query_tree_block_level().
 *
 * @fs_info: supplies the extent root and is used to read the block
 * @bytenr:  start of the tree block whose level is wanted
 *
 * The extent tree is searched with key (bytenr, METADATA_ITEM, -1) and
 * btrfs_previous_extent_item() steps back to the item of @bytenr. The item
 * must carry BTRFS_EXTENT_FLAG_TREE_BLOCK. Two level values are then
 * obtained:
 *  - backref_level: key.offset for a METADATA_ITEM, otherwise the level
 *    stored in the btrfs_tree_block_info that follows the extent item;
 *  - header_level: read from the block itself, which is fetched with the
 *    generation recorded in the extent item.
 * The two are compared and header_level is returned on the path visible at
 * the end of the success branch.
 *
 * NOTE(review): the error returns and the statement guarded by the level
 * comparison are on lines missing from this listing.
 */
10374 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10376 struct extent_buffer *eb;
10377 struct btrfs_path path;
10378 struct btrfs_key key;
10379 struct btrfs_extent_item *ei;
10386 /* Search extent tree for extent generation and level */
10387 key.objectid = bytenr;
10388 key.type = BTRFS_METADATA_ITEM_KEY;
10389 key.offset = (u64)-1;
10391 btrfs_init_path(&path);
10392 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10395 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10403 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10404 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10405 struct btrfs_extent_item);
10406 flags = btrfs_extent_flags(path.nodes[0], ei);
10407 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10412 /* Get transid for later read_tree_block() check */
10413 transid = btrfs_extent_generation(path.nodes[0], ei);
10415 /* Get backref level as one source */
10416 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10417 backref_level = key.offset;
10419 struct btrfs_tree_block_info *info;
10421 info = (struct btrfs_tree_block_info *)(ei + 1);
10422 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10424 btrfs_release_path(&path);
10426 /* Get level from tree block as an alternative source */
10427 eb = read_tree_block(fs_info, bytenr, transid);
10428 if (!extent_buffer_uptodate(eb)) {
10429 free_extent_buffer(eb);
10432 header_level = btrfs_header_level(eb);
10433 free_extent_buffer(eb);
10435 if (header_level != backref_level)
10437 return header_level;
10440 btrfs_release_path(&path);
10445 * Check if a tree block backref is valid (points to a valid tree block)
10446 * if level == -1, level will be resolved
10447 * Return >0 for any error found and print error message
/*
 * Implementation notes for check_tree_block_backref().
 *
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  start of the referenced tree block
 * @level:   expected level; resolved through query_tree_block_level() for
 *           the -1 special case
 *
 * The root is opened with key (root_id, ROOT_ITEM, -1) and the block is read
 * to obtain its first key (node key or item key). A block with no items at
 * level 0 is treated as an empty tree and needs no key check. The root is
 * then searched for that key with path.lowest_level = level, and the block
 * found at path.nodes[level] must have the same bytenr and level
 * (REFERENCER_MISMATCH otherwise). Failure to resolve the level, open the
 * root, read the block or complete the search sets REFERENCER_MISSING,
 * which is reported at the end.
 *
 * NOTE(review): the conditions selecting between the two key readers and
 * between the two final messages, and the return statement, are on lines
 * missing from this listing.
 */
10449 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10450 u64 bytenr, int level)
10452 struct btrfs_root *root;
10453 struct btrfs_key key;
10454 struct btrfs_path path;
10455 struct extent_buffer *eb;
10456 struct extent_buffer *node;
10457 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10461 /* Query level for level == -1 special case */
10463 level = query_tree_block_level(fs_info, bytenr);
10465 err |= REFERENCER_MISSING;
10469 key.objectid = root_id;
10470 key.type = BTRFS_ROOT_ITEM_KEY;
10471 key.offset = (u64)-1;
10473 root = btrfs_read_fs_root(fs_info, &key);
10474 if (IS_ERR(root)) {
10475 err |= REFERENCER_MISSING;
10479 /* Read out the tree block to get item/node key */
10480 eb = read_tree_block(fs_info, bytenr, 0);
10481 if (!extent_buffer_uptodate(eb)) {
10482 err |= REFERENCER_MISSING;
10483 free_extent_buffer(eb);
10487 /* Empty tree, no need to check key */
10488 if (!btrfs_header_nritems(eb) && !level) {
10489 free_extent_buffer(eb);
10494 btrfs_node_key_to_cpu(eb, &key, 0);
10496 btrfs_item_key_to_cpu(eb, &key, 0);
10498 free_extent_buffer(eb);
10500 btrfs_init_path(&path);
10501 path.lowest_level = level;
10502 /* Search with the first key, to ensure we can reach it */
10503 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10505 err |= REFERENCER_MISSING;
/* The block reached at the requested level must be the referenced one. */
10509 node = path.nodes[level];
10510 if (btrfs_header_bytenr(node) != bytenr) {
10512 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10513 bytenr, nodesize, bytenr,
10514 btrfs_header_bytenr(node));
10515 err |= REFERENCER_MISMATCH;
10517 if (btrfs_header_level(node) != level) {
10519 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10520 bytenr, nodesize, level,
10521 btrfs_header_level(node));
10522 err |= REFERENCER_MISMATCH;
10526 btrfs_release_path(&path);
10528 if (err & REFERENCER_MISSING) {
10530 error("extent [%llu %d] lost referencer (owner: %llu)",
10531 bytenr, nodesize, root_id);
10534 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10535 bytenr, nodesize, root_id, level);
10542 * Check if tree block @eb is tree reloc root.
10543 * Return 0 if it's not or any problem happens
10544 * Return 1 if it's a tree reloc root
/*
 * Implementation notes for is_tree_reloc_root().
 *
 * @fs_info: filesystem used to open the reloc root
 * @eb:      tree block being tested; its header bytenr and owner are read
 *
 * The root with key (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb) is
 * opened through btrfs_read_fs_root_no_cache(). The bytenr of @eb is then
 * compared with the bytenr of the node of that root, and the root is
 * released with btrfs_free_fs_root().
 *
 * NOTE(review): the statements that set and return the result are on lines
 * missing from this listing.
 */
10546 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10547 struct extent_buffer *eb)
10549 struct btrfs_root *tree_reloc_root;
10550 struct btrfs_key key;
10551 u64 bytenr = btrfs_header_bytenr(eb);
10552 u64 owner = btrfs_header_owner(eb);
10555 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10556 key.offset = owner;
10557 key.type = BTRFS_ROOT_ITEM_KEY;
10559 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10560 if (IS_ERR(tree_reloc_root))
10563 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10565 btrfs_free_fs_root(tree_reloc_root);
10570 * Check referencer for shared block backref
10571 * If level == -1, this function will resolve the level.
/*
 * Implementation notes for check_shared_block_backref().
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block named as parent by the backref
 * @bytenr:  bytenr of the referenced tree block
 * @level:   level of @bytenr; resolved with query_tree_block_level() when
 *           the caller passes -1 (per the comment above this function)
 *
 * The parent block is read. When @parent equals @bytenr the block is tested
 * with is_tree_reloc_root(). Otherwise the parent header level is compared
 * with level + 1 and the block pointers of the parent are scanned for
 * @bytenr. If no parent was found, an error is printed and
 * REFERENCER_MISSING is returned.
 *
 * NOTE(review): the statements guarded by the individual tests and the
 * success return are on lines missing from this listing.
 */
10573 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10574 u64 parent, u64 bytenr, int level)
10576 struct extent_buffer *eb;
10578 int found_parent = 0;
10581 eb = read_tree_block(fs_info, parent, 0);
10582 if (!extent_buffer_uptodate(eb))
10586 level = query_tree_block_level(fs_info, bytenr);
10590 /* It's possible it's a tree reloc root */
10591 if (parent == bytenr) {
10592 if (is_tree_reloc_root(fs_info, eb))
10597 if (level + 1 != btrfs_header_level(eb))
/* Look for a block pointer in the parent that targets bytenr. */
10600 nr = btrfs_header_nritems(eb);
10601 for (i = 0; i < nr; i++) {
10602 if (bytenr == btrfs_node_blockptr(eb, i)) {
10608 free_extent_buffer(eb);
10609 if (!found_parent) {
10611 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10612 bytenr, fs_info->nodesize, parent, level);
10613 return REFERENCER_MISSING;
10619 * Check referencer for normal (inlined) data ref
10620 * If len == 0, it will be resolved by searching in extent tree
/*
 * Implementation notes for check_extent_data_backref().
 *
 * @fs_info:  filesystem being checked
 * @root_id:  root named by the data backref
 * @objectid: inode number named by the data backref
 * @offset:   offset stored in the data backref
 * @bytenr:   start of the data extent
 * @len:      length of the data extent; the comment above this function
 *            says 0 means "resolve from the extent tree"
 * @count:    reference count recorded in the backref
 *
 * The extent tree is searched with (bytenr, EXTENT_ITEM, -1) followed by
 * btrfs_previous_extent_item(), and the key found is compared with @bytenr
 * and EXTENT_ITEM. The root @root_id is then opened and its EXTENT_DATA
 * items for @objectid are walked with btrfs_next_item(). The search starts
 * at @offset, or at 0 when bit 63 of @offset is set. A file extent is
 * matched on disk bytenr, disk num bytes and
 * key.offset - file extent offset. A mismatch between the number found and
 * @count is reported and returns REFERENCER_MISSING.
 *
 * NOTE(review): the right-hand side of the offset comparison, the
 * found_count update, the @len resolution and the other return statements
 * are on lines missing from this listing.
 */
10622 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10623 u64 root_id, u64 objectid, u64 offset,
10624 u64 bytenr, u64 len, u32 count)
10626 struct btrfs_root *root;
10627 struct btrfs_root *extent_root = fs_info->extent_root;
10628 struct btrfs_key key;
10629 struct btrfs_path path;
10630 struct extent_buffer *leaf;
10631 struct btrfs_file_extent_item *fi;
10632 u32 found_count = 0;
10637 key.objectid = bytenr;
10638 key.type = BTRFS_EXTENT_ITEM_KEY;
10639 key.offset = (u64)-1;
10641 btrfs_init_path(&path);
10642 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10645 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10648 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10649 if (key.objectid != bytenr ||
10650 key.type != BTRFS_EXTENT_ITEM_KEY)
10653 btrfs_release_path(&path);
10655 key.objectid = root_id;
10656 key.type = BTRFS_ROOT_ITEM_KEY;
10657 key.offset = (u64)-1;
10658 btrfs_init_path(&path);
10660 root = btrfs_read_fs_root(fs_info, &key);
10664 key.objectid = objectid;
10665 key.type = BTRFS_EXTENT_DATA_KEY;
10667 * It can be nasty as data backref offset is
10668 * file offset - file extent offset, which is smaller or
10669 * equal to original backref offset. The only special case is
10670 * overflow. So we need to special check and do further search.
10672 key.offset = offset & (1ULL << 63) ? 0 : offset;
10674 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10679 * Search afterwards to get correct one
10680 * NOTE: As we must do a comprehensive check on the data backref to
10681 * make sure the dref count also matches, we must iterate all file
10682 * extents for that inode.
10685 leaf = path.nodes[0];
10686 slot = path.slots[0];
10688 if (slot >= btrfs_header_nritems(leaf))
10690 btrfs_item_key_to_cpu(leaf, &key, slot);
10691 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10693 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10695 * Except normal disk bytenr and disk num bytes, we still
10696 * need to do extra check on dbackref offset as
10697 * dbackref offset = file_offset - file_extent_offset
10699 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10700 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10701 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10706 ret = btrfs_next_item(root, &path);
10711 btrfs_release_path(&path);
10712 if (found_count != count) {
10714 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10715 bytenr, len, root_id, objectid, offset, count, found_count);
10716 return REFERENCER_MISSING;
10722 * Check if the referencer of a shared data backref exists
/*
 * Implementation notes for check_shared_data_backref().
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block named by the shared data backref
 * @bytenr:  start of the data extent
 *
 * The parent block is read and its items are scanned. Only EXTENT_DATA
 * items are looked at, inline file extents are tested for separately, and a
 * file extent whose disk bytenr equals @bytenr is the match condition. If
 * no referencer was found an error is printed and REFERENCER_MISSING is
 * returned.
 *
 * NOTE(review): the statements guarded by the tests inside the loop and the
 * success return are on lines missing from this listing.
 */
10724 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10725 u64 parent, u64 bytenr)
10727 struct extent_buffer *eb;
10728 struct btrfs_key key;
10729 struct btrfs_file_extent_item *fi;
10731 int found_parent = 0;
10734 eb = read_tree_block(fs_info, parent, 0);
10735 if (!extent_buffer_uptodate(eb))
10738 nr = btrfs_header_nritems(eb);
10739 for (i = 0; i < nr; i++) {
10740 btrfs_item_key_to_cpu(eb, &key, i);
10741 if (key.type != BTRFS_EXTENT_DATA_KEY)
10744 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10745 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10748 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10755 free_extent_buffer(eb);
10756 if (!found_parent) {
10757 error("shared extent %llu referencer lost (parent: %llu)",
10759 return REFERENCER_MISSING;
10765 * This function will check a given extent item, including its backref and
10766 * itself (like crossing stripe boundary and type)
10768 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Implementation notes for check_extent_item().
 *
 * @fs_info: filesystem being checked
 * @eb:      extent tree leaf holding the item
 * @slot:    slot of the EXTENT_ITEM or METADATA_ITEM inside @eb
 *
 * Steps visible in this listing:
 *  - the global bytes_used grows by key.offset for an EXTENT_ITEM key and by
 *    the node size on the other branch;
 *  - an item smaller than struct btrfs_extent_item is reported as the
 *    unsupported COMPAT_EXTENT_TREE_V0 format;
 *  - metadata extents are tested with check_crossing_stripes()
 *    (CROSSING_STRIPE_BOUNDARY);
 *  - the level comes from the btrfs_tree_block_info for metadata stored
 *    under an EXTENT_ITEM key, and from key.offset on the other branch;
 *  - each inline ref is dispatched by type to check_tree_block_backref(),
 *    check_shared_block_backref(), check_extent_data_backref() or
 *    check_shared_data_backref(); an unknown type sets UNKNOWN_TYPE;
 *  - running past the item end sets ITEM_SIZE_MISMATCH.
 *
 * NOTE(review): the loop construct, the way each helper result is merged
 * into err, and the return statement are on lines missing from this
 * listing.
 */
10770 static int check_extent_item(struct btrfs_fs_info *fs_info,
10771 struct extent_buffer *eb, int slot)
10773 struct btrfs_extent_item *ei;
10774 struct btrfs_extent_inline_ref *iref;
10775 struct btrfs_extent_data_ref *dref;
10779 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10780 u32 item_size = btrfs_item_size_nr(eb, slot);
10785 struct btrfs_key key;
10789 btrfs_item_key_to_cpu(eb, &key, slot);
10790 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10791 bytes_used += key.offset;
10793 bytes_used += nodesize;
10795 if (item_size < sizeof(*ei)) {
10797 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10798 * old thing when on disk format is still un-determined.
10799 * No need to care about it anymore
10801 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10805 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10806 flags = btrfs_extent_flags(eb, ei);
10808 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10810 if (metadata && check_crossing_stripes(global_info, key.objectid,
10812 error("bad metadata [%llu, %llu) crossing stripe boundary",
10813 key.objectid, key.objectid + nodesize);
10814 err |= CROSSING_STRIPE_BOUNDARY;
10817 ptr = (unsigned long)(ei + 1);
10819 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10820 /* Old EXTENT_ITEM metadata */
10821 struct btrfs_tree_block_info *info;
10823 info = (struct btrfs_tree_block_info *)ptr;
10824 level = btrfs_tree_block_level(eb, info);
10825 ptr += sizeof(struct btrfs_tree_block_info);
10827 /* New METADATA_ITEM */
10828 level = key.offset;
10830 end = (unsigned long)ei + item_size;
10833 /* Reached extent item end normally */
10837 /* Beyond extent item end, wrong item size */
10839 err |= ITEM_SIZE_MISMATCH;
10840 error("extent item at bytenr %llu slot %d has wrong size",
10845 /* Now check every backref in this extent item */
10846 iref = (struct btrfs_extent_inline_ref *)ptr;
10847 type = btrfs_extent_inline_ref_type(eb, iref);
10848 offset = btrfs_extent_inline_ref_offset(eb, iref);
10850 case BTRFS_TREE_BLOCK_REF_KEY:
10851 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10855 case BTRFS_SHARED_BLOCK_REF_KEY:
10856 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10860 case BTRFS_EXTENT_DATA_REF_KEY:
10861 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10862 ret = check_extent_data_backref(fs_info,
10863 btrfs_extent_data_ref_root(eb, dref),
10864 btrfs_extent_data_ref_objectid(eb, dref),
10865 btrfs_extent_data_ref_offset(eb, dref),
10866 key.objectid, key.offset,
10867 btrfs_extent_data_ref_count(eb, dref));
10870 case BTRFS_SHARED_DATA_REF_KEY:
10871 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10875 error("extent[%llu %d %llu] has unknown ref type: %d",
10876 key.objectid, key.type, key.offset, type);
10877 err |= UNKNOWN_TYPE;
10881 ptr += btrfs_extent_inline_ref_size(type);
10889 * Check if a dev extent item is referred correctly by its chunk
/*
 * Implementation notes for check_dev_extent_item().
 *
 * @fs_info: filesystem being checked; its chunk root is searched
 * @eb:      leaf holding the device extent item
 * @slot:    slot of the device extent item inside @eb
 *
 * The chunk key (chunk objectid, CHUNK_ITEM, chunk offset) is built from the
 * device extent and searched in the chunk root. The chunk found is passed to
 * btrfs_check_chunk_valid(), its stripe length is compared with the device
 * extent length, and its stripes are scanned for one whose devid and offset
 * equal the objectid and offset of the device extent key. If no chunk was
 * found an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the assignment of the leaf pointer l, the statements guarded
 * by the individual tests and the success return are on lines missing from
 * this listing.
 */
10891 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10892 struct extent_buffer *eb, int slot)
10894 struct btrfs_root *chunk_root = fs_info->chunk_root;
10895 struct btrfs_dev_extent *ptr;
10896 struct btrfs_path path;
10897 struct btrfs_key chunk_key;
10898 struct btrfs_key devext_key;
10899 struct btrfs_chunk *chunk;
10900 struct extent_buffer *l;
10904 int found_chunk = 0;
10907 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10908 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10909 length = btrfs_dev_extent_length(eb, ptr);
10911 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10912 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10913 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10915 btrfs_init_path(&path);
10916 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10921 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10922 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10927 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe that maps exactly onto this device extent. */
10930 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10931 for (i = 0; i < num_stripes; i++) {
10932 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10933 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10935 if (devid == devext_key.objectid &&
10936 offset == devext_key.offset) {
10942 btrfs_release_path(&path);
10943 if (!found_chunk) {
10945 "device extent[%llu, %llu, %llu] did not find the related chunk",
10946 devext_key.objectid, devext_key.offset, length);
10947 return REFERENCER_MISSING;
10953 * Check if the used space is correct with the dev item
/*
 * Cross-check the bytes_used field of a dev item against its dev extents.
 *
 * @fs_info: filesystem whose device tree is searched
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * The lengths of the BTRFS_DEV_EXTENT_KEY items whose objectid equals the
 * device id are summed and compared with the dev item's bytes_used.
 *
 * Returns REFERENCER_MISSING when the dev extent search fails and
 * ACCOUNTING_MISMATCH when the summed length differs from bytes_used.
 */
10955 static int check_dev_item(struct btrfs_fs_info *fs_info,
10956 struct extent_buffer *eb, int slot)
10958 struct btrfs_root *dev_root = fs_info->dev_root;
10959 struct btrfs_dev_item *dev_item;
10960 struct btrfs_path path;
10961 struct btrfs_key key;
10962 struct btrfs_dev_extent *ptr;
10968 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10969 dev_id = btrfs_device_id(eb, dev_item);
10970 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents of a device are keyed by the device id as objectid */
10972 key.objectid = dev_id;
10973 key.type = BTRFS_DEV_EXTENT_KEY;
10976 btrfs_init_path(&path);
10977 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Reload the dev item's own key so the error message describes that item */
10979 btrfs_item_key_to_cpu(eb, &key, slot);
10980 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10981 key.objectid, key.type, key.offset);
10982 btrfs_release_path(&path);
10983 return REFERENCER_MISSING;
10986 /* Iterate dev_extents to calculate the used space of a device */
10988 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10991 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10992 if (key.objectid > dev_id)
10994 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10997 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10998 struct btrfs_dev_extent);
10999 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11001 ret = btrfs_next_item(dev_root, &path);
11005 btrfs_release_path(&path);
11007 if (used != total) {
/*
 * NOTE(review): the key is reloaded from @eb here, yet the message below
 * prints the constants BTRFS_ROOT_TREE_OBJECTID and BTRFS_DEV_EXTENT_KEY
 * instead of key.objectid/key.type -- confirm which one is intended.
 */
11008 btrfs_item_key_to_cpu(eb, &key, slot);
11010 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11011 total, used, BTRFS_ROOT_TREE_OBJECTID,
11012 BTRFS_DEV_EXTENT_KEY, dev_id);
11013 return ACCOUNTING_MISMATCH;
11019 * Check a block group item with its referencer (chunk) and its used space
11020 * with extent/metadata item
/*
 * Check one block group item against its chunk and against the extent tree.
 *
 * @fs_info: filesystem whose chunk and extent trees are searched
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Two checks are made:
 *  - the chunk item keyed by the block group start is looked up and its
 *    length is compared;
 *  - every EXTENT_ITEM/METADATA_ITEM inside the block group range is walked
 *    to add up the used space and to check that the extent flags agree with
 *    the block group flags.
 *
 * Problems are accumulated in err as REFERENCER_MISSING, REFERENCER_MISMATCH,
 * CHUNK_TYPE_MISMATCH and ACCOUNTING_MISMATCH.
 */
11022 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11023 struct extent_buffer *eb, int slot)
11025 struct btrfs_root *extent_root = fs_info->extent_root;
11026 struct btrfs_root *chunk_root = fs_info->chunk_root;
11027 struct btrfs_block_group_item *bi;
11028 struct btrfs_block_group_item bg_item;
11029 struct btrfs_path path;
11030 struct btrfs_key bg_key;
11031 struct btrfs_key chunk_key;
11032 struct btrfs_key extent_key;
11033 struct btrfs_chunk *chunk;
11034 struct extent_buffer *leaf;
11035 struct btrfs_extent_item *ei;
11036 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the item out of the leaf so its fields can be read from bg_item */
11044 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11045 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11046 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11047 used = btrfs_block_group_used(&bg_item);
11048 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item offset is the block group start (bg_key.objectid) */
11050 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11051 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11052 chunk_key.offset = bg_key.objectid;
11054 btrfs_init_path(&path);
11055 /* Search for the referencer chunk */
11056 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11059 "block group[%llu %llu] did not find the related chunk item",
11060 bg_key.objectid, bg_key.offset);
11061 err |= REFERENCER_MISSING;
11063 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11064 struct btrfs_chunk);
11065 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11068 "block group[%llu %llu] related chunk item length does not match",
11069 bg_key.objectid, bg_key.offset);
11070 err |= REFERENCER_MISMATCH;
11073 btrfs_release_path(&path);
11075 /* Search from the block group bytenr */
11076 extent_key.objectid = bg_key.objectid;
11077 extent_key.type = 0;
11078 extent_key.offset = 0;
11080 btrfs_init_path(&path);
11081 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11085 /* Iterate extent tree to account used space */
11087 leaf = path.nodes[0];
11089 /* Search slot can point to the last item beyond leaf nritems */
11090 if (path.slots[0] >= btrfs_header_nritems(leaf))
11093 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* The block group covers [bg_key.objectid, bg_key.objectid + bg_key.offset) */
11094 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only extent and metadata items take part in the accounting */
11097 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11098 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11100 if (extent_key.objectid < bg_key.objectid)
11103 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11106 total += extent_key.offset;
/* The extent's DATA/TREE_BLOCK flag has to agree with the block group flags */
11108 ei = btrfs_item_ptr(leaf, path.slots[0],
11109 struct btrfs_extent_item);
11110 flags = btrfs_extent_flags(leaf, ei);
11111 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11112 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11114 "bad extent[%llu, %llu) type mismatch with chunk",
11115 extent_key.objectid,
11116 extent_key.objectid + extent_key.offset);
11117 err |= CHUNK_TYPE_MISMATCH;
11119 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11120 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11121 BTRFS_BLOCK_GROUP_METADATA))) {
11123 "bad extent[%llu, %llu) type mismatch with chunk",
11124 extent_key.objectid,
11125 extent_key.objectid + nodesize);
11126 err |= CHUNK_TYPE_MISMATCH;
11130 ret = btrfs_next_item(extent_root, &path);
11136 btrfs_release_path(&path);
/* Space summed from the extent items must equal the item's "used" field */
11138 if (total != used) {
11140 "block group[%llu %llu] used %llu but extent items used %llu",
11141 bg_key.objectid, bg_key.offset, used, total);
11142 err |= ACCOUNTING_MISMATCH;
11148 * Check a chunk item.
11149 * Including checking all referred dev_extents and block group
/*
 * Check one chunk item together with the items that must refer back to it.
 *
 * @fs_info: filesystem whose extent and device trees are searched
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Steps visible in this function:
 *  - validate the chunk with btrfs_check_chunk_valid();
 *  - look up the block group item keyed (chunk offset, BLOCK_GROUP_ITEM,
 *    chunk length) and compare its flags with the chunk type;
 *  - for every stripe, look up the dev extent keyed (devid, DEV_EXTENT,
 *    stripe offset) and compare its chunk objectid, chunk offset and length.
 *
 * Problems are accumulated in err as BYTES_UNALIGNED | UNKNOWN_TYPE,
 * REFERENCER_MISSING and BACKREF_MISSING.
 */
11151 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11152 struct extent_buffer *eb, int slot)
11154 struct btrfs_root *extent_root = fs_info->extent_root;
11155 struct btrfs_root *dev_root = fs_info->dev_root;
11156 struct btrfs_path path;
11157 struct btrfs_key chunk_key;
11158 struct btrfs_key bg_key;
11159 struct btrfs_key devext_key;
11160 struct btrfs_chunk *chunk;
11161 struct extent_buffer *leaf;
11162 struct btrfs_block_group_item *bi;
11163 struct btrfs_block_group_item bg_item;
11164 struct btrfs_dev_extent *ptr;
11176 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11177 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11178 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers [chunk_key.offset, chunk_end) */
11179 chunk_end = chunk_key.offset + length;
11180 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11183 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11185 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11188 type = btrfs_chunk_type(eb, chunk);
/* The matching block group item is keyed by the chunk start and length */
11190 bg_key.objectid = chunk_key.offset;
11191 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11192 bg_key.offset = length;
11194 btrfs_init_path(&path);
11195 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11198 "chunk[%llu %llu) did not find the related block group item",
11199 chunk_key.offset, chunk_end);
11200 err |= REFERENCER_MISSING;
11202 leaf = path.nodes[0];
11203 bi = btrfs_item_ptr(leaf, path.slots[0],
11204 struct btrfs_block_group_item);
11205 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11207 if (btrfs_block_group_flags(&bg_item) != type) {
11209 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11210 chunk_key.offset, chunk_end, type,
11211 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is reported as REFERENCER_MISSING, while
 * check_block_group_item() reports its length mismatch as
 * REFERENCER_MISMATCH -- confirm which bit is intended here.
 */
11212 err |= REFERENCER_MISSING;
11216 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11217 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11218 for (i = 0; i < num_stripes; i++) {
/* Drop the path left over from the previous search before reusing it */
11219 btrfs_release_path(&path);
11220 btrfs_init_path(&path);
11221 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11222 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11223 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11225 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11228 goto not_match_dev;
/* The dev extent must point back at this chunk and have the stripe length */
11230 leaf = path.nodes[0];
11231 ptr = btrfs_item_ptr(leaf, path.slots[0],
11232 struct btrfs_dev_extent);
11233 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11234 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11235 if (objectid != chunk_key.objectid ||
11236 offset != chunk_key.offset ||
11237 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11238 goto not_match_dev;
11241 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start,
 * whereas the other messages in this function print chunk_key.offset --
 * looks inconsistent, confirm.
 */
11243 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11244 chunk_key.objectid, chunk_end, i);
11247 btrfs_release_path(&path);
11253 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and hand each one, by key type, to the matching
 * check helper.
 *
 * @root: tree the leaf belongs to; also provides fs_info
 * @eb:   leaf whose items are checked
 *
 * BTRFS_EXTENT_CSUM_KEY items are not checked here; their item size is only
 * added to total_csum_bytes. The walk advances slot until it reaches
 * btrfs_header_nritems(eb).
 */
11255 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11257 struct btrfs_fs_info *fs_info = root->fs_info;
11258 struct btrfs_key key;
11261 struct btrfs_extent_data_ref *dref;
11266 btrfs_item_key_to_cpu(eb, &key, slot);
11270 case BTRFS_EXTENT_DATA_KEY:
11271 ret = check_extent_data_item(root, eb, slot);
11274 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11275 ret = check_block_group_item(fs_info, eb, slot);
11278 case BTRFS_DEV_ITEM_KEY:
11279 ret = check_dev_item(fs_info, eb, slot);
11282 case BTRFS_CHUNK_ITEM_KEY:
11283 ret = check_chunk_item(fs_info, eb, slot);
11286 case BTRFS_DEV_EXTENT_KEY:
11287 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
11290 case BTRFS_EXTENT_ITEM_KEY:
11291 case BTRFS_METADATA_ITEM_KEY:
11292 ret = check_extent_item(fs_info, eb, slot);
/* Accounting only: add the csum item size to the global counter */
11295 case BTRFS_EXTENT_CSUM_KEY:
11296 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The remaining cases are backref items stored under their own key */
11298 case BTRFS_TREE_BLOCK_REF_KEY:
11299 ret = check_tree_block_backref(fs_info, key.offset,
11303 case BTRFS_EXTENT_DATA_REF_KEY:
11304 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11305 ret = check_extent_data_backref(fs_info,
11306 btrfs_extent_data_ref_root(eb, dref),
11307 btrfs_extent_data_ref_objectid(eb, dref),
11308 btrfs_extent_data_ref_offset(eb, dref),
11310 btrfs_extent_data_ref_count(eb, dref));
11313 case BTRFS_SHARED_BLOCK_REF_KEY:
11314 ret = check_shared_block_backref(fs_info, key.offset,
11318 case BTRFS_SHARED_DATA_REF_KEY:
11319 ret = check_shared_data_backref(fs_info, key.offset,
11327 if (++slot < btrfs_header_nritems(eb))
11334 * Helper function for later fs/subvol tree check. To determine if a tree
11335 * block should be checked.
11336 * This function ensures that only the direct referencer with the lowest
11337 * rootid checks a fs/subvolume tree block.
11339 * Backref check at extent tree would detect errors like missing subvolume
11340 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * @root: tree currently being traversed
 * @eb:   tree block reached through @root
 *
 * The extent tree is searched for the extent/metadata item that describes
 * @eb, and the inline backrefs of that item are walked. The walk stops early
 * as soon as a BTRFS_TREE_BLOCK_REF_KEY backref whose root id is lower than
 * root->objectid is found.
 */
11342 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11344 struct btrfs_root *extent_root = root->fs_info->extent_root;
11345 struct btrfs_key key;
11346 struct btrfs_path path;
11347 struct extent_buffer *leaf;
11349 struct btrfs_extent_item *ei;
11355 struct btrfs_extent_inline_ref *iref;
/* Search with the largest offset, then step back to the item for this bytenr */
11358 btrfs_init_path(&path);
11359 key.objectid = btrfs_header_bytenr(eb);
11360 key.type = BTRFS_METADATA_ITEM_KEY;
11361 key.offset = (u64)-1;
11364 * Any failure in backref resolving means we can't determine
11365 * whom the tree block belongs to.
11366 * So in that case, we need to check that tree block
11368 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11372 ret = btrfs_previous_extent_item(extent_root, &path,
11373 btrfs_header_bytenr(eb));
11377 leaf = path.nodes[0];
11378 slot = path.slots[0];
11379 btrfs_item_key_to_cpu(leaf, &key, slot);
11380 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Locate the first inline ref: directly after the extent item for a
 * METADATA_ITEM, otherwise after the btrfs_tree_block_info that follows it.
 */
11382 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11383 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11385 struct btrfs_tree_block_info *info;
11387 info = (struct btrfs_tree_block_info *)(ei + 1);
11388 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk the inline refs up to the end of the item */
11391 item_size = btrfs_item_size_nr(leaf, slot);
11392 ptr = (unsigned long)iref;
11393 end = (unsigned long)ei + item_size;
11394 while (ptr < end) {
11395 iref = (struct btrfs_extent_inline_ref *)ptr;
11396 type = btrfs_extent_inline_ref_type(leaf, iref);
11397 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11400 * We only check the tree block if current root is
11401 * the lowest referencer of it.
11403 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11404 offset < root->objectid) {
11405 btrfs_release_path(&path);
11409 ptr += btrfs_extent_inline_ref_size(type);
11412 * Normally we should also check keyed tree block ref, but that may be
11413 * very time consuming. Inlined ref should already make us skip a lot
11414 * of refs now. So skip search keyed tree block ref.
11418 btrfs_release_path(&path);
11423 * Traversal function for tree block. We will do:
11424 * 1) Skip shared fs/subvolume tree blocks
11425 * 2) Update related bytes accounting
11426 * 3) Pre-order traversal
/*
 * @root: tree that owns the traversal
 * @node: tree block to check; its children are visited recursively
 *
 * The block's own backref is checked with check_tree_block_ref() before its
 * contents are looked at. Leaves are passed to check_leaf_items(); for nodes
 * every child is read with read_tree_block() and traversed when the buffer is
 * up to date. Byte counters (total_btree_bytes, total_fs_tree_bytes,
 * total_extent_tree_bytes, btree_space_waste) are updated along the way.
 */
11428 static int traverse_tree_block(struct btrfs_root *root,
11429 struct extent_buffer *node)
11431 struct extent_buffer *eb;
11432 struct btrfs_key key;
11433 struct btrfs_key drop_key;
11441 * Skip shared fs/subvolume tree block, in that case they will
11442 * be checked by referencer with lowest rootid
11444 if (is_fstree(root->objectid) && !should_check(root, node))
11447 /* Update bytes accounting */
11448 total_btree_bytes += node->len;
11449 if (fs_root_objectid(btrfs_header_owner(node)))
11450 total_fs_tree_bytes += node->len;
11451 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11452 total_extent_tree_bytes += node->len;
11454 /* pre-order traversal, check itself first */
11455 level = btrfs_header_level(node);
11456 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11457 btrfs_header_level(node),
11458 btrfs_header_owner(node));
11462 "check %s failed root %llu bytenr %llu level %d, force continue check",
11463 level ? "node":"leaf", root->objectid,
11464 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: count its free space as waste and check every item */
11467 btree_space_waste += btrfs_leaf_free_space(root, node);
11468 ret = check_leaf_items(root, node);
/* Node: unused key pointer slots are counted as waste */
11473 nr = btrfs_header_nritems(node);
11474 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11475 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11476 sizeof(struct btrfs_key_ptr);
11478 /* Then check all its children */
11479 for (i = 0; i < nr; i++) {
11480 u64 blocknr = btrfs_node_blockptr(node, i);
11482 btrfs_node_key_to_cpu(node, &key, i);
/* Children at the drop level are tested against the root's drop_progress key */
11483 if (level == root->root_item.drop_level &&
11484 is_dropped_key(&key, &drop_key))
11488 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11489 * to call the function itself.
11491 eb = read_tree_block(root->fs_info, blocknr, 0);
11492 if (extent_buffer_uptodate(eb)) {
11493 ret = traverse_tree_block(root, eb);
11496 free_extent_buffer(eb);
11503 * Low memory usage version check_chunks_and_extents.
/*
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * The chunk tree and the tree root are traversed first. Afterwards the
 * ROOT_ITEMs of the tree root are walked, starting the search at
 * BTRFS_EXTENT_TREE_OBJECTID, and every tree found is read and traversed
 * with traverse_tree_block().
 */
11505 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11507 struct btrfs_path path;
11508 struct btrfs_key key;
11509 struct btrfs_root *root1;
11510 struct btrfs_root *cur_root;
11514 root1 = root->fs_info->chunk_root;
11515 ret = traverse_tree_block(root1, root1->node);
11518 root1 = root->fs_info->tree_root;
11519 ret = traverse_tree_block(root1, root1->node);
/* From here on root1 is the tree root; walk its ROOT_ITEMs */
11522 btrfs_init_path(&path);
11523 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11525 key.type = BTRFS_ROOT_ITEM_KEY;
11527 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): the message below contains a typo ("treet") */
11529 error("cannot find extent treet in tree_root");
11534 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11535 if (key.type != BTRFS_ROOT_ITEM_KEY)
11537 key.offset = (u64)-1;
/* Reloc trees are read without the root cache and freed again further down */
11539 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11540 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11543 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11544 if (IS_ERR(cur_root) || !cur_root) {
11545 error("failed to read tree: %lld", key.objectid);
11549 ret = traverse_tree_block(cur_root, cur_root->node);
11552 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11553 btrfs_free_fs_root(cur_root);
11555 ret = btrfs_next_item(root1, &path);
11561 btrfs_release_path(&path);
/*
 * Give @root a freshly initialised root block.
 *
 * @trans:     running transaction
 * @root:      root whose node is replaced or rewritten
 * @overwrite: presumably selects reuse of the existing root block instead of
 *             allocating a new one -- TODO confirm against the full source
 *
 * The block header is zeroed and refilled (level, bytenr, generation, backref
 * revision, owner, fsid, chunk tree uuid) and the buffer is marked dirty.
 * When the new block has the same start as the old root node, the root item
 * is updated in the tree root; the old buffer is released and the root is
 * added to the dirty list on the lines that follow.
 */
11565 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11566 struct btrfs_root *root, int overwrite)
11568 struct extent_buffer *c;
11569 struct extent_buffer *old = root->node;
11572 struct btrfs_disk_key disk_key = {0,0,0};
11578 extent_buffer_get(c);
11581 c = btrfs_alloc_free_block(trans, root,
11582 root->fs_info->nodesize,
11583 root->root_key.objectid,
11584 &disk_key, level, 0, 0);
11587 extent_buffer_get(c);
/* Rebuild the block header from scratch */
11591 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11592 btrfs_set_header_level(c, level);
11593 btrfs_set_header_bytenr(c, c->start);
11594 btrfs_set_header_generation(c, trans->transid);
11595 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11596 btrfs_set_header_owner(c, root->root_key.objectid);
11598 write_extent_buffer(c, root->fs_info->fsid,
11599 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11601 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11602 btrfs_header_chunk_tree_uuid(c),
11605 btrfs_mark_buffer_dirty(c);
11607 * this case can happen in the following case:
11609 * 1.overwrite previous root.
11611 * 2.reinit reloc data root, this is because we skip pin
11612 * down reloc data tree before which means we can allocate
11613 * same block bytenr here.
11615 if (old->start == c->start) {
11616 btrfs_set_root_generation(&root->root_item,
11618 root->root_item.level = btrfs_header_level(root->node);
11619 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11620 &root->root_key, &root->root_item);
11622 free_extent_buffer(c);
11626 free_extent_buffer(old);
11628 add_root_to_dirty_list(root);
/*
 * Pin the extent of tree block @eb and, recursively, of the blocks below it.
 *
 * @fs_info:   filesystem whose pinned_extents set is updated
 * @eb:        tree block to pin
 * @tree_root: non-zero while walking the tree of tree roots; in that case the
 *             root blocks referenced by ROOT_ITEMs are followed as well
 *             (presumably only for leaves -- the level test is not visible
 *             here, TODO confirm)
 *
 * Blocks that are already pinned are detected with test_range_bit() on
 * pinned_extents and not pinned again.
 */
11632 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11633 struct extent_buffer *eb, int tree_root)
11635 struct extent_buffer *tmp;
11636 struct btrfs_root_item *ri;
11637 struct btrfs_key key;
11639 int level = btrfs_header_level(eb);
11645 * If we have pinned this block before, don't pin it again.
11646 * This can not only avoid forever loop with broken filesystem
11647 * but also give us some speedups.
11649 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11650 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11653 btrfs_pin_extent(fs_info, eb->start, eb->len);
11655 nritems = btrfs_header_nritems(eb);
11656 for (i = 0; i < nritems; i++) {
/* Item path: only ROOT_ITEMs are of interest */
11658 btrfs_item_key_to_cpu(eb, &key, i);
11659 if (key.type != BTRFS_ROOT_ITEM_KEY)
11661 /* Skip the extent root and reloc roots */
11662 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11663 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11664 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11666 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11667 bytenr = btrfs_disk_root_bytenr(eb, ri);
11670 * If at any point we start needing the real root we
11671 * will have to build a stump root for the root we are
11672 * in, but for now this doesn't actually use the root so
11673 * just pass in extent_root.
11675 tmp = read_tree_block(fs_info, bytenr, 0);
11676 if (!extent_buffer_uptodate(tmp)) {
11677 fprintf(stderr, "Error reading root block\n");
/* The referenced tree is walked with tree_root == 0 */
11680 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11681 free_extent_buffer(tmp);
/* Node path: follow the block pointer stored in slot i */
11685 bytenr = btrfs_node_blockptr(eb, i);
11687 /* If we aren't the tree root don't read the block */
11688 if (level == 1 && !tree_root) {
11689 btrfs_pin_extent(fs_info, bytenr,
11690 fs_info->nodesize);
11694 tmp = read_tree_block(fs_info, bytenr, 0);
11695 if (!extent_buffer_uptodate(tmp)) {
11696 fprintf(stderr, "Error reading tree block\n");
11699 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11700 free_extent_buffer(tmp);
/*
 * Pin the blocks of the chunk tree, then those reachable from the tree root.
 *
 * The chunk tree is walked with tree_root == 0, the tree root with
 * tree_root == 1. The result of the tree root walk is returned.
 */
11709 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11713 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11717 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk items of the chunk tree.
 *
 * @fs_info: filesystem whose block group state is recreated
 *
 * The avail_*_alloc_bits fields are cleared first. For every CHUNK_ITEM a
 * block group is added with btrfs_add_block_group() and the chunk range is
 * marked dirty in fs_info->free_space_cache. A final loop then walks the
 * block groups with btrfs_lookup_first_block_group().
 */
11720 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11722 struct btrfs_block_group_cache *cache;
11723 struct btrfs_path path;
11724 struct extent_buffer *leaf;
11725 struct btrfs_chunk *chunk;
11726 struct btrfs_key key;
11730 btrfs_init_path(&path);
11732 key.type = BTRFS_CHUNK_ITEM_KEY;
11734 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11736 btrfs_release_path(&path);
11741 * We do this in case the block groups were screwed up and had alloc
11742 * bits that aren't actually set on the chunks. This happens with
11743 * restored images every time and could happen in real life I guess.
11745 fs_info->avail_data_alloc_bits = 0;
11746 fs_info->avail_metadata_alloc_bits = 0;
11747 fs_info->avail_system_alloc_bits = 0;
11749 /* First we need to create the in-memory block groups */
/* Move on to the next leaf once the current one is exhausted */
11751 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11752 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11754 btrfs_release_path(&path);
11762 leaf = path.nodes[0];
11763 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11764 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* key.offset is used as the block group start, the chunk length as its size */
11769 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11770 btrfs_add_block_group(fs_info, 0,
11771 btrfs_chunk_type(leaf, chunk),
11772 key.objectid, key.offset,
11773 btrfs_chunk_length(leaf, chunk));
11774 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11775 key.offset + btrfs_chunk_length(leaf, chunk));
/* Walk the block groups just created, advancing start past each one */
11780 cache = btrfs_lookup_first_block_group(fs_info, start);
11784 start = cache->key.objectid + cache->key.offset;
11787 btrfs_release_path(&path);
/*
 * Remove the on-disk state of a pending balance.
 *
 * @trans:   running transaction
 * @fs_info: filesystem to operate on
 *
 * Visible steps:
 *  - search for the balance item (BTRFS_BALANCE_OBJECTID,
 *    BTRFS_BALANCE_ITEM_KEY) in the tree root and delete it;
 *  - delete the ROOT_ITEMs with objectid BTRFS_TREE_RELOC_OBJECTID, in
 *    batches through btrfs_del_items();
 *  - read the data reloc tree root, re-initialise it with
 *    btrfs_fsck_reinit_root() and recreate its root directory with
 *    btrfs_make_root_dir().
 */
11791 static int reset_balance(struct btrfs_trans_handle *trans,
11792 struct btrfs_fs_info *fs_info)
11794 struct btrfs_root *root = fs_info->tree_root;
11795 struct btrfs_path path;
11796 struct extent_buffer *leaf;
11797 struct btrfs_key key;
11798 int del_slot, del_nr = 0;
11802 btrfs_init_path(&path);
11803 key.objectid = BTRFS_BALANCE_OBJECTID;
11804 key.type = BTRFS_BALANCE_ITEM_KEY;
11806 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11811 goto reinit_data_reloc;
11816 ret = btrfs_del_item(trans, root, &path);
11819 btrfs_release_path(&path);
/* Next: the reloc tree root items */
11821 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11822 key.type = BTRFS_ROOT_ITEM_KEY;
11824 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11828 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11833 ret = btrfs_del_items(trans, root, &path,
11840 btrfs_release_path(&path);
11843 ret = btrfs_search_slot(trans, root, &key, &path,
11850 leaf = path.nodes[0];
11851 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11852 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11854 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11859 del_slot = path.slots[0];
/* Delete the del_nr items collected from del_slot onwards */
11868 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11872 btrfs_release_path(&path);
/* From here on "root" is the data reloc tree, not the tree root */
11875 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11876 key.type = BTRFS_ROOT_ITEM_KEY;
11877 key.offset = (u64)-1;
11878 root = btrfs_read_fs_root(fs_info, &key);
11879 if (IS_ERR(root)) {
11880 fprintf(stderr, "Error reading data reloc tree\n");
11881 ret = PTR_ERR(root);
11884 record_root_in_trans(trans, root);
11885 ret = btrfs_fsck_reinit_root(trans, root, 0);
11888 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11890 btrfs_release_path(&path);
/*
 * Recreate the extent tree from scratch.
 *
 * @trans:   running transaction
 * @fs_info: filesystem to operate on
 *
 * Order of operations:
 *  1. filesystems with the MIXED_GROUPS incompat flag hit the "not supported"
 *     message below;
 *  2. pin the metadata blocks found by pin_metadata_blocks();
 *  3. free and rebuild the in-memory block groups;
 *  4. re-initialise the extent root;
 *  5. insert a block group item for every in-memory block group;
 *  6. reset any pending balance.
 */
11894 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11895 struct btrfs_fs_info *fs_info)
11901 * The only reason we don't do this is because right now we're just
11902 * walking the trees we find and pinning down their bytes, we don't look
11903 * at any of the leaves. In order to do mixed groups we'd have to check
11904 * the leaves of any fs roots and pin down the bytes for any file
11905 * extents we find. Not hard but why do it if we don't have to?
11907 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11908 fprintf(stderr, "We don't support re-initing the extent tree "
11909 "for mixed block groups yet, please notify a btrfs "
11910 "developer you want to do this so they can add this "
11911 "functionality.\n");
11916 * first we need to walk all of the trees except the extent tree and pin
11917 * down the bytes that are in use so we don't overwrite any existing
11920 ret = pin_metadata_blocks(fs_info);
11922 fprintf(stderr, "error pinning down used bytes\n");
11927 * Need to drop all the block groups since we're going to recreate all
11930 btrfs_free_block_groups(fs_info);
11931 ret = reset_block_groups(fs_info);
11933 fprintf(stderr, "error resetting the block groups\n");
11937 /* Ok we can allocate now, reinit the extent root */
11938 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11940 fprintf(stderr, "extent root initialization failed\n");
11942 * When the transaction code is updated we should end the
11943 * transaction, but for now progs only knows about commit so
11944 * just return an error.
11950 * Now we have all the in-memory block groups setup so we can make
11951 * allocations properly, and the metadata we care about is safe since we
11952 * pinned all of it above.
11955 struct btrfs_block_group_cache *cache;
/* Write one block group item per in-memory block group into the new tree */
11957 cache = btrfs_lookup_first_block_group(fs_info, start);
11960 start = cache->key.objectid + cache->key.offset;
11961 ret = btrfs_insert_item(trans, fs_info->extent_root,
11962 &cache->key, &cache->item,
11963 sizeof(cache->item));
11965 fprintf(stderr, "Error adding block group\n");
11968 btrfs_extent_post_op(trans, fs_info->extent_root);
11971 ret = reset_balance(trans, fs_info);
11973 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW the metadata block @eb inside its owner tree.
 *
 * @root: any root of the filesystem; replaced below by the owner root of @eb
 * @eb:   tree block to re-COW
 *
 * The owner root is read from the owner id in the block header. A transaction
 * is started and the first key of @eb is searched with cow set and
 * path.lowest_level equal to the level of @eb; the transaction is then
 * committed.
 *
 * Returns PTR_ERR() of the root or of the transaction when either cannot be
 * obtained.
 */
11978 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11980 struct btrfs_path path;
11981 struct btrfs_trans_handle *trans;
11982 struct btrfs_key key;
11985 printf("Recowing metadata block %llu\n", eb->start);
11986 key.objectid = btrfs_header_owner(eb);
11987 key.type = BTRFS_ROOT_ITEM_KEY;
11988 key.offset = (u64)-1;
11990 root = btrfs_read_fs_root(root->fs_info, &key);
11991 if (IS_ERR(root)) {
11992 fprintf(stderr, "Couldn't find owner root %llu\n",
11994 return PTR_ERR(root);
11997 trans = btrfs_start_transaction(root, 1);
11999 return PTR_ERR(trans);
/* Stop the search at the level of @eb and use its first key as the target */
12001 btrfs_init_path(&path);
12002 path.lowest_level = btrfs_header_level(eb);
12003 if (path.lowest_level)
12004 btrfs_node_key_to_cpu(eb, &key, 0);
12006 btrfs_item_key_to_cpu(eb, &key, 0);
12008 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the result of the commit is not captured on this line */
12009 btrfs_commit_transaction(trans, root);
12010 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree it was found in.
 *
 * @root: any root of the filesystem; replaced below by the root named by
 *        bad->root_id
 * @bad:  record holding the root id and the key of the item to delete
 *
 * The item is searched with cow set and a negative ins_len, deleted with
 * btrfs_del_item(), and the transaction is committed.
 *
 * Returns PTR_ERR() of the root or of the transaction when either cannot be
 * obtained.
 */
12014 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12016 struct btrfs_path path;
12017 struct btrfs_trans_handle *trans;
12018 struct btrfs_key key;
12021 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12022 bad->key.type, bad->key.offset);
12023 key.objectid = bad->root_id;
12024 key.type = BTRFS_ROOT_ITEM_KEY;
12025 key.offset = (u64)-1;
12027 root = btrfs_read_fs_root(root->fs_info, &key);
12028 if (IS_ERR(root)) {
12029 fprintf(stderr, "Couldn't find owner root %llu\n",
12031 return PTR_ERR(root);
12034 trans = btrfs_start_transaction(root, 1);
12036 return PTR_ERR(trans);
12038 btrfs_init_path(&path);
12039 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12045 ret = btrfs_del_item(trans, root, &path);
/* NOTE(review): the result of the commit is not captured on this line */
12047 btrfs_commit_transaction(trans, root);
12048 btrfs_release_path(&path);
/*
 * Clear the log tree pointer recorded in the superblock.
 *
 * @root: root used to start and commit the transaction
 *
 * log_root and log_root_level of the in-memory super copy are set to 0 and
 * the transaction is committed; ret takes the result of the commit, or
 * PTR_ERR() of the transaction when it could not be started.
 */
12052 static int zero_log_tree(struct btrfs_root *root)
12054 struct btrfs_trans_handle *trans;
12057 trans = btrfs_start_transaction(root, 1);
12058 if (IS_ERR(trans)) {
12059 ret = PTR_ERR(trans);
12062 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12063 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12064 ret = btrfs_commit_transaction(trans, root);
/*
 * Checksum a data range one sector at a time and store the result in the
 * csum tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 * @buf:       scratch buffer; callers in this file allocate sectorsize bytes
 * @start:     logical start of the range
 *
 * The range length comes from a further parameter, referred to as len in the
 * body. Each iteration reads data at start + offset into @buf and passes it
 * to btrfs_csum_file_block() together with start + len.
 */
12068 static int populate_csum(struct btrfs_trans_handle *trans,
12069 struct btrfs_root *csum_root, char *buf, u64 start,
12072 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12077 while (offset < len) {
12078 sectorsize = fs_info->sectorsize;
12079 ret = read_extent_data(fs_info, buf, start + offset,
12083 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12084 start + offset, buf, sectorsize);
12087 offset += sectorsize;
/*
 * Fill the csum tree with checksums for the regular file extents of one
 * fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 * @cur_root:  fs/subvolume tree whose file extents are walked
 *
 * Items that are not EXTENT_DATA, and file extents that are not
 * BTRFS_FILE_EXTENT_REG, are filtered out. For the rest, the on-disk range
 * (disk_bytenr, disk_num_bytes) is passed to populate_csum(); an -EEXIST
 * result is handled separately.
 */
12092 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12093 struct btrfs_root *csum_root,
12094 struct btrfs_root *cur_root)
12096 struct btrfs_path path;
12097 struct btrfs_key key;
12098 struct extent_buffer *node;
12099 struct btrfs_file_extent_item *fi;
/* One sector of scratch space for populate_csum() */
12106 buf = malloc(cur_root->fs_info->sectorsize);
12110 btrfs_init_path(&path);
12114 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12117 /* Iterate all regular file extents and fill its csum */
12119 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12121 if (key.type != BTRFS_EXTENT_DATA_KEY)
12123 node = path.nodes[0];
12124 slot = path.slots[0];
12125 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12126 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12128 start = btrfs_file_extent_disk_bytenr(node, fi);
12129 len = btrfs_file_extent_disk_num_bytes(node, fi);
12131 ret = populate_csum(trans, csum_root, buf, start, len);
12132 if (ret == -EEXIST)
12138 * TODO: if next leaf is corrupted, jump to nearest next valid
12141 ret = btrfs_next_item(cur_root, &path);
12151 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking every fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 *
 * The ROOT_ITEMs of the tree root are walked, starting the search at
 * BTRFS_FS_TREE_OBJECTID. Each root for which is_fstree() holds is read and
 * passed to fill_csum_tree_from_one_fs_root().
 */
12156 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12157 struct btrfs_root *csum_root)
12159 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12160 struct btrfs_path path;
12161 struct btrfs_root *tree_root = fs_info->tree_root;
12162 struct btrfs_root *cur_root;
12163 struct extent_buffer *node;
12164 struct btrfs_key key;
12168 btrfs_init_path(&path);
12169 key.objectid = BTRFS_FS_TREE_OBJECTID;
12171 key.type = BTRFS_ROOT_ITEM_KEY;
12172 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12181 node = path.nodes[0];
12182 slot = path.slots[0];
12183 btrfs_item_key_to_cpu(node, &key, slot);
/* Filter: objectid range, item type, and fs-tree ids only */
12184 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12186 if (key.type != BTRFS_ROOT_ITEM_KEY)
12188 if (!is_fstree(key.objectid))
12190 key.offset = (u64)-1;
12192 cur_root = btrfs_read_fs_root(fs_info, &key);
12193 if (IS_ERR(cur_root) || !cur_root) {
12194 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12198 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12203 ret = btrfs_next_item(tree_root, &path);
12213 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking the data extents of the extent tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 *
 * Every EXTENT_ITEM carrying BTRFS_EXTENT_FLAG_DATA is passed to
 * populate_csum(), using key.objectid as the start of the range.
 */
12217 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12218 struct btrfs_root *csum_root)
12220 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12221 struct btrfs_path path;
12222 struct btrfs_extent_item *ei;
12223 struct extent_buffer *leaf;
12225 struct btrfs_key key;
12228 btrfs_init_path(&path);
12230 key.type = BTRFS_EXTENT_ITEM_KEY;
12232 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12234 btrfs_release_path(&path);
/* One sector of scratch space for populate_csum() */
12238 buf = malloc(csum_root->fs_info->sectorsize);
12240 btrfs_release_path(&path);
/* Move on to the next leaf once the current one is exhausted */
12245 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12246 ret = btrfs_next_leaf(extent_root, &path);
12254 leaf = path.nodes[0];
12256 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12257 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12262 ei = btrfs_item_ptr(leaf, path.slots[0],
12263 struct btrfs_extent_item);
/* Only data extents get checksums */
12264 if (!(btrfs_extent_flags(leaf, ei) &
12265 BTRFS_EXTENT_FLAG_DATA)) {
12270 ret = populate_csum(trans, csum_root, buf, key.objectid,
12277 btrfs_release_path(&path);
12283 * Recalculate the csum and put it into the csum tree.
12285 * Extent tree init will wipe out all the extent info, so in that case, we
12286 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12287 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher for refilling the checksum tree.
 *
 * @trans:          transaction handle, passed through unchanged
 * @csum_root:      checksum tree root, passed through unchanged
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); zero selects
 *                  fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
12289 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12290 struct btrfs_root *csum_root,
12291 int search_fs_tree)
12293 if (search_fs_tree)
12294 return fill_csum_tree_from_fs(trans, csum_root);
12296 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Does nothing further when the cache pointer is NULL.  Otherwise every
 * entry is removed from the tree, the tree itself is freed and the global
 * pointer is reset to NULL so the cache can be rebuilt later.
 *
 * NOTE(review): the per-entry free of rii is not visible in this listing -
 * presumably it follows the container_of() below; confirm.
 */
12299 static void free_roots_info_cache(void)
12301 if (!roots_info_cache)
/* Drain the tree one entry at a time, always taking the first entry. */
12304 while (!cache_tree_empty(roots_info_cache)) {
12305 struct cache_extent *entry;
12306 struct root_item_info *rii;
12308 entry = first_cache_extent(roots_info_cache);
12311 remove_cache_extent(roots_info_cache, entry);
/* The cache_extent is embedded in root_item_info; recover the container. */
12312 rii = container_of(entry, struct root_item_info, cache_extent);
12316 free(roots_info_cache);
12317 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache from the extent tree.
 *
 * @info: filesystem whose extent_root is scanned
 *
 * Tree block extents (EXTENT_ITEM with the TREE_BLOCK flag, or
 * METADATA_ITEM) are examined.  When the first inline ref of such an item is
 * a BTRFS_TREE_BLOCK_REF_KEY, its offset is taken as a root id and a
 * root_item_info entry keyed by that id records the bytenr, generation and
 * level of the highest-level block seen for the root.
 *
 * NOTE(review): the skip branches, error paths and the body of the
 * equal-level branch are not visible in this listing.
 */
12320 static int build_roots_info_cache(struct btrfs_fs_info *info)
12323 struct btrfs_key key;
12324 struct extent_buffer *leaf;
12325 struct btrfs_path path;
/* Allocate and initialise the global cache on first use. */
12327 if (!roots_info_cache) {
12328 roots_info_cache = malloc(sizeof(*roots_info_cache));
12329 if (!roots_info_cache)
12331 cache_tree_init(roots_info_cache);
12334 btrfs_init_path(&path);
12336 key.type = BTRFS_EXTENT_ITEM_KEY;
12338 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12341 leaf = path.nodes[0];
12344 struct btrfs_key found_key;
12345 struct btrfs_extent_item *ei;
12346 struct btrfs_extent_inline_ref *iref;
12347 int slot = path.slots[0];
12352 struct cache_extent *entry;
12353 struct root_item_info *rii;
/* Step to the next leaf when the slot index is past the last item. */
12355 if (slot >= btrfs_header_nritems(leaf)) {
12356 ret = btrfs_next_leaf(info->extent_root, &path);
12363 leaf = path.nodes[0];
12364 slot = path.slots[0];
12367 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12369 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12370 found_key.type != BTRFS_METADATA_ITEM_KEY)
12373 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12374 flags = btrfs_extent_flags(leaf, ei);
12376 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12377 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Find the first inline ref and the level.  For METADATA_ITEM the ref
 * follows the extent item directly and the level is the key offset;
 * otherwise a btrfs_tree_block_info sits in between and holds the level.
 */
12380 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12381 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12382 level = found_key.offset;
12384 struct btrfs_tree_block_info *binfo;
12386 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12387 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12388 level = btrfs_tree_block_level(leaf, binfo);
12392 * For a root extent, it must be of the following type and the
12393 * first (and only one) iref in the item.
12395 type = btrfs_extent_inline_ref_type(leaf, iref);
12396 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The ref offset is the root id; cache entries are keyed as (root_id, size 1). */
12399 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12400 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12402 rii = malloc(sizeof(struct root_item_info));
12407 rii->cache_extent.start = root_id;
12408 rii->cache_extent.size = 1;
12409 rii->level = (u8)-1;
12410 entry = &rii->cache_extent;
12411 ret = insert_cache_extent(roots_info_cache, entry);
12414 rii = container_of(entry, struct root_item_info,
12418 ASSERT(rii->cache_extent.start == root_id);
12419 ASSERT(rii->cache_extent.size == 1);
/*
 * Keep the highest level seen for this root.  A level of (u8)-1 is the
 * value set on a freshly created entry, so the first block always wins.
 */
12421 if (level > rii->level || rii->level == (u8)-1) {
12422 rii->level = level;
12423 rii->bytenr = found_key.objectid;
12424 rii->gen = btrfs_extent_generation(leaf, ei);
12425 rii->node_count = 1;
12426 } else if (level == rii->level) {
12434 btrfs_release_path(&path);
/*
 * Compare one on-disk root item with the data collected in
 * roots_info_cache and optionally rewrite it.
 *
 * @path:           points at the root item (nodes[0] / slots[0] are used)
 * @root_key:       key of that root item; objectid is the root id
 * @read_only_mode: non-zero means only detect a mismatch, do not modify
 *
 * A mismatch in bytenr, level or generation is reported.  When
 * read_only_mode is zero, the three fields are overwritten with the cached
 * values and the item is written back into the leaf.
 *
 * NOTE(review): return statements are not visible in this listing, so the
 * exact return values are not documented here.
 */
12439 static int maybe_repair_root_item(struct btrfs_path *path,
12440 const struct btrfs_key *root_key,
12441 const int read_only_mode)
12443 const u64 root_id = root_key->objectid;
12444 struct cache_extent *entry;
12445 struct root_item_info *rii;
12446 struct btrfs_root_item ri;
12447 unsigned long offset;
/* Entries are keyed by root id with size 1, as built by build_roots_info_cache(). */
12449 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12452 "Error: could not find extent items for root %llu\n",
12453 root_key->objectid);
12457 rii = container_of(entry, struct root_item_info, cache_extent);
12458 ASSERT(rii->cache_extent.start == root_id);
12459 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate root node is reported as an error. */
12461 if (rii->node_count != 1) {
12463 "Error: could not find btree root extent for root %llu\n",
12468 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12469 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* ri is a local copy of the on-disk item; compare it with the cached values. */
12471 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12472 btrfs_root_level(&ri) != rii->level ||
12473 btrfs_root_generation(&ri) != rii->gen) {
12476 * If we're in repair mode but our caller told us to not update
12477 * the root item, i.e. just check if it needs to be updated, don't
12478 * print this message, since the caller will call us again shortly
12479 * for the same root item without read only mode (the caller will
12480 * open a transaction first).
12482 if (!(read_only_mode && repair))
12484 "%sroot item for root %llu,"
12485 " current bytenr %llu, current gen %llu, current level %u,"
12486 " new bytenr %llu, new gen %llu, new level %u\n",
12487 (read_only_mode ? "" : "fixing "),
12489 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12490 btrfs_root_level(&ri),
12491 rii->bytenr, rii->gen, rii->level);
12493 if (btrfs_root_generation(&ri) > rii->gen) {
12495 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12496 root_id, btrfs_root_generation(&ri), rii->gen);
/* Update the local copy, then write it back over the item at the same offset. */
12500 if (!read_only_mode) {
12501 btrfs_set_root_bytenr(&ri, rii->bytenr);
12502 btrfs_set_root_level(&ri, rii->level);
12503 btrfs_set_root_generation(&ri, rii->gen);
12504 write_extent_buffer(path->nodes[0], &ri,
12505 offset, sizeof(ri));
12515 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12516 * caused read-only snapshots to be corrupted if they were created at a moment
12517 * when the source subvolume/snapshot had orphan items. The issue was that the
12518 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12519 * node instead of the post orphan cleanup root node.
12520 * So this function, and its callees, just detects and fixes those cases. Even
12521 * though the regression was for read-only snapshots, this function applies to
12522 * any snapshot/subvolume root.
12523 * This must be run before any other repair code - not doing so makes other
12524 * repair code delete or modify backrefs in the extent tree, for example, which
12525 * will result in an inconsistent fs after repairing the root items.
/*
 * Scan the root items in the tree root and check, or in repair mode fix,
 * each one against the data collected by build_roots_info_cache().
 *
 * @info: filesystem to operate on
 *
 * The scan starts at BTRFS_FIRST_FREE_OBJECTID with type ROOT_ITEM.  Each
 * item is passed to maybe_repair_root_item(), in read-only mode while no
 * transaction is open (third argument is trans ? 0 : 1).  On the way out
 * the roots info cache is freed and the path released.
 *
 * cmd_check() treats a negative result as an errno-style failure and a
 * positive result as a count of roots.
 *
 * NOTE(review): loop structure, the branches taken for skipped keys and the
 * handling of need_trans are only partly visible in this listing.
 */
12527 static int repair_root_items(struct btrfs_fs_info *info)
12529 struct btrfs_path path;
12530 struct btrfs_key key;
12531 struct extent_buffer *leaf;
12532 struct btrfs_trans_handle *trans = NULL;
12535 int need_trans = 0;
12537 btrfs_init_path(&path);
/* Gather per-root information from the extent tree before looking at root items. */
12539 ret = build_roots_info_cache(info);
/* Search key for the first root item to examine. */
12543 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12544 key.type = BTRFS_ROOT_ITEM_KEY;
12549 * Avoid opening and committing transactions if a leaf doesn't have
12550 * any root items that need to be fixed, so that we avoid rotating
12551 * backup roots unnecessarily.
12554 trans = btrfs_start_transaction(info->tree_root, 1);
12555 if (IS_ERR(trans)) {
12556 ret = PTR_ERR(trans);
12561 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12565 leaf = path.nodes[0];
12568 struct btrfs_key found_key;
12570 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12571 int no_more_keys = find_next_key(&path, &key);
12573 btrfs_release_path(&path);
12575 ret = btrfs_commit_transaction(trans,
12587 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12589 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12591 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12594 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12598 if (!trans && repair) {
12601 btrfs_release_path(&path);
12611 free_roots_info_cache();
12612 btrfs_release_path(&path);
12614 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of every block group and invalidate the
 * cache generation stored in the super block.
 *
 * @fs_info: filesystem to operate on
 *
 * Block groups are visited in order: each lookup starts at the end
 * (objectid + offset) of the previously handled block group.  Afterwards a
 * transaction is started, the super block cache generation is set to
 * (u64)-1 and the transaction is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.  The
 * result of the commit is stored in ret so that a failed commit is no
 * longer silently dropped.
 *
 * NOTE(review): the return statement after the commit is not visible in
 * this listing; this assumes the function returns ret there - confirm.
 */
12621 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12623 struct btrfs_trans_handle *trans;
12624 struct btrfs_block_group_cache *bg_cache;
12628 /* Clear all free space cache inodes and its extent data */
12630 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12633 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12636 current = bg_cache->key.objectid + bg_cache->key.offset;
12639 /* Don't forget to set cache_generation to -1 */
12640 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12641 if (IS_ERR(trans)) {
12642 error("failed to update super block cache generation");
12643 return PTR_ERR(trans);
12645 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* Propagate a commit failure instead of ignoring it. */
12646 ret = btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache in the requested format and report progress on
 * stdout.
 *
 * clear_version 1: clears the v1 cache through clear_free_space_cache().  If
 * the FREE_SPACE_TREE compat_ro bit is set, a message pointing the user to
 * the v2 option is emitted instead.
 *
 * clear_version 2: clears the free space tree through
 * btrfs_clear_free_space_tree().  If the FREE_SPACE_TREE bit is not set, a
 * message says there is nothing to clear.
 *
 * NOTE(review): the return statements and the remaining parameter
 * declaration are not visible in this listing.
 */
12651 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12656 if (clear_version == 1) {
12657 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12659 "free space cache v2 detected, use --clear-space-cache v2");
/* v1: per block group cache, handled by the helper in this file. */
12663 printf("Clearing free space cache\n");
12664 ret = clear_free_space_cache(fs_info);
12666 error("failed to clear free space cache");
12669 printf("Free space cache cleared\n");
12671 } else if (clear_version == 2) {
12672 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12673 printf("no free space cache v2 to clear\n");
/* v2: the free space tree. */
12677 printf("Clear free space cache v2\n");
12678 ret = btrfs_clear_free_space_tree(fs_info);
12680 error("failed to clear free space cache v2: %d", ret);
12683 printf("free space cache v2 cleared\n");
/*
 * Help text for the check command, one string per output line.  cmd_check()
 * passes this table to usage() when the command line is not acceptable.
 * The option list here has to stay in sync with long_options and the
 * getopt string in cmd_check().
 */
12690 const char * const cmd_check_usage[] = {
12691 "btrfs check [options] <device>",
12692 "Check structural integrity of a filesystem (unmounted).",
12693 "Check structural integrity of an unmounted filesystem. Verify internal",
12694 "trees' consistency and item connectivity. In the repair mode try to",
12695 "fix the problems found. ",
12696 "WARNING: the repair mode is considered dangerous",
12698 "-s|--super <superblock> use this superblock copy",
12699 "-b|--backup use the first valid backup root copy",
12700 "--repair try to repair the filesystem",
12701 "--readonly run in read-only mode (default)",
12702 "--init-csum-tree create a new CRC tree",
12703 "--init-extent-tree create a new extent tree",
12704 "--mode <MODE> allows choice of memory/IO trade-offs",
12705 " where MODE is one of:",
12706 " original - read inodes and extents to memory (requires",
12707 " more memory, does less IO)",
12708 " lowmem - try to use less memory but read blocks again",
12710 "--check-data-csum verify checksums of data blocks",
12711 "-Q|--qgroup-report print a report on qgroup consistency",
12712 "-E|--subvol-extents <subvolid>",
12713 " print subvolume extents and sharing state",
12714 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12715 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12716 "-p|--progress indicate progress",
12717 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command.
 *
 * @argc, @argv: command line; exactly one non-option argument (the device)
 *               is expected after the options
 *
 * Visible phases, in order:
 *  - parse options, then reject --readonly with repair and lowmem with repair
 *  - refuse to run when the mount check fails or the device is mounted
 *  - open the filesystem (partial opening is only allowed in repair mode)
 *  - optional one-shot modes: clear space cache, qgroup report, subvolume
 *    extent state
 *  - in repair mode, offer to zero the log tree
 *  - optionally reinitialise the extent tree and/or checksum tree
 *  - check extents, root items, space cache, fs roots, csums, root refs
 *  - handle pending recow buffers, delete bad items, verify/repair qgroups
 *  - print the summary counters and release resources
 *
 * The short option -E takes an argument, like its long form
 * --subvol-extents, so the getopt string uses a trailing colon for it.
 *
 * NOTE(review): many control-flow lines (case labels, gotos, returns) are
 * not visible in this listing; the phase list above covers only what is
 * shown.
 */
12721 int cmd_check(int argc, char **argv)
12723 struct cache_tree root_cache;
12724 struct btrfs_root *root;
12725 struct btrfs_fs_info *info;
12728 u64 tree_root_bytenr = 0;
12729 u64 chunk_root_bytenr = 0;
12730 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12734 int init_csum_tree = 0;
12736 int clear_space_cache = 0;
12737 int qgroup_report = 0;
12738 int qgroups_repaired = 0;
12739 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12743 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12744 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12745 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12746 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12747 static const struct option long_options[] = {
12748 { "super", required_argument, NULL, 's' },
12749 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12750 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12751 { "init-csum-tree", no_argument, NULL,
12752 GETOPT_VAL_INIT_CSUM },
12753 { "init-extent-tree", no_argument, NULL,
12754 GETOPT_VAL_INIT_EXTENT },
12755 { "check-data-csum", no_argument, NULL,
12756 GETOPT_VAL_CHECK_CSUM },
12757 { "backup", no_argument, NULL, 'b' },
12758 { "subvol-extents", required_argument, NULL, 'E' },
12759 { "qgroup-report", no_argument, NULL, 'Q' },
12760 { "tree-root", required_argument, NULL, 'r' },
12761 { "chunk-root", required_argument, NULL,
12762 GETOPT_VAL_CHUNK_TREE },
12763 { "progress", no_argument, NULL, 'p' },
12764 { "mode", required_argument, NULL,
12766 { "clear-space-cache", required_argument, NULL,
12767 GETOPT_VAL_CLEAR_SPACE_CACHE},
12768 { NULL, 0, NULL, 0}
/*
 * E needs a trailing colon: it is declared with required_argument above and
 * its value is parsed from optarg.  Without the colon the short form leaves
 * optarg NULL.
 */
12771 c = getopt_long(argc, argv, "as:br:pE:Q", long_options, NULL);
12775 case 'a': /* ignored */ break;
12777 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12780 num = arg_strtou64(optarg);
12781 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12783 "super mirror should be less than %d",
12784 BTRFS_SUPER_MIRROR_MAX);
12787 bytenr = btrfs_sb_offset(((int)num));
12788 printf("using SB copy %llu, bytenr %llu\n", num,
12789 (unsigned long long)bytenr);
12795 subvolid = arg_strtou64(optarg);
12798 tree_root_bytenr = arg_strtou64(optarg);
12800 case GETOPT_VAL_CHUNK_TREE:
12801 chunk_root_bytenr = arg_strtou64(optarg);
12804 ctx.progress_enabled = true;
12808 usage(cmd_check_usage);
12809 case GETOPT_VAL_REPAIR:
12810 printf("enabling repair mode\n");
12812 ctree_flags |= OPEN_CTREE_WRITES;
12814 case GETOPT_VAL_READONLY:
12817 case GETOPT_VAL_INIT_CSUM:
12818 printf("Creating a new CRC tree\n");
12819 init_csum_tree = 1;
12821 ctree_flags |= OPEN_CTREE_WRITES;
12823 case GETOPT_VAL_INIT_EXTENT:
12824 init_extent_tree = 1;
12825 ctree_flags |= (OPEN_CTREE_WRITES |
12826 OPEN_CTREE_NO_BLOCK_GROUPS);
12829 case GETOPT_VAL_CHECK_CSUM:
12830 check_data_csum = 1;
12832 case GETOPT_VAL_MODE:
12833 check_mode = parse_check_mode(optarg);
12834 if (check_mode == CHECK_MODE_UNKNOWN) {
12835 error("unknown mode: %s", optarg);
12839 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12840 if (strcmp(optarg, "v1") == 0) {
12841 clear_space_cache = 1;
12842 } else if (strcmp(optarg, "v2") == 0) {
12843 clear_space_cache = 2;
12844 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12847 "invalid argument to --clear-space-cache, must be v1 or v2");
12850 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) must remain. */
12855 if (check_argc_exact(argc - optind, 1))
12856 usage(cmd_check_usage);
12858 if (ctx.progress_enabled) {
12859 ctx.tp = TASK_NOTHING;
12860 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12863 /* This check is the only reason for --readonly to exist */
12864 if (readonly && repair) {
12865 error("repair options are not compatible with --readonly");
12870 * Not supported yet
12872 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12873 error("low memory mode doesn't support repair yet");
12878 cache_tree_init(&root_cache);
12880 if((ret = check_mounted(argv[optind])) < 0) {
12881 error("could not check mount status: %s", strerror(-ret));
12885 error("%s is currently mounted, aborting", argv[optind]);
12891 /* only allow partial opening under repair mode */
12893 ctree_flags |= OPEN_CTREE_PARTIAL;
12895 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12896 chunk_root_bytenr, ctree_flags);
12898 error("cannot open file system");
12904 global_info = info;
12905 root = info->fs_root;
12907 if (clear_space_cache) {
12908 ret = do_clear_free_space_cache(info, clear_space_cache);
12914 * repair mode will force us to commit transaction which
12915 * will make us fail to load log tree when mounting.
12917 if (repair && btrfs_super_log_root(info->super_copy)) {
12918 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12924 ret = zero_log_tree(root);
12927 error("failed to zero log tree: %d", ret);
12932 uuid_unparse(info->super_copy->fsid, uuidbuf);
12933 if (qgroup_report) {
12934 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12936 ret = qgroup_verify_all(info);
12943 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12944 subvolid, argv[optind], uuidbuf);
12945 ret = print_extent_state(info, subvolid);
12949 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12951 if (!extent_buffer_uptodate(info->tree_root->node) ||
12952 !extent_buffer_uptodate(info->dev_root->node) ||
12953 !extent_buffer_uptodate(info->chunk_root->node)) {
12954 error("critical roots corrupted, unable to check the filesystem");
/* Optional reinitialisation of the extent and/or checksum tree in one transaction. */
12960 if (init_extent_tree || init_csum_tree) {
12961 struct btrfs_trans_handle *trans;
12963 trans = btrfs_start_transaction(info->extent_root, 0);
12964 if (IS_ERR(trans)) {
12965 error("error starting transaction");
12966 ret = PTR_ERR(trans);
12971 if (init_extent_tree) {
12972 printf("Creating a new extent tree\n");
12973 ret = reinit_extent_tree(trans, info);
12979 if (init_csum_tree) {
12980 printf("Reinitialize checksum tree\n");
12981 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12983 error("checksum tree initialization failed: %d",
12990 ret = fill_csum_tree(trans, info->csum_root,
12994 error("checksum tree refilling failed: %d", ret);
12999 * Ok now we commit and run the normal fsck, which will add
13000 * extent entries for all of the items it finds.
13002 ret = btrfs_commit_transaction(trans, info->extent_root);
13007 if (!extent_buffer_uptodate(info->extent_root->node)) {
13008 error("critical: extent_root, unable to check the filesystem");
13013 if (!extent_buffer_uptodate(info->csum_root->node)) {
13014 error("critical: csum_root, unable to check the filesystem");
13020 if (!ctx.progress_enabled)
13021 fprintf(stderr, "checking extents\n");
13022 if (check_mode == CHECK_MODE_LOWMEM)
13023 ret = check_chunks_and_extents_v2(root);
13025 ret = check_chunks_and_extents(root);
13029 "errors found in extent allocation tree or chunk allocation");
13031 ret = repair_root_items(info);
13034 error("failed to repair root items: %s", strerror(-ret));
13038 fprintf(stderr, "Fixed %d roots.\n", ret);
13040 } else if (ret > 0) {
13042 "Found %d roots with an outdated root item.\n",
13045 "Please run a filesystem check with the option --repair to fix them.\n");
13051 if (!ctx.progress_enabled) {
13052 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13053 fprintf(stderr, "checking free space tree\n");
13055 fprintf(stderr, "checking free space cache\n");
13057 ret = check_space_cache(root);
13060 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13061 error("errors found in free space tree");
13063 error("errors found in free space cache");
13068 * We used to have to have these hole extents in between our real
13069 * extents so if we don't have this flag set we need to make sure there
13070 * are no gaps in the file extents for inodes, otherwise we can just
13071 * ignore it when this happens.
13073 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13074 if (!ctx.progress_enabled)
13075 fprintf(stderr, "checking fs roots\n");
13076 if (check_mode == CHECK_MODE_LOWMEM)
13077 ret = check_fs_roots_v2(root->fs_info);
13079 ret = check_fs_roots(root, &root_cache);
13082 error("errors found in fs roots");
13086 fprintf(stderr, "checking csums\n");
13087 ret = check_csums(root);
13090 error("errors found in csum tree");
13094 fprintf(stderr, "checking root refs\n");
13095 /* For low memory mode, check_fs_roots_v2 handles root refs */
13096 if (check_mode != CHECK_MODE_LOWMEM) {
13097 ret = check_root_refs(root, &root_cache);
13100 error("errors found in root refs");
13105 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13106 struct extent_buffer *eb;
13108 eb = list_first_entry(&root->fs_info->recow_ebs,
13109 struct extent_buffer, recow);
13110 list_del_init(&eb->recow);
13111 ret = recow_extent_buffer(root, eb);
13114 error("fails to fix transid errors");
13119 while (!list_empty(&delete_items)) {
13120 struct bad_item *bad;
13122 bad = list_first_entry(&delete_items, struct bad_item, list);
13123 list_del_init(&bad->list);
13125 ret = delete_bad_item(root, bad);
13131 if (info->quota_enabled) {
13132 fprintf(stderr, "checking quota groups\n");
13133 ret = qgroup_verify_all(info);
13136 error("failed to check quota groups");
13140 ret = repair_qgroups(info, &qgroups_repaired);
13143 error("failed to repair quota groups");
13149 if (!list_empty(&root->fs_info->recow_ebs)) {
13150 error("transid errors in file system");
13155 printf("found %llu bytes used, ",
13156 (unsigned long long)bytes_used);
13158 printf("error(s) found\n");
13160 printf("no error found\n");
13161 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13162 printf("total tree bytes: %llu\n",
13163 (unsigned long long)total_btree_bytes);
13164 printf("total fs tree bytes: %llu\n",
13165 (unsigned long long)total_fs_tree_bytes);
13166 printf("total extent tree bytes: %llu\n",
13167 (unsigned long long)total_extent_tree_bytes);
13168 printf("btree space waste bytes: %llu\n",
13169 (unsigned long long)btree_space_waste);
13170 printf("file data blocks allocated: %llu\n referenced %llu\n",
13171 (unsigned long long)data_bytes_allocated,
13172 (unsigned long long)data_bytes_referenced);
13174 free_qgroup_counts();
13175 free_root_recs_tree(&root_cache);
13179 if (ctx.progress_enabled)
13180 task_deinit(ctx.info);