2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error flags.  Each flag is a distinct single bit, so several of them can be
 * OR-ed into one value.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
136 return container_of(back, struct data_backref, node);
140 * Much like data_backref, but with the undetermined members removed
141 * and changed to use list_head.
142 * During extent scan, it is stored in root->orphan_data_extent.
143 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
145 struct orphan_data_extent {
146 struct list_head list;
154 struct tree_backref {
155 struct extent_backref node;
162 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
164 return container_of(back, struct tree_backref, node);
167 /* Explicit initialization for extent_record::flag_block_full_backref */
168 enum { FLAG_UNSET = 2 };
170 struct extent_record {
171 struct list_head backrefs;
172 struct list_head dups;
173 struct list_head list;
174 struct cache_extent cache;
175 struct btrfs_disk_key parent_key;
180 u64 extent_item_refs;
182 u64 parent_generation;
186 unsigned int flag_block_full_backref:2;
187 unsigned int found_rec:1;
188 unsigned int content_checked:1;
189 unsigned int owner_ref_checked:1;
190 unsigned int is_root:1;
191 unsigned int metadata:1;
192 unsigned int bad_full_backref:1;
193 unsigned int crossing_stripes:1;
194 unsigned int wrong_chunk_type:1;
197 static inline struct extent_record* to_extent_record(struct list_head *entry)
199 return container_of(entry, struct extent_record, list);
202 struct inode_backref {
203 struct list_head list;
204 unsigned int found_dir_item:1;
205 unsigned int found_dir_index:1;
206 unsigned int found_inode_ref:1;
216 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
218 return list_entry(entry, struct inode_backref, list);
221 struct root_item_record {
222 struct list_head list;
228 struct btrfs_key drop_key;
/*
 * Error bits for a single backref.  Each flag is a distinct single bit, so
 * several of them can be OR-ed into one 'errors' value.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
245 struct file_extent_hole {
251 struct inode_record {
252 struct list_head backrefs;
253 unsigned int checked:1;
254 unsigned int merging:1;
255 unsigned int found_inode_item:1;
256 unsigned int found_dir_item:1;
257 unsigned int found_file_extent:1;
258 unsigned int found_csum_item:1;
259 unsigned int some_csum_missing:1;
260 unsigned int nodatasum:1;
273 struct rb_root holes;
274 struct list_head orphan_extents;
/*
 * Error bits for an inode record.  Each flag is a distinct single bit, so
 * several of them can be OR-ed into one 'errors' value.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
295 struct root_backref {
296 struct list_head list;
297 unsigned int found_dir_item:1;
298 unsigned int found_dir_index:1;
299 unsigned int found_back_ref:1;
300 unsigned int found_forward_ref:1;
301 unsigned int reachable:1;
310 static inline struct root_backref* to_root_backref(struct list_head *entry)
312 return list_entry(entry, struct root_backref, list);
316 struct list_head backrefs;
317 struct cache_extent cache;
318 unsigned int found_root_item:1;
324 struct cache_extent cache;
329 struct cache_extent cache;
330 struct cache_tree root_cache;
331 struct cache_tree inode_cache;
332 struct inode_record *current;
341 struct walk_control {
342 struct cache_tree shared;
343 struct shared_node *nodes[BTRFS_MAX_LEVEL];
349 struct btrfs_key key;
351 struct list_head list;
354 struct extent_entry {
359 struct list_head list;
362 struct root_item_info {
363 /* level of the root */
365 /* number of nodes at this level, must be 1 for a root */
369 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only used
 * internally to tell the kinds of error apart.  Every flag must own a distinct
 * bit, otherwise two different errors become indistinguishable once they are
 * OR-ed into the same value.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * This flag used to be (1 << 4) and thereby aliased REFERENCER_MISMATCH.  It
 * now takes the first free bit; all other values are unchanged.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
389 static void *print_status_check(void *p)
391 struct task_ctx *priv = p;
392 const char work_indicator[] = { '.', 'o', 'O', 'o' };
394 static char *task_position_string[] = {
396 "checking free space cache",
400 task_period_start(priv->info, 1000 /* 1s */);
402 if (priv->tp == TASK_NOTHING)
406 printf("%s [%c]\r", task_position_string[priv->tp],
407 work_indicator[count % 4]);
410 task_period_wait(priv->info);
415 static int print_status_return(void *p)
423 static enum btrfs_check_mode parse_check_mode(const char *str)
425 if (strcmp(str, "lowmem") == 0)
426 return CHECK_MODE_LOWMEM;
427 if (strcmp(str, "orig") == 0)
428 return CHECK_MODE_ORIGINAL;
429 if (strcmp(str, "original") == 0)
430 return CHECK_MODE_ORIGINAL;
432 return CHECK_MODE_UNKNOWN;
435 /* Compatibility helper to allow reuse of old code */
436 static u64 first_extent_gap(struct rb_root *holes)
438 struct file_extent_hole *hole;
440 if (RB_EMPTY_ROOT(holes))
443 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
447 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
449 struct file_extent_hole *hole1;
450 struct file_extent_hole *hole2;
452 hole1 = rb_entry(node1, struct file_extent_hole, node);
453 hole2 = rb_entry(node2, struct file_extent_hole, node);
455 if (hole1->start > hole2->start)
457 if (hole1->start < hole2->start)
459 /* Now hole1->start == hole2->start */
460 if (hole1->len >= hole2->len)
462 * Hole 1 will be merge center
463 * Same hole will be merged later
466 /* Hole 2 will be merge center */
471 * Add a hole to the record
473 * This merges holes on behalf of copy_file_extent_holes(),
474 * which ensures there won't be adjacent or overlapping holes.
476 static int add_file_extent_hole(struct rb_root *holes,
479 struct file_extent_hole *hole;
480 struct file_extent_hole *prev = NULL;
481 struct file_extent_hole *next = NULL;
483 hole = malloc(sizeof(*hole));
488 /* Since compare will not return 0, no -EEXIST will happen */
489 rb_insert(holes, &hole->node, compare_hole);
491 /* simple merge with previous hole */
492 if (rb_prev(&hole->node))
493 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
495 if (prev && prev->start + prev->len >= hole->start) {
496 hole->len = hole->start + hole->len - prev->start;
497 hole->start = prev->start;
498 rb_erase(&prev->node, holes);
503 /* iterate merge with next holes */
505 if (!rb_next(&hole->node))
507 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
509 if (hole->start + hole->len >= next->start) {
510 if (hole->start + hole->len <= next->start + next->len)
511 hole->len = next->start + next->len -
513 rb_erase(&next->node, holes);
522 static int compare_hole_range(struct rb_node *node, void *data)
524 struct file_extent_hole *hole;
527 hole = (struct file_extent_hole *)data;
530 hole = rb_entry(node, struct file_extent_hole, node);
531 if (start < hole->start)
533 if (start >= hole->start && start < hole->start + hole->len)
539 * Delete a hole in the record
541 * This splits the hole and is much stricter than add.
543 static int del_file_extent_hole(struct rb_root *holes,
546 struct file_extent_hole *hole;
547 struct file_extent_hole tmp;
552 struct rb_node *node;
559 node = rb_search(holes, &tmp, compare_hole_range, NULL);
562 hole = rb_entry(node, struct file_extent_hole, node);
563 if (start + len > hole->start + hole->len)
567 * Now there will be no overlap, delete the hole and re-add the
568 * split(s) if they exists.
570 if (start > hole->start) {
571 prev_start = hole->start;
572 prev_len = start - hole->start;
575 if (hole->start + hole->len > start + len) {
576 next_start = start + len;
577 next_len = hole->start + hole->len - start - len;
580 rb_erase(node, holes);
583 ret = add_file_extent_hole(holes, prev_start, prev_len);
588 ret = add_file_extent_hole(holes, next_start, next_len);
595 static int copy_file_extent_holes(struct rb_root *dst,
598 struct file_extent_hole *hole;
599 struct rb_node *node;
602 node = rb_first(src);
604 hole = rb_entry(node, struct file_extent_hole, node);
605 ret = add_file_extent_hole(dst, hole->start, hole->len);
608 node = rb_next(node);
613 static void free_file_extent_holes(struct rb_root *holes)
615 struct rb_node *node;
616 struct file_extent_hole *hole;
618 node = rb_first(holes);
620 hole = rb_entry(node, struct file_extent_hole, node);
621 rb_erase(node, holes);
623 node = rb_first(holes);
627 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
629 static void record_root_in_trans(struct btrfs_trans_handle *trans,
630 struct btrfs_root *root)
632 if (root->last_trans != trans->transid) {
633 root->track_dirty = 1;
634 root->last_trans = trans->transid;
635 root->commit_root = root->node;
636 extent_buffer_get(root->node);
640 static u8 imode_to_type(u32 imode)
643 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
644 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
645 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
646 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
647 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
648 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
649 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
650 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
653 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
657 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
659 struct device_record *rec1;
660 struct device_record *rec2;
662 rec1 = rb_entry(node1, struct device_record, node);
663 rec2 = rb_entry(node2, struct device_record, node);
664 if (rec1->devid > rec2->devid)
666 else if (rec1->devid < rec2->devid)
672 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
674 struct inode_record *rec;
675 struct inode_backref *backref;
676 struct inode_backref *orig;
677 struct inode_backref *tmp;
678 struct orphan_data_extent *src_orphan;
679 struct orphan_data_extent *dst_orphan;
684 rec = malloc(sizeof(*rec));
686 return ERR_PTR(-ENOMEM);
687 memcpy(rec, orig_rec, sizeof(*rec));
689 INIT_LIST_HEAD(&rec->backrefs);
690 INIT_LIST_HEAD(&rec->orphan_extents);
691 rec->holes = RB_ROOT;
693 list_for_each_entry(orig, &orig_rec->backrefs, list) {
694 size = sizeof(*orig) + orig->namelen + 1;
695 backref = malloc(size);
700 memcpy(backref, orig, size);
701 list_add_tail(&backref->list, &rec->backrefs);
703 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
704 dst_orphan = malloc(sizeof(*dst_orphan));
709 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
710 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
712 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
719 rb = rb_first(&rec->holes);
721 struct file_extent_hole *hole;
723 hole = rb_entry(rb, struct file_extent_hole, node);
729 if (!list_empty(&rec->backrefs))
730 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
731 list_del(&orig->list);
735 if (!list_empty(&rec->orphan_extents))
736 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
737 list_del(&orig->list);
746 static void print_orphan_data_extents(struct list_head *orphan_extents,
749 struct orphan_data_extent *orphan;
751 if (list_empty(orphan_extents))
753 printf("The following data extent is lost in tree %llu:\n",
755 list_for_each_entry(orphan, orphan_extents, list) {
756 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
757 orphan->objectid, orphan->offset, orphan->disk_bytenr,
762 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
764 u64 root_objectid = root->root_key.objectid;
765 int errors = rec->errors;
769 /* reloc root errors, we print its corresponding fs root objectid*/
770 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
771 root_objectid = root->root_key.offset;
772 fprintf(stderr, "reloc");
774 fprintf(stderr, "root %llu inode %llu errors %x",
775 (unsigned long long) root_objectid,
776 (unsigned long long) rec->ino, rec->errors);
778 if (errors & I_ERR_NO_INODE_ITEM)
779 fprintf(stderr, ", no inode item");
780 if (errors & I_ERR_NO_ORPHAN_ITEM)
781 fprintf(stderr, ", no orphan item");
782 if (errors & I_ERR_DUP_INODE_ITEM)
783 fprintf(stderr, ", dup inode item");
784 if (errors & I_ERR_DUP_DIR_INDEX)
785 fprintf(stderr, ", dup dir index");
786 if (errors & I_ERR_ODD_DIR_ITEM)
787 fprintf(stderr, ", odd dir item");
788 if (errors & I_ERR_ODD_FILE_EXTENT)
789 fprintf(stderr, ", odd file extent");
790 if (errors & I_ERR_BAD_FILE_EXTENT)
791 fprintf(stderr, ", bad file extent");
792 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
793 fprintf(stderr, ", file extent overlap");
794 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
795 fprintf(stderr, ", file extent discount");
796 if (errors & I_ERR_DIR_ISIZE_WRONG)
797 fprintf(stderr, ", dir isize wrong");
798 if (errors & I_ERR_FILE_NBYTES_WRONG)
799 fprintf(stderr, ", nbytes wrong");
800 if (errors & I_ERR_ODD_CSUM_ITEM)
801 fprintf(stderr, ", odd csum item");
802 if (errors & I_ERR_SOME_CSUM_MISSING)
803 fprintf(stderr, ", some csum missing");
804 if (errors & I_ERR_LINK_COUNT_WRONG)
805 fprintf(stderr, ", link count wrong");
806 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
807 fprintf(stderr, ", orphan file extent");
808 fprintf(stderr, "\n");
809 /* Print the orphan extents if needed */
810 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
811 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
813 /* Print the holes if needed */
814 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
815 struct file_extent_hole *hole;
816 struct rb_node *node;
819 node = rb_first(&rec->holes);
820 fprintf(stderr, "Found file extent holes:\n");
823 hole = rb_entry(node, struct file_extent_hole, node);
824 fprintf(stderr, "\tstart: %llu, len: %llu\n",
825 hole->start, hole->len);
826 node = rb_next(node);
829 fprintf(stderr, "\tstart: 0, len: %llu\n",
831 root->fs_info->sectorsize));
835 static void print_ref_error(int errors)
837 if (errors & REF_ERR_NO_DIR_ITEM)
838 fprintf(stderr, ", no dir item");
839 if (errors & REF_ERR_NO_DIR_INDEX)
840 fprintf(stderr, ", no dir index");
841 if (errors & REF_ERR_NO_INODE_REF)
842 fprintf(stderr, ", no inode ref");
843 if (errors & REF_ERR_DUP_DIR_ITEM)
844 fprintf(stderr, ", dup dir item");
845 if (errors & REF_ERR_DUP_DIR_INDEX)
846 fprintf(stderr, ", dup dir index");
847 if (errors & REF_ERR_DUP_INODE_REF)
848 fprintf(stderr, ", dup inode ref");
849 if (errors & REF_ERR_INDEX_UNMATCH)
850 fprintf(stderr, ", index mismatch");
851 if (errors & REF_ERR_FILETYPE_UNMATCH)
852 fprintf(stderr, ", filetype mismatch");
853 if (errors & REF_ERR_NAME_TOO_LONG)
854 fprintf(stderr, ", name too long");
855 if (errors & REF_ERR_NO_ROOT_REF)
856 fprintf(stderr, ", no root ref");
857 if (errors & REF_ERR_NO_ROOT_BACKREF)
858 fprintf(stderr, ", no root backref");
859 if (errors & REF_ERR_DUP_ROOT_REF)
860 fprintf(stderr, ", dup root ref");
861 if (errors & REF_ERR_DUP_ROOT_BACKREF)
862 fprintf(stderr, ", dup root backref");
863 fprintf(stderr, "\n");
866 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
869 struct ptr_node *node;
870 struct cache_extent *cache;
871 struct inode_record *rec = NULL;
874 cache = lookup_cache_extent(inode_cache, ino, 1);
876 node = container_of(cache, struct ptr_node, cache);
878 if (mod && rec->refs > 1) {
879 node->data = clone_inode_rec(rec);
880 if (IS_ERR(node->data))
886 rec = calloc(1, sizeof(*rec));
888 return ERR_PTR(-ENOMEM);
890 rec->extent_start = (u64)-1;
892 INIT_LIST_HEAD(&rec->backrefs);
893 INIT_LIST_HEAD(&rec->orphan_extents);
894 rec->holes = RB_ROOT;
896 node = malloc(sizeof(*node));
899 return ERR_PTR(-ENOMEM);
901 node->cache.start = ino;
902 node->cache.size = 1;
905 if (ino == BTRFS_FREE_INO_OBJECTID)
908 ret = insert_cache_extent(inode_cache, &node->cache);
910 return ERR_PTR(-EEXIST);
915 static void free_orphan_data_extents(struct list_head *orphan_extents)
917 struct orphan_data_extent *orphan;
919 while (!list_empty(orphan_extents)) {
920 orphan = list_entry(orphan_extents->next,
921 struct orphan_data_extent, list);
922 list_del(&orphan->list);
927 static void free_inode_rec(struct inode_record *rec)
929 struct inode_backref *backref;
934 while (!list_empty(&rec->backrefs)) {
935 backref = to_inode_backref(rec->backrefs.next);
936 list_del(&backref->list);
939 free_orphan_data_extents(&rec->orphan_extents);
940 free_file_extent_holes(&rec->holes);
944 static int can_free_inode_rec(struct inode_record *rec)
946 if (!rec->errors && rec->checked && rec->found_inode_item &&
947 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
952 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
953 struct inode_record *rec)
955 struct cache_extent *cache;
956 struct inode_backref *tmp, *backref;
957 struct ptr_node *node;
960 if (!rec->found_inode_item)
963 filetype = imode_to_type(rec->imode);
964 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
965 if (backref->found_dir_item && backref->found_dir_index) {
966 if (backref->filetype != filetype)
967 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
968 if (!backref->errors && backref->found_inode_ref &&
969 rec->nlink == rec->found_link) {
970 list_del(&backref->list);
976 if (!rec->checked || rec->merging)
979 if (S_ISDIR(rec->imode)) {
980 if (rec->found_size != rec->isize)
981 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
982 if (rec->found_file_extent)
983 rec->errors |= I_ERR_ODD_FILE_EXTENT;
984 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
985 if (rec->found_dir_item)
986 rec->errors |= I_ERR_ODD_DIR_ITEM;
987 if (rec->found_size != rec->nbytes)
988 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
989 if (rec->nlink > 0 && !no_holes &&
990 (rec->extent_end < rec->isize ||
991 first_extent_gap(&rec->holes) < rec->isize))
992 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
995 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
996 if (rec->found_csum_item && rec->nodatasum)
997 rec->errors |= I_ERR_ODD_CSUM_ITEM;
998 if (rec->some_csum_missing && !rec->nodatasum)
999 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1002 BUG_ON(rec->refs != 1);
1003 if (can_free_inode_rec(rec)) {
1004 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1005 node = container_of(cache, struct ptr_node, cache);
1006 BUG_ON(node->data != rec);
1007 remove_cache_extent(inode_cache, &node->cache);
1009 free_inode_rec(rec);
1013 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1015 struct btrfs_path path;
1016 struct btrfs_key key;
1019 key.objectid = BTRFS_ORPHAN_OBJECTID;
1020 key.type = BTRFS_ORPHAN_ITEM_KEY;
1023 btrfs_init_path(&path);
1024 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1025 btrfs_release_path(&path);
1031 static int process_inode_item(struct extent_buffer *eb,
1032 int slot, struct btrfs_key *key,
1033 struct shared_node *active_node)
1035 struct inode_record *rec;
1036 struct btrfs_inode_item *item;
1038 rec = active_node->current;
1039 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1040 if (rec->found_inode_item) {
1041 rec->errors |= I_ERR_DUP_INODE_ITEM;
1044 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1045 rec->nlink = btrfs_inode_nlink(eb, item);
1046 rec->isize = btrfs_inode_size(eb, item);
1047 rec->nbytes = btrfs_inode_nbytes(eb, item);
1048 rec->imode = btrfs_inode_mode(eb, item);
1049 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1051 rec->found_inode_item = 1;
1052 if (rec->nlink == 0)
1053 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1054 maybe_free_inode_rec(&active_node->inode_cache, rec);
1058 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1060 int namelen, u64 dir)
1062 struct inode_backref *backref;
1064 list_for_each_entry(backref, &rec->backrefs, list) {
1065 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1067 if (backref->dir != dir || backref->namelen != namelen)
1069 if (memcmp(name, backref->name, namelen))
1074 backref = malloc(sizeof(*backref) + namelen + 1);
1077 memset(backref, 0, sizeof(*backref));
1079 backref->namelen = namelen;
1080 memcpy(backref->name, name, namelen);
1081 backref->name[namelen] = '\0';
1082 list_add_tail(&backref->list, &rec->backrefs);
1086 static int add_inode_backref(struct cache_tree *inode_cache,
1087 u64 ino, u64 dir, u64 index,
1088 const char *name, int namelen,
1089 u8 filetype, u8 itemtype, int errors)
1091 struct inode_record *rec;
1092 struct inode_backref *backref;
1094 rec = get_inode_rec(inode_cache, ino, 1);
1095 BUG_ON(IS_ERR(rec));
1096 backref = get_inode_backref(rec, name, namelen, dir);
1099 backref->errors |= errors;
1100 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1101 if (backref->found_dir_index)
1102 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1103 if (backref->found_inode_ref && backref->index != index)
1104 backref->errors |= REF_ERR_INDEX_UNMATCH;
1105 if (backref->found_dir_item && backref->filetype != filetype)
1106 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1108 backref->index = index;
1109 backref->filetype = filetype;
1110 backref->found_dir_index = 1;
1111 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1113 if (backref->found_dir_item)
1114 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1115 if (backref->found_dir_index && backref->filetype != filetype)
1116 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1118 backref->filetype = filetype;
1119 backref->found_dir_item = 1;
1120 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1121 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1122 if (backref->found_inode_ref)
1123 backref->errors |= REF_ERR_DUP_INODE_REF;
1124 if (backref->found_dir_index && backref->index != index)
1125 backref->errors |= REF_ERR_INDEX_UNMATCH;
1127 backref->index = index;
1129 backref->ref_type = itemtype;
1130 backref->found_inode_ref = 1;
1135 maybe_free_inode_rec(inode_cache, rec);
1139 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1140 struct cache_tree *dst_cache)
1142 struct inode_backref *backref;
1147 list_for_each_entry(backref, &src->backrefs, list) {
1148 if (backref->found_dir_index) {
1149 add_inode_backref(dst_cache, dst->ino, backref->dir,
1150 backref->index, backref->name,
1151 backref->namelen, backref->filetype,
1152 BTRFS_DIR_INDEX_KEY, backref->errors);
1154 if (backref->found_dir_item) {
1156 add_inode_backref(dst_cache, dst->ino,
1157 backref->dir, 0, backref->name,
1158 backref->namelen, backref->filetype,
1159 BTRFS_DIR_ITEM_KEY, backref->errors);
1161 if (backref->found_inode_ref) {
1162 add_inode_backref(dst_cache, dst->ino,
1163 backref->dir, backref->index,
1164 backref->name, backref->namelen, 0,
1165 backref->ref_type, backref->errors);
1169 if (src->found_dir_item)
1170 dst->found_dir_item = 1;
1171 if (src->found_file_extent)
1172 dst->found_file_extent = 1;
1173 if (src->found_csum_item)
1174 dst->found_csum_item = 1;
1175 if (src->some_csum_missing)
1176 dst->some_csum_missing = 1;
1177 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1178 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1183 BUG_ON(src->found_link < dir_count);
1184 dst->found_link += src->found_link - dir_count;
1185 dst->found_size += src->found_size;
1186 if (src->extent_start != (u64)-1) {
1187 if (dst->extent_start == (u64)-1) {
1188 dst->extent_start = src->extent_start;
1189 dst->extent_end = src->extent_end;
1191 if (dst->extent_end > src->extent_start)
1192 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1193 else if (dst->extent_end < src->extent_start) {
1194 ret = add_file_extent_hole(&dst->holes,
1196 src->extent_start - dst->extent_end);
1198 if (dst->extent_end < src->extent_end)
1199 dst->extent_end = src->extent_end;
1203 dst->errors |= src->errors;
1204 if (src->found_inode_item) {
1205 if (!dst->found_inode_item) {
1206 dst->nlink = src->nlink;
1207 dst->isize = src->isize;
1208 dst->nbytes = src->nbytes;
1209 dst->imode = src->imode;
1210 dst->nodatasum = src->nodatasum;
1211 dst->found_inode_item = 1;
1213 dst->errors |= I_ERR_DUP_INODE_ITEM;
1221 static int splice_shared_node(struct shared_node *src_node,
1222 struct shared_node *dst_node)
1224 struct cache_extent *cache;
1225 struct ptr_node *node, *ins;
1226 struct cache_tree *src, *dst;
1227 struct inode_record *rec, *conflict;
1228 u64 current_ino = 0;
1232 if (--src_node->refs == 0)
1234 if (src_node->current)
1235 current_ino = src_node->current->ino;
1237 src = &src_node->root_cache;
1238 dst = &dst_node->root_cache;
1240 cache = search_cache_extent(src, 0);
1242 node = container_of(cache, struct ptr_node, cache);
1244 cache = next_cache_extent(cache);
1247 remove_cache_extent(src, &node->cache);
1250 ins = malloc(sizeof(*ins));
1252 ins->cache.start = node->cache.start;
1253 ins->cache.size = node->cache.size;
1257 ret = insert_cache_extent(dst, &ins->cache);
1258 if (ret == -EEXIST) {
1259 conflict = get_inode_rec(dst, rec->ino, 1);
1260 BUG_ON(IS_ERR(conflict));
1261 merge_inode_recs(rec, conflict, dst);
1263 conflict->checked = 1;
1264 if (dst_node->current == conflict)
1265 dst_node->current = NULL;
1267 maybe_free_inode_rec(dst, conflict);
1268 free_inode_rec(rec);
1275 if (src == &src_node->root_cache) {
1276 src = &src_node->inode_cache;
1277 dst = &dst_node->inode_cache;
1281 if (current_ino > 0 && (!dst_node->current ||
1282 current_ino > dst_node->current->ino)) {
1283 if (dst_node->current) {
1284 dst_node->current->checked = 1;
1285 maybe_free_inode_rec(dst, dst_node->current);
1287 dst_node->current = get_inode_rec(dst, current_ino, 1);
1288 BUG_ON(IS_ERR(dst_node->current));
1293 static void free_inode_ptr(struct cache_extent *cache)
1295 struct ptr_node *node;
1296 struct inode_record *rec;
1298 node = container_of(cache, struct ptr_node, cache);
1300 free_inode_rec(rec);
1304 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1306 static struct shared_node *find_shared_node(struct cache_tree *shared,
1309 struct cache_extent *cache;
1310 struct shared_node *node;
1312 cache = lookup_cache_extent(shared, bytenr, 1);
1314 node = container_of(cache, struct shared_node, cache);
1320 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1323 struct shared_node *node;
1325 node = calloc(1, sizeof(*node));
1328 node->cache.start = bytenr;
1329 node->cache.size = 1;
1330 cache_tree_init(&node->root_cache);
1331 cache_tree_init(&node->inode_cache);
1334 ret = insert_cache_extent(shared, &node->cache);
1339 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1340 struct walk_control *wc, int level)
1342 struct shared_node *node;
1343 struct shared_node *dest;
1346 if (level == wc->active_node)
1349 BUG_ON(wc->active_node <= level);
1350 node = find_shared_node(&wc->shared, bytenr);
1352 ret = add_shared_node(&wc->shared, bytenr, refs);
1354 node = find_shared_node(&wc->shared, bytenr);
1355 wc->nodes[level] = node;
1356 wc->active_node = level;
1360 if (wc->root_level == wc->active_node &&
1361 btrfs_root_refs(&root->root_item) == 0) {
1362 if (--node->refs == 0) {
1363 free_inode_recs_tree(&node->root_cache);
1364 free_inode_recs_tree(&node->inode_cache);
1365 remove_cache_extent(&wc->shared, &node->cache);
1371 dest = wc->nodes[wc->active_node];
1372 splice_shared_node(node, dest);
1373 if (node->refs == 0) {
1374 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Leave the active shared node when the walk moves up from @level.
 *
 * Scans the levels above @level for the next active level (one must exist,
 * see the BUG_ON), clears the old wc->nodes[] slot and switches
 * wc->active_node. When the new active level is below the root level, or the
 * root still has references, the old node's records are spliced into the new
 * active node.
 */
1380 static int leave_shared_node(struct btrfs_root *root,
1381 struct walk_control *wc, int level)
1383 struct shared_node *node;
1384 struct shared_node *dest;
1387 if (level == wc->root_level)
1390 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1394 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and make level i the active one */
1396 node = wc->nodes[wc->active_node];
1397 wc->nodes[wc->active_node] = NULL;
1398 wc->active_node = i;
1400 dest = wc->nodes[wc->active_node];
1401 if (wc->active_node < wc->root_level ||
1402 btrfs_root_refs(&root->root_item) > 0) {
1403 BUG_ON(node->refs <= 1);
1404 splice_shared_node(node, dest);
/* NOTE(review): same condition as the BUG_ON above (refs <= 1 vs refs < 2) */
1406 BUG_ON(node->refs < 2);
1415 * 1 - if the root with id child_root_id is a child of root parent_root_id
1416 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1417 * has other root(s) as parent(s)
1418 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Work out how child_root_id relates to parent_root_id by searching the
 * tree root: first for the ROOT_REF key (parent_root_id, child_root_id),
 * then by walking the ROOT_BACKREF items of child_root_id looking for one
 * whose offset equals parent_root_id. When the walk finds no match, the
 * result is 0 if has_parent was set and 2 otherwise.
 */
1420 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1423 struct btrfs_path path;
1424 struct btrfs_key key;
1425 struct extent_buffer *leaf;
1429 btrfs_init_path(&path);
/* Step 1: direct lookup of the forward reference parent -> child */
1431 key.objectid = parent_root_id;
1432 key.type = BTRFS_ROOT_REF_KEY;
1433 key.offset = child_root_id;
1434 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1438 btrfs_release_path(&path);
/* Step 2: scan the back references recorded for the child */
1442 key.objectid = child_root_id;
1443 key.type = BTRFS_ROOT_BACKREF_KEY;
1445 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1451 leaf = path.nodes[0];
/* Past the last slot of this leaf: continue in the next leaf */
1452 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1453 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1456 leaf = path.nodes[0];
1459 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items that are no longer back references of the child end the scan */
1460 if (key.objectid != child_root_id ||
1461 key.type != BTRFS_ROOT_BACKREF_KEY)
/* A back reference whose offset is the parent id is a match */
1466 if (key.offset == parent_root_id) {
1467 btrfs_release_path(&path);
1474 btrfs_release_path(&path);
1477 return has_parent ? 0 : 2;
/*
 * Process the DIR_ITEM/DIR_INDEX item at @slot of leaf @eb for the inode
 * record that is current in @active_node.
 *
 * Each btrfs_dir_item packed in the item is read, its name length is checked
 * against the item size and BTRFS_NAME_LEN, and a backref is recorded in the
 * inode cache or the root cache depending on the type of its location key.
 */
1480 static int process_dir_item(struct extent_buffer *eb,
1481 int slot, struct btrfs_key *key,
1482 struct shared_node *active_node)
1492 struct btrfs_dir_item *di;
1493 struct inode_record *rec;
1494 struct cache_tree *root_cache;
1495 struct cache_tree *inode_cache;
1496 struct btrfs_key location;
1497 char namebuf[BTRFS_NAME_LEN];
1499 root_cache = &active_node->root_cache;
1500 inode_cache = &active_node->inode_cache;
1501 rec = active_node->current;
1502 rec->found_dir_item = 1;
1504 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1505 total = btrfs_item_size_nr(eb, slot);
1506 while (cur < total) {
1508 btrfs_dir_item_key_to_cpu(eb, di, &location);
1509 name_len = btrfs_dir_name_len(eb, di);
1510 data_len = btrfs_dir_data_len(eb, di);
1511 filetype = btrfs_dir_type(eb, di);
/* The name length is added to found_size before it is validated below */
1513 rec->found_size += name_len;
/* Name crosses the item end or exceeds the maximum: flag it, read a clamped length */
1514 if (cur + sizeof(*di) + name_len > total ||
1515 name_len > BTRFS_NAME_LEN) {
1516 error = REF_ERR_NAME_TOO_LONG;
1518 if (cur + sizeof(*di) > total)
1520 len = min_t(u32, total - cur - sizeof(*di),
1527 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset is expected to equal the hash of the name */
1529 if (key->type == BTRFS_DIR_ITEM_KEY &&
1530 key->offset != btrfs_name_hash(namebuf, len)) {
1531 rec->errors |= I_ERR_ODD_DIR_ITEM;
1532 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1533 key->objectid, key->offset, namebuf, len, filetype,
1534 key->offset, btrfs_name_hash(namebuf, len));
/* Record the backref in the cache that matches the location key type */
1537 if (location.type == BTRFS_INODE_ITEM_KEY) {
1538 add_inode_backref(inode_cache, location.objectid,
1539 key->objectid, key->offset, namebuf,
1540 len, filetype, key->type, error);
1541 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1542 add_inode_backref(root_cache, location.objectid,
1543 key->objectid, key->offset,
1544 namebuf, len, filetype,
1547 fprintf(stderr, "invalid location in dir item %u\n",
1549 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1550 key->objectid, key->offset, namebuf,
1551 len, filetype, key->type, error);
/* Step over header, name and data to the next entry packed in this item */
1554 len = sizeof(*di) + name_len + data_len;
1555 di = (struct btrfs_dir_item *)((char *)di + len);
/* nritems presumably counts the entries seen above; a DIR_INDEX key should hold one */
1558 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1559 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process the INODE_REF item at @slot of leaf @eb: every btrfs_inode_ref
 * packed in the item is turned into a backref in the inode cache of
 * @active_node, keyed by the inode (key->objectid) and its parent directory
 * (key->offset), together with the index and name read from the item.
 */
1564 static int process_inode_ref(struct extent_buffer *eb,
1565 int slot, struct btrfs_key *key,
1566 struct shared_node *active_node)
1574 struct cache_tree *inode_cache;
1575 struct btrfs_inode_ref *ref;
1576 char namebuf[BTRFS_NAME_LEN];
1578 inode_cache = &active_node->inode_cache;
1580 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1581 total = btrfs_item_size_nr(eb, slot);
1582 while (cur < total) {
1583 name_len = btrfs_inode_ref_name_len(eb, ref);
1584 index = btrfs_inode_ref_index(eb, ref);
1586 /* inode_ref + namelen should not cross item boundary */
1587 if (cur + sizeof(*ref) + name_len > total ||
1588 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size ref header fits in what is left of the item */
1589 if (total < cur + sizeof(*ref))
1592 /* Still try to read out the remaining part */
1593 len = min_t(u32, total - cur - sizeof(*ref),
1595 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the ref header directly */
1601 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1602 add_inode_backref(inode_cache, key->objectid, key->offset,
1603 index, namebuf, len, 0, key->type, error);
/* Advance to the next ref packed in the same item */
1605 len = sizeof(*ref) + name_len;
1606 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process the INODE_EXTREF item at @slot of leaf @eb: every
 * btrfs_inode_extref packed in the item is turned into a backref in the
 * inode cache of @active_node. The parent directory comes from the extref
 * itself rather than from the key offset.
 *
 * NOTE(review): the visible checks only compare name_len against
 * BTRFS_NAME_LEN; unlike process_inode_ref() no check against the item
 * boundary is visible here - confirm.
 */
1612 static int process_inode_extref(struct extent_buffer *eb,
1613 int slot, struct btrfs_key *key,
1614 struct shared_node *active_node)
1623 struct cache_tree *inode_cache;
1624 struct btrfs_inode_extref *extref;
1625 char namebuf[BTRFS_NAME_LEN];
1627 inode_cache = &active_node->inode_cache;
1629 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1630 total = btrfs_item_size_nr(eb, slot);
1631 while (cur < total) {
1632 name_len = btrfs_inode_extref_name_len(eb, extref);
1633 index = btrfs_inode_extref_index(eb, extref);
1634 parent = btrfs_inode_extref_parent(eb, extref);
1635 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: read only BTRFS_NAME_LEN bytes and flag the backref */
1639 len = BTRFS_NAME_LEN;
1640 error = REF_ERR_NAME_TOO_LONG;
1642 read_extent_buffer(eb, namebuf,
1643 (unsigned long)(extref + 1), len);
1644 add_inode_backref(inode_cache, key->objectid, parent,
1645 index, namebuf, len, 0, key->type, error);
/* Advance to the next extref packed in the same item */
1647 len = sizeof(*extref) + name_len;
1648 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the csum tree and measure how much of the byte range
 * [@start, @start + @len) is covered by EXTENT_CSUM items. The total is
 * presumably reported through @found - confirm.
 */
1655 static int count_csum_range(struct btrfs_root *root, u64 start,
1656 u64 len, u64 *found)
1658 struct btrfs_key key;
1659 struct btrfs_path path;
1660 struct extent_buffer *leaf;
1665 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1667 btrfs_init_path(&path);
1669 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1671 key.type = BTRFS_EXTENT_CSUM_KEY;
1673 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: the preceding item may still be a csum item of interest */
1677 if (ret > 0 && path.slots[0] > 0) {
1678 leaf = path.nodes[0];
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1680 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1681 key.type == BTRFS_EXTENT_CSUM_KEY)
1686 leaf = path.nodes[0];
/* Past the last slot of this leaf: continue in the next leaf */
1687 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1688 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1693 leaf = path.nodes[0];
1696 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1697 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1698 key.type != BTRFS_EXTENT_CSUM_KEY)
1701 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item begins at or after the end of the requested range */
1702 if (key.offset >= start + len)
1705 if (key.offset > start)
/* Item payload holds size / csum_size checksums, each standing for one sector */
1708 size = btrfs_item_size_nr(leaf, path.slots[0]);
1709 csum_end = key.offset + (size / csum_size) *
1710 root->fs_info->sectorsize;
/* Item overlaps the range: take the covered part, capped at len */
1711 if (csum_end > start) {
1712 size = min(csum_end - start, len);
1721 btrfs_release_path(&path);
/*
 * Account the EXTENT_DATA item at @slot of leaf @eb for the inode record
 * that is current in @active_node.
 *
 * Tracks the contiguity of file extents (overlaps and holes), validates the
 * extent fields according to the extent type, accumulates found_size and,
 * for extents with a disk address outside the data reloc tree, counts the
 * checksummed bytes with count_csum_range().
 */
1727 static int process_file_extent(struct btrfs_root *root,
1728 struct extent_buffer *eb,
1729 int slot, struct btrfs_key *key,
1730 struct shared_node *active_node)
1732 struct inode_record *rec;
1733 struct btrfs_file_extent_item *fi;
1735 u64 disk_bytenr = 0;
1736 u64 extent_offset = 0;
/* sectorsize - 1, used below for alignment checks and rounding */
1737 u64 mask = root->fs_info->sectorsize - 1;
1741 rec = active_node->current;
1742 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743 rec->found_file_extent = 1;
/* extent_start still (u64)-1: this is the first extent seen for the inode */
1745 if (rec->extent_start == (u64)-1) {
1746 rec->extent_start = key->offset;
1747 rec->extent_end = key->offset;
/* Starts before the previous extent ended: overlap; starts after it: hole */
1750 if (rec->extent_end > key->offset)
1751 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752 else if (rec->extent_end < key->offset) {
1753 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754 key->offset - rec->extent_end);
1759 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760 extent_type = btrfs_file_extent_type(eb, fi);
1762 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1765 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766 rec->found_size += num_bytes;
/* Round the inline length up to a sector multiple */
1767 num_bytes = (num_bytes + mask) & ~mask;
1768 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned */
1773 if (num_bytes == 0 || (num_bytes & mask))
1774 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced part must fit inside ram_bytes */
1775 if (num_bytes + extent_offset >
1776 btrfs_file_extent_ram_bytes(eb, fi))
1777 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not be compressed, encrypted or otherwise encoded */
1778 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779 (btrfs_file_extent_compression(eb, fi) ||
1780 btrfs_file_extent_encryption(eb, fi) ||
1781 btrfs_file_extent_other_encoding(eb, fi)))
1782 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk address count towards found_size */
1783 if (disk_bytenr > 0)
1784 rec->found_size += num_bytes;
1786 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1788 rec->extent_end = key->offset + num_bytes;
1791 * The data reloc tree will copy full extents into its inode and then
1792 * copy the corresponding csums. Because the extent it copied could be
1793 * a preallocated extent that hasn't been written to yet there may be no
1794 * csums to copy, ergo we won't have csums for our file extent. This is
1795 * ok so just don't bother checking csums if the inode belongs to the
1798 if (disk_bytenr > 0 &&
1799 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checked over their on-disk length */
1801 if (btrfs_file_extent_compression(eb, fi))
1802 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1804 disk_bytenr += extent_offset;
1806 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extents record csum presence and shortfall; prealloc extents can be flagged as odd */
1809 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1811 rec->found_csum_item = 1;
1812 if (found < num_bytes)
1813 rec->some_csum_missing = 1;
1814 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1816 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the shared node that is active in @wc.
 *
 * Items are visited in slot order. Whenever the key's objectid moves past
 * the inode of the current record, that record is marked checked, handed to
 * maybe_free_inode_rec() and replaced by the record for the new inode. The
 * item is then dispatched by key type to the matching process_*() helper.
 */
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823 struct walk_control *wc)
1825 struct btrfs_key key;
1829 struct cache_tree *inode_cache;
1830 struct shared_node *active_node;
/* Special case: walking the root level of a root whose root_item has zero refs */
1832 if (wc->root_level == wc->active_node &&
1833 btrfs_root_refs(&root->root_item) == 0)
1836 active_node = wc->nodes[wc->active_node];
1837 inode_cache = &active_node->inode_cache;
1838 nritems = btrfs_header_nritems(eb);
1839 for (i = 0; i < nritems; i++) {
1840 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get separate treatment from inode items */
1842 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1844 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a higher inode number: finish the old record, fetch the new one */
1847 if (active_node->current == NULL ||
1848 active_node->current->ino < key.objectid) {
1849 if (active_node->current) {
1850 active_node->current->checked = 1;
1851 maybe_free_inode_rec(inode_cache,
1852 active_node->current);
1854 active_node->current = get_inode_rec(inode_cache,
1856 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type */
1859 case BTRFS_DIR_ITEM_KEY:
1860 case BTRFS_DIR_INDEX_KEY:
1861 ret = process_dir_item(eb, i, &key, active_node);
1863 case BTRFS_INODE_REF_KEY:
1864 ret = process_inode_ref(eb, i, &key, active_node);
1866 case BTRFS_INODE_EXTREF_KEY:
1867 ret = process_inode_extref(eb, i, &key, active_node);
1869 case BTRFS_INODE_ITEM_KEY:
1870 ret = process_inode_item(eb, i, &key, active_node);
1872 case BTRFS_EXTENT_DATA_KEY:
1873 ret = process_file_extent(root, eb, i, &key,
/* Tree block address last recorded for each tree level */
1884 u64 bytenr[BTRFS_MAX_LEVEL];
/* Extent reference count recorded for the block at each level */
1885 u64 refs[BTRFS_MAX_LEVEL];
/* Per-level flag: non-zero when the block at that level must be checked in the current root */
1886 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations: both functions are used before their definitions */
1889 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1890 struct node_refs *nrefs, u64 level);
1891 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1892 unsigned int ext_ref);
1895 * Returns >0 Found error, not fatal, should continue
1896 * Returns <0 Fatal error, must exit the whole check
1897 * Returns 0 No errors found
/*
 * Check all inodes stored in the leaf at path->nodes[0].
 *
 * The slot is first advanced to the first INODE_ITEM or to the first change
 * of inode number, then check_inode_item() is run repeatedly while it stays
 * in the same leaf. If it moved to another leaf, the node reference cache in
 * @nrefs is refreshed from the root level down so that shared blocks which
 * do not need checking in this root can be skipped.
 */
1899 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1900 struct node_refs *nrefs, int *level, int ext_ref)
1902 struct extent_buffer *cur = path->nodes[0];
1903 struct btrfs_key key;
1907 int root_level = btrfs_header_level(root->node);
1909 int ret = 0; /* Final return value */
1910 int err = 0; /* Positive error bitmap */
/* Remember the starting leaf so a leaf switch can be detected later */
1912 cur_bytenr = cur->start;
1914 /* skip to first inode item or the first inode number change */
1915 nritems = btrfs_header_nritems(cur);
1916 for (i = 0; i < nritems; i++) {
1917 btrfs_item_key_to_cpu(cur, &key, i);
1919 first_ino = key.objectid;
1920 if (key.type == BTRFS_INODE_ITEM_KEY ||
1921 (first_ino && first_ino != key.objectid))
1925 path->slots[0] = nritems;
1931 err |= check_inode_item(root, path, ext_ref);
/* LAST_ITEM is a flag bit in the value check_inode_item() returns */
1933 if (err & LAST_ITEM)
1936 /* still have inode items in this leaf */
1937 if (cur->start == cur_bytenr)
1941 * we have switched to another leaf, above nodes may
1942 * have changed, here walk down the path, if a node
1943 * or leaf is shared, check whether we can skip this
1946 for (i = root_level; i >= 0; i--) {
1947 if (path->nodes[i]->start == nrefs->bytenr[i])
1950 ret = update_nodes_refs(root,
1951 path->nodes[i]->start,
1956 if (!nrefs->need_check[i]) {
/* Release the path buffers below *level */
1962 for (i = 0; i < *level; i++) {
1963 free_extent_buffer(path->nodes[i]);
1964 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at @slot: for every
 * remaining pointer in the node its block address and generation are passed
 * to readahead_tree_block().
 */
1973 static void reada_walk_down(struct btrfs_root *root,
1974 struct extent_buffer *node, int slot)
1976 struct btrfs_fs_info *fs_info = root->fs_info;
1983 level = btrfs_header_level(node);
1987 nritems = btrfs_header_nritems(node);
1988 for (i = slot; i < nritems; i++) {
1989 bytenr = btrfs_node_blockptr(node, i);
1990 ptr_gen = btrfs_node_ptr_generation(node, i);
1991 readahead_tree_block(fs_info, bytenr, ptr_gen);
1996 * Check the child node/leaf by the following condition:
1997 * 1. the first item key of the node/leaf should be the same as the one
1999 * 2. block in parent node should match the child node/leaf.
2000 * 3. generation of parent node and child's header should be consistent.
2002 * Or the child node/leaf pointed by the key in parent is not valid.
2004 * We hope to check leaf owner too, but since subvol may share leaves,
2005 * which makes leaf owner check not so strong, key check should be
2006 * sufficient for that case.
/*
 * Cross-check @child against the pointer stored at @slot of @parent:
 * the child's first key, its block address and its generation are compared
 * with the key, block pointer and pointer generation in the parent, and a
 * message is printed to stderr for each mismatch.
 */
2008 static int check_child_node(struct extent_buffer *parent, int slot,
2009 struct extent_buffer *child)
2011 struct btrfs_key parent_key;
2012 struct btrfs_key child_key;
2015 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A level-0 child is a leaf and stores item keys; otherwise read a node key */
2016 if (btrfs_header_level(child) == 0)
2017 btrfs_item_key_to_cpu(child, &child_key, 0);
2019 btrfs_node_key_to_cpu(child, &child_key, 0);
2021 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2024 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2025 parent_key.objectid, parent_key.type, parent_key.offset,
2026 child_key.objectid, child_key.type, child_key.offset);
2028 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2030 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2031 btrfs_node_blockptr(parent, slot),
2032 btrfs_header_bytenr(child));
2034 if (btrfs_node_ptr_generation(parent, slot) !=
2035 btrfs_header_generation(child)) {
/*
 * NOTE(review): the two messages above print the parent's value as "wanted"
 * and the child's as "have"; here the arguments are in the opposite order
 * (child generation first). Looks like the arguments are swapped - confirm.
 */
2037 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2038 btrfs_header_generation(child),
2039 btrfs_node_ptr_generation(parent, slot));
2045 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2046 * in every fs or file tree check. Here we find all of its root ids, and only check
2047 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots in @roots has to be
 * checked while processing @root. A single-entry list is handled up front;
 * otherwise the first entry of the ulist's rb-tree is compared with
 * root->objectid.
 */
2049 static int need_check(struct btrfs_root *root, struct ulist *roots)
2051 struct rb_node *node;
2052 struct ulist_node *u;
2054 if (roots->nnodes == 1)
/* rb_first() yields the leftmost entry, presumably the smallest root id - confirm ordering */
2057 node = rb_first(&roots->root);
2058 u = rb_entry(node, struct ulist_node, rb_node);
2060 * current root id is not smallest, we skip it and let it be checked
2061 * in the fs or file tree who hash the smallest root id.
2063 if (root->objectid != u->val)
2070 * for a tree node or leaf, we record its reference count, so later if we still
2071 * process this node or leaf, don't need to compute its reference count again.
/*
 * Refresh the cached information in @nrefs for the tree block at @bytenr on
 * @level. When the cache entry for that level describes a different block,
 * the extent reference count is looked up and stored, and need_check[level]
 * is set either from need_check() over all roots referencing the block or
 * to 1.
 */
2073 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2074 struct node_refs *nrefs, u64 level)
2078 struct ulist *roots;
/* Only do the lookups when the cache entry is for another block */
2080 if (nrefs->bytenr[level] != bytenr) {
2081 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2082 level, 1, &refs, NULL);
2086 nrefs->bytenr[level] = bytenr;
2087 nrefs->refs[level] = refs;
2089 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2094 check = need_check(root, roots);
2096 nrefs->need_check[level] = check;
2098 nrefs->need_check[level] = 1;
/*
 * Walk down the tree from path->nodes[*level] until a leaf has been
 * processed or the current node has no more slots.
 *
 * Reference counts of visited blocks are cached in @nrefs, blocks handed to
 * enter_shared_node() are tracked as shared nodes in @wc, leaves are handed
 * to process_one_leaf(), and each child block is read and validated
 * (check_child_node(), btrfs_check_leaf()/btrfs_check_node()) before the
 * walk descends into it.
 */
2105 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2106 struct walk_control *wc, int *level,
2107 struct node_refs *nrefs)
2109 enum btrfs_tree_block_status status;
2112 struct btrfs_fs_info *fs_info = root->fs_info;
2113 struct extent_buffer *next;
2114 struct extent_buffer *cur;
2118 WARN_ON(*level < 0);
2119 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refs for the starting block, or look them up and cache them */
2121 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2122 refs = nrefs->refs[*level];
2125 ret = btrfs_lookup_extent_info(NULL, root,
2126 path->nodes[*level]->start,
2127 *level, 1, &refs, NULL);
2132 nrefs->bytenr[*level] = path->nodes[*level]->start;
2133 nrefs->refs[*level] = refs;
2137 ret = enter_shared_node(root, path->nodes[*level]->start,
2145 while (*level >= 0) {
2146 WARN_ON(*level < 0);
2147 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2148 cur = path->nodes[*level];
/* The header level must agree with the level being walked */
2150 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been visited */
2153 if (path->slots[*level] >= btrfs_header_nritems(cur))
2156 ret = process_one_leaf(root, cur, wc);
/* Child pointer for the current slot */
2161 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2162 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme as above, for the child one level down */
2164 if (bytenr == nrefs->bytenr[*level - 1]) {
2165 refs = nrefs->refs[*level - 1];
2167 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2168 *level - 1, 1, &refs, NULL);
2172 nrefs->bytenr[*level - 1] = bytenr;
2173 nrefs->refs[*level - 1] = refs;
2178 ret = enter_shared_node(root, bytenr, refs,
2181 path->slots[*level]++;
/* Use the cached child if present and up to date, otherwise read it after readahead */
2186 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2187 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2188 free_extent_buffer(next);
2189 reada_walk_down(root, cur, path->slots[*level]);
2190 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Unreadable child: record a corrupt extent for the current node */
2191 if (!extent_buffer_uptodate(next)) {
2192 struct btrfs_key node_key;
2194 btrfs_node_key_to_cpu(path->nodes[*level],
2196 path->slots[*level]);
2197 btrfs_add_corrupt_extent_record(root->fs_info,
2199 path->nodes[*level]->start,
2200 root->fs_info->nodesize,
/* The child must match the key, block pointer and generation stored in its parent */
2207 ret = check_child_node(cur, path->slots[*level], next);
2209 free_extent_buffer(next);
2214 if (btrfs_is_leaf(next))
2215 status = btrfs_check_leaf(root, NULL, next);
2217 status = btrfs_check_node(root, NULL, next);
2218 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2219 free_extent_buffer(next);
/* Descend: install the child in the path one level down, starting at slot 0 */
2224 *level = *level - 1;
2225 free_extent_buffer(path->nodes[*level]);
2226 path->nodes[*level] = next;
2227 path->slots[*level] = 0;
/* Mark the current block as fully consumed */
2230 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* NOTE(review): redundant - an identical prototype already appears earlier in this file */
2234 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2235 unsigned int ext_ref);
2238 * Returns >0 Found error, should continue
2239 * Returns <0 Fatal error, must exit the whole check
2240 * Returns 0 No errors found
/*
 * Walk down the tree from path->nodes[*level], validating each block on the
 * way (btrfs_check_leaf()/btrfs_check_node()), handing leaves to
 * process_one_leaf_v2() and skipping children whose need_check flag in
 * @nrefs is clear.
 */
2242 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2243 int *level, struct node_refs *nrefs, int ext_ref)
2245 enum btrfs_tree_block_status status;
2248 struct btrfs_fs_info *fs_info = root->fs_info;
2249 struct extent_buffer *next;
2250 struct extent_buffer *cur;
2253 WARN_ON(*level < 0);
2254 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Make sure @nrefs describes the block the walk starts from */
2256 ret = update_nodes_refs(root, path->nodes[*level]->start,
2261 while (*level >= 0) {
2262 WARN_ON(*level < 0);
2263 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2264 cur = path->nodes[*level];
/* The header level must agree with the level being walked */
2266 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been visited */
2269 if (path->slots[*level] >= btrfs_header_nritems(cur))
2271 /* Don't forget to check leaf/node validation */
2273 ret = btrfs_check_leaf(root, NULL, cur);
2274 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2278 ret = process_one_leaf_v2(root, path, nrefs,
2282 ret = btrfs_check_node(root, NULL, cur);
2283 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child pointer for the current slot */
2288 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2289 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2291 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking in this root: move on to the next slot */
2294 if (!nrefs->need_check[*level - 1]) {
2295 path->slots[*level]++;
/* Use the cached child if present and up to date, otherwise read it after readahead */
2299 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2300 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2301 free_extent_buffer(next);
2302 reada_walk_down(root, cur, path->slots[*level]);
2303 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Unreadable child: record a corrupt extent for the current node */
2304 if (!extent_buffer_uptodate(next)) {
2305 struct btrfs_key node_key;
2307 btrfs_node_key_to_cpu(path->nodes[*level],
2309 path->slots[*level]);
2310 btrfs_add_corrupt_extent_record(fs_info,
2312 path->nodes[*level]->start,
2320 ret = check_child_node(cur, path->slots[*level], next);
2324 if (btrfs_is_leaf(next))
2325 status = btrfs_check_leaf(root, NULL, next);
2327 status = btrfs_check_node(root, NULL, next);
2328 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2329 free_extent_buffer(next);
/* Descend: install the child in the path one level down, starting at slot 0 */
2334 *level = *level - 1;
2335 free_extent_buffer(path->nodes[*level]);
2336 path->nodes[*level] = next;
2337 path->slots[*level] = 0;
/*
 * Walk up the path from *level looking for a level that still has an
 * unvisited slot (slot + 1 < nritems). Levels that are exhausted have their
 * buffer released and, when such a level is the active shared node in @wc,
 * leave_shared_node() is called for it.
 */
2342 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2343 struct walk_control *wc, int *level)
2346 struct extent_buffer *leaf;
2348 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2349 leaf = path->nodes[i];
2350 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Exhausted level: drop the buffer and clear the path entry */
2355 free_extent_buffer(path->nodes[*level]);
2356 path->nodes[*level] = NULL;
2357 BUG_ON(*level > wc->active_node);
2358 if (*level == wc->active_node)
2359 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() that takes no walk_control: walks up the path
 * from *level looking for a level with an unvisited slot
 * (slot + 1 < nritems), releasing the buffers of exhausted levels.
 */
2366 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2370 struct extent_buffer *leaf;
2372 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2373 leaf = path->nodes[i];
2374 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Exhausted level: drop the buffer and clear the path entry */
2379 free_extent_buffer(path->nodes[*level]);
2380 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory. The conditions tested
 * are: an inode item was found and no errors are recorded; nlink is 1 and
 * no links were found; there is at least one backref; the first backref has
 * an inode ref with index 0 and the name ".."; and that backref has neither
 * a dir index nor a dir item.
 */
2387 static int check_root_dir(struct inode_record *rec)
2389 struct inode_backref *backref;
2392 if (!rec->found_inode_item || rec->errors)
2394 if (rec->nlink != 1 || rec->found_link != 0)
2396 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined */
2398 backref = to_inode_backref(rec->backrefs.next);
2399 if (!backref->found_inode_ref)
2401 if (backref->index != 0 || backref->namelen != 2 ||
2402 memcmp(backref->name, "..", 2))
2404 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a wrong directory isize: locate the INODE_ITEM of rec->ino in
 * @root, set its size to rec->found_size, mark the leaf dirty and clear
 * I_ERR_DIR_ISIZE_WRONG on the record. The path is released before
 * returning.
 */
2411 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2412 struct btrfs_root *root, struct btrfs_path *path,
2413 struct inode_record *rec)
2415 struct btrfs_inode_item *ei;
2416 struct btrfs_key key;
/* Search with the largest possible offset; the item wanted then precedes the returned slot */
2419 key.objectid = rec->ino;
2420 key.type = BTRFS_INODE_ITEM_KEY;
2421 key.offset = (u64)-1;
2423 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 means there is no preceding item in this leaf */
2427 if (!path->slots[0]) {
2434 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2435 if (key.objectid != rec->ino) {
2440 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2441 struct btrfs_inode_item);
2442 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2443 btrfs_mark_buffer_dirty(path->nodes[0]);
2444 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
/* NOTE(review): %Lu is a non-standard length modifier for integers; the rest of the file uses %llu */
2445 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2446 root->root_key.objectid);
2448 btrfs_release_path(path);
/*
 * Repair a missing orphan item: add an orphan item for rec->ino with
 * btrfs_add_orphan_item(), release the path and clear I_ERR_NO_ORPHAN_ITEM
 * on the record.
 */
2452 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2453 struct btrfs_root *root,
2454 struct btrfs_path *path,
2455 struct inode_record *rec)
2459 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2460 btrfs_release_path(path);
2462 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair a wrong nbytes value: look up the INODE_ITEM of rec->ino in @root,
 * set its nbytes to rec->found_size, mark the leaf dirty and clear
 * I_ERR_FILE_NBYTES_WRONG on the record. The path is released before
 * returning.
 */
2466 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2467 struct btrfs_root *root,
2468 struct btrfs_path *path,
2469 struct inode_record *rec)
2471 struct btrfs_inode_item *ei;
2472 struct btrfs_key key;
2475 key.objectid = rec->ino;
2476 key.type = BTRFS_INODE_ITEM_KEY;
2479 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2486 /* Since ret == 0, no need to check anything */
2487 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2488 struct btrfs_inode_item);
2489 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2490 btrfs_mark_buffer_dirty(path->nodes[0]);
2491 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2492 printf("reset nbytes for ino %llu root %llu\n",
2493 rec->ino, root->root_key.objectid);
2495 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for @backref of inode @rec.
 *
 * In its own transaction an item keyed (backref->dir, DIR_INDEX,
 * backref->index) is inserted and filled with a dir item pointing at the
 * INODE_ITEM of rec->ino, carrying the backref's name. Afterwards the
 * backref is marked as having a dir index and the directory's record in
 * @inode_cache has its found_size and isize error flag updated.
 */
2499 static int add_missing_dir_index(struct btrfs_root *root,
2500 struct cache_tree *inode_cache,
2501 struct inode_record *rec,
2502 struct inode_backref *backref)
2504 struct btrfs_path path;
2505 struct btrfs_trans_handle *trans;
2506 struct btrfs_dir_item *dir_item;
2507 struct extent_buffer *leaf;
2508 struct btrfs_key key;
2509 struct btrfs_disk_key disk_key;
2510 struct inode_record *dir_rec;
2511 unsigned long name_ptr;
/* Item payload: the dir item header followed directly by the name */
2512 u32 data_size = sizeof(*dir_item) + backref->namelen;
2515 trans = btrfs_start_transaction(root, 1);
2517 return PTR_ERR(trans);
2519 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2520 (unsigned long long)rec->ino);
2522 btrfs_init_path(&path);
2523 key.objectid = backref->dir;
2524 key.type = BTRFS_DIR_INDEX_KEY;
2525 key.offset = backref->index;
2526 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2529 leaf = path.nodes[0];
2530 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the new entry: the inode item of rec->ino */
2532 disk_key.objectid = cpu_to_le64(rec->ino);
2533 disk_key.type = BTRFS_INODE_ITEM_KEY;
2534 disk_key.offset = 0;
2536 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2537 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2538 btrfs_set_dir_data_len(leaf, dir_item, 0);
2539 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2540 name_ptr = (unsigned long)(dir_item + 1);
2541 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2542 btrfs_mark_buffer_dirty(leaf);
2543 btrfs_release_path(&path);
/* NOTE(review): the return value of the commit is not checked here */
2544 btrfs_commit_transaction(trans, root);
2546 backref->found_dir_index = 1;
2547 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2548 BUG_ON(IS_ERR(dir_rec));
/* Account the new name in the directory and re-evaluate its isize error flag */
2551 dir_rec->found_size += backref->namelen;
2552 if (dir_rec->found_size == dir_rec->isize &&
2553 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2554 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2555 if (dir_rec->found_size != dir_rec->isize)
2556 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root, in its own
 * transaction. The entry is looked up by directory, name and index; it is
 * then removed either with btrfs_del_item() or with
 * btrfs_delete_one_dir_name(), and the transaction is committed.
 */
2561 static int delete_dir_index(struct btrfs_root *root,
2562 struct inode_backref *backref)
2564 struct btrfs_trans_handle *trans;
2565 struct btrfs_dir_item *di;
2566 struct btrfs_path path;
2569 trans = btrfs_start_transaction(root, 1);
2571 return PTR_ERR(trans);
2573 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2574 (unsigned long long)backref->dir,
2575 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2576 (unsigned long long)root->objectid);
2578 btrfs_init_path(&path);
/* The final argument is -1; presumably this requests a lookup for deletion - confirm */
2579 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2580 backref->name, backref->namelen,
2581 backref->index, -1);
/* Early-exit path: release the path and commit without deleting */
2584 btrfs_release_path(&path);
2585 btrfs_commit_transaction(trans, root);
/* Two removal variants: the whole item, or a single name inside it */
2592 ret = btrfs_del_item(trans, root, &path);
2594 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2596 btrfs_release_path(&path);
2597 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing INODE_ITEM for @rec in @root, in its own transaction.
 *
 * The item is built on the stack from what the check has collected: nlink,
 * nbytes (found_size), and a mode and size that depend on whether a dir
 * item was seen for the inode (directory) or not (regular file). atime,
 * ctime and mtime are set to the current time, otime to zero.
 */
2601 static int create_inode_item(struct btrfs_root *root,
2602 struct inode_record *rec,
2605 struct btrfs_trans_handle *trans;
2606 struct btrfs_inode_item inode_item;
2607 time_t now = time(NULL);
2610 trans = btrfs_start_transaction(root, 1);
2611 if (IS_ERR(trans)) {
2612 ret = PTR_ERR(trans);
2616 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2617 "be incomplete, please check permissions and content after "
2618 "the fsck completes.\n", (unsigned long long)root->objectid,
2619 (unsigned long long)rec->ino);
2621 memset(&inode_item, 0, sizeof(inode_item));
2622 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either forced to 1 or taken from the number of links found */
2624 btrfs_set_stack_inode_nlink(&inode_item, 1);
2626 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2627 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* A dir item was seen: treat the inode as a directory, even if extents exist too */
2628 if (rec->found_dir_item) {
2629 if (rec->found_file_extent)
2630 fprintf(stderr, "root %llu inode %llu has both a dir "
2631 "item and extents, unsure if it is a dir or a "
2632 "regular file so setting it as a directory\n",
2633 (unsigned long long)root->objectid,
2634 (unsigned long long)rec->ino);
2635 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2636 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true here, a plain else would do */
2637 } else if (!rec->found_dir_item) {
2638 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2639 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2641 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2642 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2643 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2644 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2645 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2646 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2647 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2648 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2650 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2652 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the backrefs of inode record @rec in @root.
 *
 * The delete flag tested below selects between two kinds of work: removing
 * bad dir index entries, or adding what is missing (inode item, dir index,
 * dir item/index pair). Backrefs that are dealt with are removed from the
 * record's list. Returns the last error if one is set, otherwise the value
 * of repaired.
 */
2656 static int repair_inode_backrefs(struct btrfs_root *root,
2657 struct inode_record *rec,
2658 struct cache_tree *inode_cache,
2661 struct inode_backref *tmp, *backref;
2662 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2666 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory without an inode item gets one recreated first */
2667 if (!delete && rec->ino == root_dirid) {
2668 if (!rec->found_inode_item) {
2669 ret = create_inode_item(root, rec, 1);
2676 /* Index 0 for root dir's are special, don't mess with it */
2677 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without an inode ref, or with a mismatching index: delete the dir index */
2681 ((backref->found_dir_index && !backref->found_inode_ref) ||
2682 (backref->found_dir_index && backref->found_inode_ref &&
2683 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2684 ret = delete_dir_index(root, backref);
2688 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it */
2693 if (!delete && !backref->found_dir_index &&
2694 backref->found_dir_item && backref->found_inode_ref) {
2695 ret = add_missing_dir_index(root, inode_cache, rec,
/* Backref is complete and error free: nothing left to repair for it */
2700 if (backref->found_dir_item &&
2701 backref->found_dir_index) {
2702 if (!backref->errors &&
2703 backref->found_inode_ref) {
2704 list_del(&backref->list);
/* Only the inode ref exists: insert the missing dir item/index pair */
2711 if (!delete && (!backref->found_dir_index &&
2712 !backref->found_dir_item &&
2713 backref->found_inode_ref)) {
2714 struct btrfs_trans_handle *trans;
2715 struct btrfs_key location;
2717 ret = check_dir_conflict(root, backref->name,
2723 * let nlink fixing routine to handle it,
2724 * which can do it better.
2729 location.objectid = rec->ino;
2730 location.type = BTRFS_INODE_ITEM_KEY;
2731 location.offset = 0;
2733 trans = btrfs_start_transaction(root, 1);
2734 if (IS_ERR(trans)) {
2735 ret = PTR_ERR(trans);
2738 fprintf(stderr, "adding missing dir index/item pair "
2740 (unsigned long long)rec->ino);
2741 ret = btrfs_insert_dir_item(trans, root, backref->name,
2743 backref->dir, &location,
2744 imode_to_type(rec->imode),
2747 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is missing: recreate it */
2751 if (!delete && (backref->found_inode_ref &&
2752 backref->found_dir_index &&
2753 backref->found_dir_item &&
2754 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2755 !rec->found_inode_item)) {
2756 ret = create_inode_item(root, rec, 0);
2763 return ret ? ret : repaired;
2767 * To determine the file type for nlink/inode_item repair
2769 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2770 * Return -ENOENT if file type is not found.
2772 static int find_file_type(struct inode_record *rec, u8 *type)
2774 struct inode_backref *backref;
2776 /* For inode item recovered case */
2777 if (rec->found_inode_item) {
2778 *type = imode_to_type(rec->imode);
2782 list_for_each_entry(backref, &rec->backrefs, list) {
2783 if (backref->found_dir_index || backref->found_dir_item) {
2784 *type = backref->filetype;
2792 * To determine the file name for nlink repair
2794 * Return 0 if file name is found, set name and namelen.
2795 * Return -ENOENT if file name is not found.
2797 static int find_file_name(struct inode_record *rec,
2798 char *name, int *namelen)
2800 struct inode_backref *backref;
2802 list_for_each_entry(backref, &rec->backrefs, list) {
2803 if (backref->found_dir_index || backref->found_dir_item ||
2804 backref->found_inode_ref) {
2805 memcpy(name, backref->name, backref->namelen);
2806 *namelen = backref->namelen;
/*
 * Rebuild the link count of rec->ino from the backrefs recorded in @rec.
 *
 * Steps visible here: zero rec->found_link, btrfs_unlink() every backref,
 * drop from the list any backref that lacks one of dir_index, dir_item or
 * inode_ref, set the nlink field of the inode item to 0, then
 * btrfs_add_link() every backref still on the list.
 *
 * NOTE(review): this listing omits short lines (braces, error checks,
 * the return statement); confirm error handling against the full file.
 */
2813 /* Reset the nlink of the inode to the correct one */
2814 static int reset_nlink(struct btrfs_trans_handle *trans,
2815 struct btrfs_root *root,
2816 struct btrfs_path *path,
2817 struct inode_record *rec)
2819 struct inode_backref *backref;
2820 struct inode_backref *tmp;
2821 struct btrfs_key key;
2822 struct btrfs_inode_item *inode_item;
2825 /* We don't believe this either, reset it and iterate backref */
2826 rec->found_link = 0;
2828 /* Remove all backref including the valid ones */
2829 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2830 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2831 backref->index, backref->name,
2832 backref->namelen, 0);
2836 /* remove invalid backref, so it won't be added back */
2837 if (!(backref->found_dir_index &&
2838 backref->found_dir_item &&
2839 backref->found_inode_ref)) {
2840 list_del(&backref->list);
2847 /* Set nlink to 0 */
2848 key.objectid = rec->ino;
2849 key.type = BTRFS_INODE_ITEM_KEY;
2851 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2858 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2859 struct btrfs_inode_item);
2860 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2861 btrfs_mark_buffer_dirty(path->nodes[0]);
2862 btrfs_release_path(path);
2865 * Add back valid inode_ref/dir_item/dir_index,
2866 * add_link() will handle the nlink inc, so new nlink must be correct
2868 list_for_each_entry(backref, &rec->backrefs, list) {
2869 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2870 backref->name, backref->namelen,
2871 backref->filetype, &backref->index, 1);
2876 btrfs_release_path(path);
/*
 * Report through *highest_ino the objectid of the item that sits just
 * before the search position for a key with objectid
 * BTRFS_LAST_FREE_OBJECTID and type BTRFS_INODE_ITEM_KEY in @root.
 * Presumably this is the highest inode number in use - confirm.
 *
 * The path is initialised on entry and released before returning.
 *
 * NOTE(review): slots[0] - 1 is only a valid slot when slots[0] > 0, and
 * *highest_ino is compared against BTRFS_LAST_FREE_OBJECTID afterwards.
 * The guard around the key read and the result of that comparison are
 * not visible in this listing - confirm both.
 */
2880 static int get_highest_inode(struct btrfs_trans_handle *trans,
2881 struct btrfs_root *root,
2882 struct btrfs_path *path,
2885 struct btrfs_key key, found_key;
2888 btrfs_init_path(path);
2889 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2891 key.type = BTRFS_INODE_ITEM_KEY;
2892 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2894 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2895 path->slots[0] - 1);
2896 *highest_ino = found_key.objectid;
2899 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2901 btrfs_release_path(path);
/*
 * Repair a wrong link count on rec->ino.
 *
 * The file name and type are captured first (falling back to the inode
 * number as the name and BTRFS_FT_REG_FILE as the type), then
 * reset_nlink() rebuilds the links from the backrefs. If no link is left
 * afterwards (rec->found_link == 0) the inode is linked into a lost+found
 * directory created under BTRFS_FIRST_FREE_OBJECTID; on -EEXIST the name
 * gets a .INO suffix appended and the link is retried.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared from rec->errors near the end and the
 * path is released.
 *
 * NOTE(review): several declarations, error checks and the return are
 * missing from this listing; the exit paths are not described here.
 */
2905 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 char *dir_name = "lost+found";
2911 char namebuf[BTRFS_NAME_LEN] = {0};
2916 int name_recovered = 0;
2917 int type_recovered = 0;
2921 * Get file name and type first before these invalid inode ref
2922 * are deleted by remove_all_invalid_backref()
2924 name_recovered = !find_file_name(rec, namebuf, &namelen);
2925 type_recovered = !find_file_type(rec, &type);
2927 if (!name_recovered) {
2928 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2929 rec->ino, rec->ino);
2930 namelen = count_digits(rec->ino);
2931 sprintf(namebuf, "%llu", rec->ino);
2934 if (!type_recovered) {
2935 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2937 type = BTRFS_FT_REG_FILE;
2941 ret = reset_nlink(trans, root, path, rec);
2944 "Failed to reset nlink for inode %llu: %s\n",
2945 rec->ino, strerror(-ret));
2949 if (rec->found_link == 0) {
2950 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2954 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2955 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2958 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2959 dir_name, strerror(-ret));
2962 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2963 namebuf, namelen, type, NULL, 1);
2965 * Add ".INO" suffix several times to handle case where
2966 * "FILENAME.INO" is already taken by another file.
2968 while (ret == -EEXIST) {
2970 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2972 if (namelen + count_digits(rec->ino) + 1 >
2977 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2979 namelen += count_digits(rec->ino) + 1;
2980 ret = btrfs_add_link(trans, root, rec->ino,
2981 lost_found_ino, namebuf,
2982 namelen, type, NULL, 1);
2986 "Failed to link the inode %llu to %s dir: %s\n",
2987 rec->ino, dir_name, strerror(-ret));
2991 * Just increase the found_link, don't actually add the
2992 * backref. This will make things easier and this inode
2993 * record will be freed after the repair is done.
2994 * So fsck will not report problem about this inode.
2997 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2998 namelen, namebuf, dir_name);
3000 printf("Fixed the nlink of inode %llu\n", rec->ino);
3003 * Clear the flag anyway, or we will loop forever for the same inode
3004 * as it will not be removed from the bad inode list and the dead loop
3007 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3008 btrfs_release_path(path);
/*
 * Probe inode @ino in @root for a file extent item whose type is not
 * BTRFS_FILE_EXTENT_INLINE.
 *
 * The search is read-only (no transaction handle is passed). Only one
 * item is examined after the search position: it must belong to @ino and
 * be a BTRFS_EXTENT_DATA_KEY, otherwise the lookup stops.
 *
 * NOTE(review): the key setup, the result variable and the return are
 * partly missing from this listing; the original comment below states
 * that errors are treated as no normal file extent.
 */
3013 * Check if there is any normal(reg or prealloc) file extent for given
3015 * This is used to determine the file type when neither its dir_index/item or
3016 * inode_item exists.
3018 * This will *NOT* report error, if any error happens, just consider it does
3019 * not have any normal file extent.
3021 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3023 struct btrfs_path path;
3024 struct btrfs_key key;
3025 struct btrfs_key found_key;
3026 struct btrfs_file_extent_item *fi;
3030 btrfs_init_path(&path);
3032 key.type = BTRFS_EXTENT_DATA_KEY;
3035 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3040 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3041 ret = btrfs_next_leaf(root, &path);
3048 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3050 if (found_key.objectid != ino ||
3051 found_key.type != BTRFS_EXTENT_DATA_KEY)
3053 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3054 struct btrfs_file_extent_item);
3055 type = btrfs_file_extent_type(path.nodes[0], fi);
3056 if (type != BTRFS_FILE_EXTENT_INLINE) {
3062 btrfs_release_path(&path);
3066 static u32 btrfs_type_to_imode(u8 type)
3068 static u32 imode_by_btrfs_type[] = {
3069 [BTRFS_FT_REG_FILE] = S_IFREG,
3070 [BTRFS_FT_DIR] = S_IFDIR,
3071 [BTRFS_FT_CHRDEV] = S_IFCHR,
3072 [BTRFS_FT_BLKDEV] = S_IFBLK,
3073 [BTRFS_FT_FIFO] = S_IFIFO,
3074 [BTRFS_FT_SOCK] = S_IFSOCK,
3075 [BTRFS_FT_SYMLINK] = S_IFLNK,
3078 return imode_by_btrfs_type[(type)];
/*
 * Recreate the missing inode item of rec->ino.
 *
 * The file type comes from find_file_type() when possible. Otherwise it
 * is chosen from what was found for the inode: a non-inline file extent
 * or orphan extents give BTRFS_FT_REG_FILE, a dir item gives
 * BTRFS_FT_DIR, and anything else falls back to BTRFS_FT_REG_FILE with a
 * message. btrfs_new_inode() then creates the item.
 *
 * On the visible path the record is updated to match the new item,
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so
 * that the nlink repair runs afterwards.
 *
 * NOTE(review): the declarations of mode and filetype and the error
 * handling after btrfs_new_inode() are not visible in this listing.
 */
3081 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3082 struct btrfs_root *root,
3083 struct btrfs_path *path,
3084 struct inode_record *rec)
3088 int type_recovered = 0;
3091 printf("Trying to rebuild inode:%llu\n", rec->ino);
3093 type_recovered = !find_file_type(rec, &filetype);
3096 * Try to determine inode type if type not found.
3098 * For found regular file extent, it must be FILE.
3099 * For found dir_item/index, it must be DIR.
3101 * For undetermined one, use FILE as fallback.
3104 * 1. If found backref(inode_index/item is already handled) to it,
3106 * Need new inode-inode ref structure to allow search for that.
3108 if (!type_recovered) {
3109 if (rec->found_file_extent &&
3110 find_normal_file_extent(root, rec->ino)) {
3112 filetype = BTRFS_FT_REG_FILE;
3113 } else if (rec->found_dir_item) {
3115 filetype = BTRFS_FT_DIR;
3116 } else if (!list_empty(&rec->orphan_extents)) {
3118 filetype = BTRFS_FT_REG_FILE;
3120 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3123 filetype = BTRFS_FT_REG_FILE;
3127 ret = btrfs_new_inode(trans, root, rec->ino,
3128 mode | btrfs_type_to_imode(filetype));
3133 * Here inode rebuild is done, we only rebuild the inode item,
3134 * don't repair the nlink(like move to lost+found).
3135 * That is the job of nlink repair.
3137 * We just fill the record and return
3139 rec->found_dir_item = 1;
3140 rec->imode = mode | btrfs_type_to_imode(filetype);
3142 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3143 /* Ensure the inode_nlinks repair function will be called */
3144 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Reattach the orphan data extents recorded on @rec to their inode.
 *
 * For each orphan extent btrfs_get_extent() is used to look for an
 * existing file extent covering the same range. A conflicting orphan is
 * released with btrfs_free_extent(); otherwise a file extent item is
 * inserted with disk_len used for both lengths, rec->found_size grows by
 * disk_len and the matching hole is removed from rec->holes. Error bits
 * for nbytes and discontinuous extents are cleared once they no longer
 * apply, and I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 *
 * NOTE(review): the branch conditions on ret and the freeing of each
 * orphan entry are not visible in this listing - confirm.
 */
3149 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3150 struct btrfs_root *root,
3151 struct btrfs_path *path,
3152 struct inode_record *rec)
3154 struct orphan_data_extent *orphan;
3155 struct orphan_data_extent *tmp;
3158 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3160 * Check for conflicting file extents
3162 * Here we don't know whether the extents is compressed or not,
3163 * so we can only assume it not compressed nor data offset,
3164 * and use its disk_len as extent length.
3166 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3167 orphan->offset, orphan->disk_len, 0);
3168 btrfs_release_path(path);
3173 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3174 orphan->disk_bytenr, orphan->disk_len);
3175 ret = btrfs_free_extent(trans,
3176 root->fs_info->extent_root,
3177 orphan->disk_bytenr, orphan->disk_len,
3178 0, root->objectid, orphan->objectid,
3183 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3184 orphan->offset, orphan->disk_bytenr,
3185 orphan->disk_len, orphan->disk_len);
3189 /* Update file size info */
3190 rec->found_size += orphan->disk_len;
3191 if (rec->found_size == rec->nbytes)
3192 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3194 /* Update the file extent hole info too */
3195 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3199 if (RB_EMPTY_ROOT(&rec->holes))
3200 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3202 list_del(&orphan->list);
3205 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes with hole extents.
 *
 * Each hole is taken from the front of the rb-tree, punched with
 * btrfs_punch_hole() and then removed with del_file_extent_hole(); the
 * I_ERR_FILE_EXTENT_DISCOUNT bit is cleared once the tree is empty.
 * A separate call punches a hole over the whole file, from offset 0 to
 * isize rounded up to the sector size, for a file that lost all of its
 * extents.
 *
 * NOTE(review): the loop header, the condition guarding the special
 * case and the error checks are not visible in this listing.
 */
3210 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3211 struct btrfs_root *root,
3212 struct btrfs_path *path,
3213 struct inode_record *rec)
3215 struct rb_node *node;
3216 struct file_extent_hole *hole;
3220 node = rb_first(&rec->holes);
3224 hole = rb_entry(node, struct file_extent_hole, node);
3225 ret = btrfs_punch_hole(trans, root, rec->ino,
3226 hole->start, hole->len);
3229 ret = del_file_extent_hole(&rec->holes, hole->start,
3233 if (RB_EMPTY_ROOT(&rec->holes))
3234 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3235 node = rb_first(&rec->holes);
3237 /* special case for a file losing all its file extent */
3239 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3240 round_up(rec->isize,
3241 root->fs_info->sectorsize));
3245 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3246 rec->ino, root->objectid);
/*
 * Run every applicable repair routine for one inode record inside a
 * single transaction.
 *
 * Nothing is attempted unless rec->errors contains at least one of the
 * repairable bits listed in the first test. The repairs run in a fixed
 * order (missing inode item, orphan extents, discontinuous extents, dir
 * isize, orphan item, nlink, nbytes) and each later step is skipped once
 * an earlier one has returned non-zero. The transaction is committed
 * regardless of the outcome.
 *
 * NOTE(review): the value returned by btrfs_commit_transaction() is not
 * used here, and the early-return value is not visible in this listing.
 */
3251 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3253 struct btrfs_trans_handle *trans;
3254 struct btrfs_path path;
3257 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3258 I_ERR_NO_ORPHAN_ITEM |
3259 I_ERR_LINK_COUNT_WRONG |
3260 I_ERR_NO_INODE_ITEM |
3261 I_ERR_FILE_EXTENT_ORPHAN |
3262 I_ERR_FILE_EXTENT_DISCOUNT|
3263 I_ERR_FILE_NBYTES_WRONG)))
3267 * For nlink repair, it may create a dir and add link, so
3268 * 2 for parent(256)'s dir_index and dir_item
3269 * 2 for lost+found dir's inode_item and inode_ref
3270 * 1 for the new inode_ref of the file
3271 * 2 for lost+found dir's dir_index and dir_item for the file
3273 trans = btrfs_start_transaction(root, 7);
3275 return PTR_ERR(trans);
3277 btrfs_init_path(&path);
3278 if (rec->errors & I_ERR_NO_INODE_ITEM)
3279 ret = repair_inode_no_item(trans, root, &path, rec);
3280 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3281 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3282 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3283 ret = repair_inode_discount_extent(trans, root, &path, rec);
3284 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3285 ret = repair_inode_isize(trans, root, &path, rec);
3286 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3287 ret = repair_inode_orphan_item(trans, root, &path, rec);
3288 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3289 ret = repair_inode_nlinks(trans, root, &path, rec);
3290 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3291 ret = repair_inode_nbytes(trans, root, &path, rec);
3292 btrfs_commit_transaction(trans, root);
3293 btrfs_release_path(&path);
/*
 * Verify, and in repair mode try to fix, every inode record collected
 * for @root, consuming @inode_cache in the process.
 *
 * Visible phases:
 *  - a root whose root item has zero refs is not checked; a warning is
 *    printed if records were nevertheless collected for it;
 *  - while repair is enabled, the records are walked in stages and
 *    repair_inode_backrefs() is called for each record with backrefs;
 *  - the root directory record is checked with check_root_dir(); a
 *    missing root directory can be recreated with btrfs_make_root_dir();
 *  - every remaining record is removed from the cache, gets its
 *    I_ERR_NO_INODE_ITEM and I_ERR_LINK_COUNT_WRONG bits set where they
 *    apply, is passed to try_repair_inode(), and is reported together
 *    with its unresolved backrefs if problems remain.
 *
 * Return -1 if the error counter is positive, 0 otherwise.
 *
 * NOTE(review): many control-flow lines (loop headers, gotos, counters)
 * are missing from this listing; the summary covers visible calls only.
 */
3297 static int check_inode_recs(struct btrfs_root *root,
3298 struct cache_tree *inode_cache)
3300 struct cache_extent *cache;
3301 struct ptr_node *node;
3302 struct inode_record *rec;
3303 struct inode_backref *backref;
3308 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3310 if (btrfs_root_refs(&root->root_item) == 0) {
3311 if (!cache_tree_empty(inode_cache))
3312 fprintf(stderr, "warning line %d\n", __LINE__);
3317 * We need to repair backrefs first because we could change some of the
3318 * errors in the inode recs.
3320 * We also need to go through and delete invalid backrefs first and then
3321 * add the correct ones second. We do this because we may get EEXIST
3322 * when adding back the correct index because we hadn't yet deleted the
3325 * For example, if we were missing a dir index then the directories
3326 * isize would be wrong, so if we fixed the isize to what we thought it
3327 * would be and then fixed the backref we'd still have a invalid fs, so
3328 * we need to add back the dir index and then check to see if the isize
3333 if (stage == 3 && !err)
3336 cache = search_cache_extent(inode_cache, 0);
3337 while (repair && cache) {
3338 node = container_of(cache, struct ptr_node, cache);
3340 cache = next_cache_extent(cache);
3342 /* Need to free everything up and rescan */
3344 remove_cache_extent(inode_cache, &node->cache);
3346 free_inode_rec(rec);
3350 if (list_empty(&rec->backrefs))
3353 ret = repair_inode_backrefs(root, rec, inode_cache,
3367 rec = get_inode_rec(inode_cache, root_dirid, 0);
3368 BUG_ON(IS_ERR(rec));
3370 ret = check_root_dir(rec);
3372 fprintf(stderr, "root %llu root dir %llu error\n",
3373 (unsigned long long)root->root_key.objectid,
3374 (unsigned long long)root_dirid);
3375 print_inode_error(root, rec);
3380 struct btrfs_trans_handle *trans;
3382 trans = btrfs_start_transaction(root, 1);
3383 if (IS_ERR(trans)) {
3384 err = PTR_ERR(trans);
3389 "root %llu missing its root dir, recreating\n",
3390 (unsigned long long)root->objectid);
3392 ret = btrfs_make_root_dir(trans, root, root_dirid);
3395 btrfs_commit_transaction(trans, root);
3399 fprintf(stderr, "root %llu root dir %llu not found\n",
3400 (unsigned long long)root->root_key.objectid,
3401 (unsigned long long)root_dirid);
3405 cache = search_cache_extent(inode_cache, 0);
3408 node = container_of(cache, struct ptr_node, cache);
3410 remove_cache_extent(inode_cache, &node->cache);
3412 if (rec->ino == root_dirid ||
3413 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3414 free_inode_rec(rec);
3418 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3419 ret = check_orphan_item(root, rec->ino);
3421 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3422 if (can_free_inode_rec(rec)) {
3423 free_inode_rec(rec);
3428 if (!rec->found_inode_item)
3429 rec->errors |= I_ERR_NO_INODE_ITEM;
3430 if (rec->found_link != rec->nlink)
3431 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3433 ret = try_repair_inode(root, rec);
3434 if (ret == 0 && can_free_inode_rec(rec)) {
3435 free_inode_rec(rec);
3441 if (!(repair && ret == 0))
3443 print_inode_error(root, rec);
3444 list_for_each_entry(backref, &rec->backrefs, list) {
3445 if (!backref->found_dir_item)
3446 backref->errors |= REF_ERR_NO_DIR_ITEM;
3447 if (!backref->found_dir_index)
3448 backref->errors |= REF_ERR_NO_DIR_INDEX;
3449 if (!backref->found_inode_ref)
3450 backref->errors |= REF_ERR_NO_INODE_REF;
3451 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3452 " namelen %u name %s filetype %d errors %x",
3453 (unsigned long long)backref->dir,
3454 (unsigned long long)backref->index,
3455 backref->namelen, backref->name,
3456 backref->filetype, backref->errors);
3457 print_ref_error(backref->errors);
3459 free_inode_rec(rec);
3461 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for @objectid stored in @root_cache.
 *
 * The cache is searched for a one-unit range starting at @objectid. The
 * visible code also allocates a zeroed record, fills in its objectid and
 * cache range (start = objectid, size = 1), initialises the backref list
 * and inserts it into the cache; presumably that path is taken only when
 * the lookup finds nothing - confirm.
 *
 * Failures are reported as ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST).
 */
3464 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3467 struct cache_extent *cache;
3468 struct root_record *rec = NULL;
3471 cache = lookup_cache_extent(root_cache, objectid, 1);
3473 rec = container_of(cache, struct root_record, cache);
3475 rec = calloc(1, sizeof(*rec));
3477 return ERR_PTR(-ENOMEM);
3478 rec->objectid = objectid;
3479 INIT_LIST_HEAD(&rec->backrefs);
3480 rec->cache.start = objectid;
3481 rec->cache.size = 1;
3483 ret = insert_cache_extent(root_cache, &rec->cache);
3485 return ERR_PTR(-EEXIST);
/*
 * Find or create the backref of @rec identified by (@ref_root, @dir,
 * @name).
 *
 * Existing backrefs are compared on ref_root, dir, namelen and the name
 * bytes; @index takes no part in the comparison. A new backref is
 * allocated zeroed with room for the name plus a terminating NUL, filled
 * in, and appended to rec->backrefs.
 *
 * NOTE(review): the return statements and the allocation failure check
 * are not visible in this listing.
 */
3490 static struct root_backref *get_root_backref(struct root_record *rec,
3491 u64 ref_root, u64 dir, u64 index,
3492 const char *name, int namelen)
3494 struct root_backref *backref;
3496 list_for_each_entry(backref, &rec->backrefs, list) {
3497 if (backref->ref_root != ref_root || backref->dir != dir ||
3498 backref->namelen != namelen)
3500 if (memcmp(name, backref->name, namelen))
3505 backref = calloc(1, sizeof(*backref) + namelen + 1);
3508 backref->ref_root = ref_root;
3510 backref->index = index;
3511 backref->namelen = namelen;
3512 memcpy(backref->name, name, namelen);
3513 backref->name[namelen] = '\0';
3514 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record held in a cache tree: every backref is
 * unlinked from rec->backrefs in turn.
 * NOTE(review): the lines that release the backrefs and the record are
 * not visible in this listing - confirm they are freed.
 *
 * The macro invocation after the function passes this callback together
 * with the root_recs name; the tree teardown helper it generates is
 * presumably free_root_recs_tree() - confirm against the macro.
 */
3518 static void free_root_record(struct cache_extent *cache)
3520 struct root_record *rec;
3521 struct root_backref *backref;
3523 rec = container_of(cache, struct root_record, cache);
3524 while (!list_empty(&rec->backrefs)) {
3525 backref = to_root_backref(rec->backrefs.next);
3526 list_del(&backref->list);
3533 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of kind @item_type referring to subvolume @root_id
 * was found in @ref_root.
 *
 * The backref is looked up (or created) by ref_root, dir and name, and
 * @errors is OR-ed into it. For every item type except
 * BTRFS_DIR_ITEM_KEY the index is cross-checked: a different index on a
 * backref that was already seen through a dir index, back ref or forward
 * ref sets REF_ERR_INDEX_UNMATCH. The flag matching @item_type is then
 * set, with duplicate ROOT_REF and ROOT_BACKREF items flagged as errors.
 *
 * A backref that has both a forward ref and a dir item is marked
 * reachable.
 *
 * NOTE(review): the statements that update the found_ref counter of the
 * record are not visible in this listing.
 */
3535 static int add_root_backref(struct cache_tree *root_cache,
3536 u64 root_id, u64 ref_root, u64 dir, u64 index,
3537 const char *name, int namelen,
3538 int item_type, int errors)
3540 struct root_record *rec;
3541 struct root_backref *backref;
3543 rec = get_root_rec(root_cache, root_id);
3544 BUG_ON(IS_ERR(rec));
3545 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3548 backref->errors |= errors;
3550 if (item_type != BTRFS_DIR_ITEM_KEY) {
3551 if (backref->found_dir_index || backref->found_back_ref ||
3552 backref->found_forward_ref) {
3553 if (backref->index != index)
3554 backref->errors |= REF_ERR_INDEX_UNMATCH;
3556 backref->index = index;
3560 if (item_type == BTRFS_DIR_ITEM_KEY) {
3561 if (backref->found_forward_ref)
3563 backref->found_dir_item = 1;
3564 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3565 backref->found_dir_index = 1;
3566 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3567 if (backref->found_forward_ref)
3568 backref->errors |= REF_ERR_DUP_ROOT_REF;
3569 else if (backref->found_dir_item)
3571 backref->found_forward_ref = 1;
3572 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3573 if (backref->found_back_ref)
3574 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3575 backref->found_back_ref = 1;
3580 if (backref->found_forward_ref && backref->found_dir_item)
3581 backref->reachable = 1;
/*
 * Move the records gathered in @src_cache while walking @root into the
 * global root cache @dst_cache.
 *
 * For the relocation tree the source records are simply discarded.
 * Otherwise every record is removed from @src_cache, is_child_root() is
 * consulted for rec->ino, and each backref that has a dir item and/or a
 * dir index is added to @dst_cache through add_root_backref() with the
 * corresponding key type. A backref that carries an inode ref triggers
 * BUG_ON. The source record is freed afterwards.
 *
 * NOTE(review): how the result of is_child_root() steers the loop is
 * not visible in this listing.
 */
3585 static int merge_root_recs(struct btrfs_root *root,
3586 struct cache_tree *src_cache,
3587 struct cache_tree *dst_cache)
3589 struct cache_extent *cache;
3590 struct ptr_node *node;
3591 struct inode_record *rec;
3592 struct inode_backref *backref;
3595 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3596 free_inode_recs_tree(src_cache);
3601 cache = search_cache_extent(src_cache, 0);
3604 node = container_of(cache, struct ptr_node, cache);
3606 remove_cache_extent(src_cache, &node->cache);
3609 ret = is_child_root(root, root->objectid, rec->ino);
3615 list_for_each_entry(backref, &rec->backrefs, list) {
3616 BUG_ON(backref->found_inode_ref);
3617 if (backref->found_dir_item)
3618 add_root_backref(dst_cache, rec->ino,
3619 root->root_key.objectid, backref->dir,
3620 backref->index, backref->name,
3621 backref->namelen, BTRFS_DIR_ITEM_KEY,
3623 if (backref->found_dir_index)
3624 add_root_backref(dst_cache, rec->ino,
3625 root->root_key.objectid, backref->dir,
3626 backref->index, backref->name,
3627 backref->namelen, BTRFS_DIR_INDEX_KEY,
3631 free_inode_rec(rec);
/*
 * Validate the references between subvolume roots recorded in
 * @root_cache.
 *
 * The first pass starts from the record of BTRFS_FS_TREE_OBJECTID and
 * walks all records, clearing the reachable flag of backrefs whose
 * referencing root has no reference itself; the original comment notes
 * that circular references are not detected.
 *
 * The second pass reports problems: roots in the free objectid range
 * that have no reference (an orphan item lookup in the tree root is
 * involved), roots that are referenced but have no root item, and
 * backrefs that miss a dir item, dir index, root backref or root ref.
 *
 * Return 1 if any error was counted, 0 otherwise.
 *
 * NOTE(review): loop headers, counters and continue statements are
 * missing from this listing; the summary covers visible code only.
 */
3638 static int check_root_refs(struct btrfs_root *root,
3639 struct cache_tree *root_cache)
3641 struct root_record *rec;
3642 struct root_record *ref_root;
3643 struct root_backref *backref;
3644 struct cache_extent *cache;
3650 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3651 BUG_ON(IS_ERR(rec));
3654 /* fixme: this can not detect circular references */
3657 cache = search_cache_extent(root_cache, 0);
3661 rec = container_of(cache, struct root_record, cache);
3662 cache = next_cache_extent(cache);
3664 if (rec->found_ref == 0)
3667 list_for_each_entry(backref, &rec->backrefs, list) {
3668 if (!backref->reachable)
3671 ref_root = get_root_rec(root_cache,
3673 BUG_ON(IS_ERR(ref_root));
3674 if (ref_root->found_ref > 0)
3677 backref->reachable = 0;
3679 if (rec->found_ref == 0)
3685 cache = search_cache_extent(root_cache, 0);
3689 rec = container_of(cache, struct root_record, cache);
3690 cache = next_cache_extent(cache);
3692 if (rec->found_ref == 0 &&
3693 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3694 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3695 ret = check_orphan_item(root->fs_info->tree_root,
3701 * If we don't have a root item then we likely just have
3702 * a dir item in a snapshot for this root but no actual
3703 * ref key or anything so it's meaningless.
3705 if (!rec->found_root_item)
3708 fprintf(stderr, "fs tree %llu not referenced\n",
3709 (unsigned long long)rec->objectid);
3713 if (rec->found_ref > 0 && !rec->found_root_item)
3715 list_for_each_entry(backref, &rec->backrefs, list) {
3716 if (!backref->found_dir_item)
3717 backref->errors |= REF_ERR_NO_DIR_ITEM;
3718 if (!backref->found_dir_index)
3719 backref->errors |= REF_ERR_NO_DIR_INDEX;
3720 if (!backref->found_back_ref)
3721 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3722 if (!backref->found_forward_ref)
3723 backref->errors |= REF_ERR_NO_ROOT_REF;
3724 if (backref->reachable && backref->errors)
3731 fprintf(stderr, "fs tree %llu refs %u %s\n",
3732 (unsigned long long)rec->objectid, rec->found_ref,
3733 rec->found_root_item ? "" : "not found");
3735 list_for_each_entry(backref, &rec->backrefs, list) {
3736 if (!backref->reachable)
3738 if (!backref->errors && rec->found_root_item)
3740 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3741 " index %llu namelen %u name %s errors %x\n",
3742 (unsigned long long)backref->ref_root,
3743 (unsigned long long)backref->dir,
3744 (unsigned long long)backref->index,
3745 backref->namelen, backref->name,
3747 print_ref_error(backref->errors);
3750 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and record it in
 * @root_cache.
 *
 * The name stored after the item header is copied into a local buffer of
 * BTRFS_NAME_LEN bytes. A name longer than that is cut to BTRFS_NAME_LEN
 * and flagged with REF_ERR_NAME_TOO_LONG. The buffer is not terminated;
 * its length is passed along explicitly.
 *
 * For a ROOT_REF key the referenced root is key->offset and the
 * referencing root is key->objectid; the other call swaps the two.
 */
3753 static int process_root_ref(struct extent_buffer *eb, int slot,
3754 struct btrfs_key *key,
3755 struct cache_tree *root_cache)
3761 struct btrfs_root_ref *ref;
3762 char namebuf[BTRFS_NAME_LEN];
3765 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3767 dirid = btrfs_root_ref_dirid(eb, ref);
3768 index = btrfs_root_ref_sequence(eb, ref);
3769 name_len = btrfs_root_ref_name_len(eb, ref);
3771 if (name_len <= BTRFS_NAME_LEN) {
3775 len = BTRFS_NAME_LEN;
3776 error = REF_ERR_NAME_TOO_LONG;
3778 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3780 if (key->type == BTRFS_ROOT_REF_KEY) {
3781 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3782 index, namebuf, len, key->type, error);
3784 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3785 index, namebuf, len, key->type, error);
/*
 * Destructor callback for one btrfs_corrupt_block held in a cache tree.
 * NOTE(review): only the container_of() conversion is visible in this
 * listing; confirm that the entry is released.
 *
 * The macro invocation after the function passes this callback together
 * with the corrupt_blocks name; the tree teardown helper it generates is
 * presumably free_corrupt_blocks_tree() - confirm against the macro.
 */
3790 static void free_corrupt_block(struct cache_extent *cache)
3792 struct btrfs_corrupt_block *corrupt;
3794 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3798 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Remove every tree block listed in @corrupt_blocks from @root within a
 * single transaction.
 *
 * Pass one: for each entry the tree is searched down to the recorded
 * level for the recorded key, the block pointer found there is removed
 * with btrfs_del_ptr(), and the extent behind it is released with
 * btrfs_free_extent(). Pass two: each recorded key is searched again
 * with an ins_len of -1 so that the tree gets rebalanced.
 *
 * Nothing is done when the cache tree is empty.
 *
 * NOTE(review): loop headers, error checks and the final return are
 * missing from this listing - confirm the error paths.
 */
3801 * Repair the btree of the given root.
3803 * The fix is to remove the node key in corrupt_blocks cache_tree.
3804 * and rebalance the tree.
3805 * After the fix, the btree should be writeable.
3807 static int repair_btree(struct btrfs_root *root,
3808 struct cache_tree *corrupt_blocks)
3810 struct btrfs_trans_handle *trans;
3811 struct btrfs_path path;
3812 struct btrfs_corrupt_block *corrupt;
3813 struct cache_extent *cache;
3814 struct btrfs_key key;
3819 if (cache_tree_empty(corrupt_blocks))
3822 trans = btrfs_start_transaction(root, 1);
3823 if (IS_ERR(trans)) {
3824 ret = PTR_ERR(trans);
3825 fprintf(stderr, "Error starting transaction: %s\n",
3829 btrfs_init_path(&path);
3830 cache = first_cache_extent(corrupt_blocks);
3832 corrupt = container_of(cache, struct btrfs_corrupt_block,
3834 level = corrupt->level;
3835 path.lowest_level = level;
3836 key.objectid = corrupt->key.objectid;
3837 key.type = corrupt->key.type;
3838 key.offset = corrupt->key.offset;
3841 * Here we don't want to do any tree balance, since it may
3842 * cause a balance with corrupted brother leaf/node,
3843 * so ins_len set to 0 here.
3844 * Balance will be done after all corrupt node/leaf is deleted.
3846 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3849 offset = btrfs_node_blockptr(path.nodes[level],
3852 /* Remove the ptr */
3853 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3857 * Remove the corresponding extent
3858 * return value is not concerned.
3860 btrfs_release_path(&path);
3861 ret = btrfs_free_extent(trans, root, offset,
3862 root->fs_info->nodesize, 0,
3863 root->root_key.objectid, level - 1, 0);
3864 cache = next_cache_extent(cache);
3867 /* Balance the btree using btrfs_search_slot() */
3868 cache = first_cache_extent(corrupt_blocks);
3870 corrupt = container_of(cache, struct btrfs_corrupt_block,
3872 memcpy(&key, &corrupt->key, sizeof(key));
3873 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3876 /* return will always >0 since it won't find the item */
3878 btrfs_release_path(&path);
3879 cache = next_cache_extent(cache);
3882 btrfs_commit_transaction(trans, root);
3883 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible steps:
 *  - install a function-local corrupt block cache in fs_info for the
 *    duration of the check;
 *  - except for the relocation tree, look up the root record and mark
 *    its root item as found when the root item has references;
 *  - move the orphan data extents queued on the root onto their inode
 *    records, flagging I_ERR_FILE_EXTENT_ORPHAN;
 *  - validate the root block with btrfs_check_leaf/btrfs_check_node;
 *  - start the walk at the root node, or at the recorded drop_progress
 *    key for a root that is partly dropped, and alternate
 *    walk_down_tree() and walk_up_tree();
 *  - list corrupted tree blocks and hand them to repair_btree();
 *  - merge the per-root records into @root_cache and run
 *    check_inode_recs() on the collected inode records;
 *  - free the corrupt block cache, reset the fs_info pointer to NULL and
 *    free the orphan extent list.
 *
 * NOTE(review): the gotos, error accumulation and return value are not
 * visible in this listing.
 */
3887 static int check_fs_root(struct btrfs_root *root,
3888 struct cache_tree *root_cache,
3889 struct walk_control *wc)
3895 struct btrfs_path path;
3896 struct shared_node root_node;
3897 struct root_record *rec;
3898 struct btrfs_root_item *root_item = &root->root_item;
3899 struct cache_tree corrupt_blocks;
3900 struct orphan_data_extent *orphan;
3901 struct orphan_data_extent *tmp;
3902 enum btrfs_tree_block_status status;
3903 struct node_refs nrefs;
3906 * Reuse the corrupt_block cache tree to record corrupted tree block
3908 * Unlike the usage in extent tree check, here we do it in a per
3909 * fs/subvol tree base.
3911 cache_tree_init(&corrupt_blocks);
3912 root->fs_info->corrupt_blocks = &corrupt_blocks;
3914 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3915 rec = get_root_rec(root_cache, root->root_key.objectid);
3916 BUG_ON(IS_ERR(rec));
3917 if (btrfs_root_refs(root_item) > 0)
3918 rec->found_root_item = 1;
3921 btrfs_init_path(&path);
3922 memset(&root_node, 0, sizeof(root_node));
3923 cache_tree_init(&root_node.root_cache);
3924 cache_tree_init(&root_node.inode_cache);
3925 memset(&nrefs, 0, sizeof(nrefs));
3927 /* Move the orphan extent record to corresponding inode_record */
3928 list_for_each_entry_safe(orphan, tmp,
3929 &root->orphan_data_extents, list) {
3930 struct inode_record *inode;
3932 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3934 BUG_ON(IS_ERR(inode));
3935 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3936 list_move(&orphan->list, &inode->orphan_extents);
3939 level = btrfs_header_level(root->node);
3940 memset(wc->nodes, 0, sizeof(wc->nodes));
3941 wc->nodes[level] = &root_node;
3942 wc->active_node = level;
3943 wc->root_level = level;
3945 /* We may not have checked the root block, lets do that now */
3946 if (btrfs_is_leaf(root->node))
3947 status = btrfs_check_leaf(root, NULL, root->node);
3949 status = btrfs_check_node(root, NULL, root->node);
3950 if (status != BTRFS_TREE_BLOCK_CLEAN)
3953 if (btrfs_root_refs(root_item) > 0 ||
3954 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3955 path.nodes[level] = root->node;
3956 extent_buffer_get(root->node);
3957 path.slots[level] = 0;
3959 struct btrfs_key key;
3960 struct btrfs_disk_key found_key;
3962 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3963 level = root_item->drop_level;
3964 path.lowest_level = level;
3965 if (level > btrfs_header_level(root->node) ||
3966 level >= BTRFS_MAX_LEVEL) {
3967 error("ignoring invalid drop level: %u", level);
3970 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3973 btrfs_node_key(path.nodes[level], &found_key,
3975 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3976 sizeof(found_key)));
3980 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3986 wret = walk_up_tree(root, &path, wc, &level);
3993 btrfs_release_path(&path);
3995 if (!cache_tree_empty(&corrupt_blocks)) {
3996 struct cache_extent *cache;
3997 struct btrfs_corrupt_block *corrupt;
3999 printf("The following tree block(s) is corrupted in tree %llu:\n",
4000 root->root_key.objectid);
4001 cache = first_cache_extent(&corrupt_blocks);
4003 corrupt = container_of(cache,
4004 struct btrfs_corrupt_block,
4006 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4007 cache->start, corrupt->level,
4008 corrupt->key.objectid, corrupt->key.type,
4009 corrupt->key.offset);
4010 cache = next_cache_extent(cache);
4013 printf("Try to repair the btree for root %llu\n",
4014 root->root_key.objectid);
4015 ret = repair_btree(root, &corrupt_blocks);
4017 fprintf(stderr, "Failed to repair btree: %s\n",
4020 printf("Btree for root %llu is fixed\n",
4021 root->root_key.objectid);
4025 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4029 if (root_node.current) {
4030 root_node.current->checked = 1;
4031 maybe_free_inode_rec(&root_node.inode_cache,
4035 err = check_inode_recs(root, &root_node.inode_cache);
4039 free_corrupt_blocks_tree(&corrupt_blocks);
4040 root->fs_info->corrupt_blocks = NULL;
4041 free_orphan_data_extents(&root->orphan_data_extents);
4045 static int fs_root_objectid(u64 objectid)
4047 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4048 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4050 return is_fstree(objectid);
/*
 * Walk the tree root and check every fs/subvolume tree found in it.
 *
 * Cached block groups are reset first. The tree root is then scanned
 * for items:
 *  - a ROOT_ITEM whose objectid passes fs_root_objectid() is opened
 *    (uncached for the relocation tree, otherwise with key.offset set to
 *    (u64)-1 through btrfs_read_fs_root()) and passed to check_fs_root();
 *    a relocation root opened this way is freed again afterwards;
 *  - ROOT_REF and ROOT_BACKREF items go to process_root_ref().
 *
 * When check_fs_root() returns -EAGAIN, or when the tree root node is
 * seen to differ from the one remembered in tree_node, the root records
 * are freed and the path is released; presumably the scan then restarts
 * - confirm.
 *
 * A progress task is started when ctx.progress_enabled is set, and
 * task_stop() is called at the end.
 *
 * NOTE(review): loop structure, labels and the return value are not
 * visible in this listing.
 */
4053 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4054 struct cache_tree *root_cache)
4056 struct btrfs_path path;
4057 struct btrfs_key key;
4058 struct walk_control wc;
4059 struct extent_buffer *leaf, *tree_node;
4060 struct btrfs_root *root = fs_info->fs_root;
4061 struct btrfs_root *tmp_root;
4062 struct btrfs_root *tree_root = root->fs_info->tree_root;
4066 if (ctx.progress_enabled) {
4067 ctx.tp = TASK_FS_ROOTS;
4068 task_start(ctx.info);
4072 * Just in case we made any changes to the extent tree that weren't
4073 * reflected into the free space cache yet.
4076 reset_cached_block_groups(root->fs_info);
4077 memset(&wc, 0, sizeof(wc));
4078 cache_tree_init(&wc.shared);
4079 btrfs_init_path(&path);
4084 key.type = BTRFS_ROOT_ITEM_KEY;
4085 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4090 tree_node = tree_root->node;
4092 if (tree_node != tree_root->node) {
4093 free_root_recs_tree(root_cache);
4094 btrfs_release_path(&path);
4097 leaf = path.nodes[0];
4098 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4099 ret = btrfs_next_leaf(tree_root, &path);
4105 leaf = path.nodes[0];
4107 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4108 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4109 fs_root_objectid(key.objectid)) {
4110 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4111 tmp_root = btrfs_read_fs_root_no_cache(
4112 root->fs_info, &key);
4114 key.offset = (u64)-1;
4115 tmp_root = btrfs_read_fs_root(
4116 root->fs_info, &key);
4118 if (IS_ERR(tmp_root)) {
4122 ret = check_fs_root(tmp_root, root_cache, &wc);
4123 if (ret == -EAGAIN) {
4124 free_root_recs_tree(root_cache);
4125 btrfs_release_path(&path);
4130 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4131 btrfs_free_fs_root(tmp_root);
4132 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4133 key.type == BTRFS_ROOT_BACKREF_KEY) {
4134 process_root_ref(leaf, path.slots[0], &key,
4141 btrfs_release_path(&path);
4143 free_extent_cache_tree(&wc.shared);
4144 if (!cache_tree_empty(&wc.shared))
4145 fprintf(stderr, "warning line %d\n", __LINE__);
4147 task_stop(ctx.info);
4153 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4154 * INODE_REF/INODE_EXTREF match.
4156 * @root: the root of the fs/file tree
4157 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4158 * @key: the key of the DIR_ITEM/DIR_INDEX
4159 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
4160 * distinguish root_dir between normal dir/file
4161 * @name: the name in the INODE_REF/INODE_EXTREF
4162 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4163 * @mode: the st_mode of INODE_ITEM
4165 * Return 0 if no error occurred.
4166 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4167 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4169 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4170 * not match for normal dir/file.
4172 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4173 struct btrfs_key *key, u64 index, char *name,
4174 u32 namelen, u32 mode)
4176 struct btrfs_path path;
4177 struct extent_buffer *node;
4178 struct btrfs_dir_item *di;
4179 struct btrfs_key location;
4180 char namebuf[BTRFS_NAME_LEN] = {0};
4190 btrfs_init_path(&path);
4191 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4193 ret = DIR_ITEM_MISSING;
4197 /* Process root dir and goto out*/
4200 ret = ROOT_DIR_ERROR;
4202 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4204 ref_key->type == BTRFS_INODE_REF_KEY ?
4206 ref_key->objectid, ref_key->offset,
4207 key->type == BTRFS_DIR_ITEM_KEY ?
4208 "DIR_ITEM" : "DIR_INDEX");
4216 /* Process normal file/dir */
4218 ret = DIR_ITEM_MISSING;
4220 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4222 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4223 ref_key->objectid, ref_key->offset,
4224 key->type == BTRFS_DIR_ITEM_KEY ?
4225 "DIR_ITEM" : "DIR_INDEX",
4226 key->objectid, key->offset, namelen, name,
4227 imode_to_type(mode));
4231 /* Check whether inode_id/filetype/name match */
4232 node = path.nodes[0];
4233 slot = path.slots[0];
4234 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4235 total = btrfs_item_size_nr(node, slot);
4236 while (cur < total) {
4237 ret = DIR_ITEM_MISMATCH;
4238 name_len = btrfs_dir_name_len(node, di);
4239 data_len = btrfs_dir_data_len(node, di);
4241 btrfs_dir_item_key_to_cpu(node, di, &location);
4242 if (location.objectid != ref_key->objectid ||
4243 location.type != BTRFS_INODE_ITEM_KEY ||
4244 location.offset != 0)
4247 filetype = btrfs_dir_type(node, di);
4248 if (imode_to_type(mode) != filetype)
4251 if (cur + sizeof(*di) + name_len > total ||
4252 name_len > BTRFS_NAME_LEN) {
4253 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4255 key->type == BTRFS_DIR_ITEM_KEY ?
4256 "DIR_ITEM" : "DIR_INDEX",
4257 key->objectid, key->offset, name_len);
4259 if (cur + sizeof(*di) > total)
4261 len = min_t(u32, total - cur - sizeof(*di),
4267 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4268 if (len != namelen || strncmp(namebuf, name, len))
4274 len = sizeof(*di) + name_len + data_len;
4275 di = (struct btrfs_dir_item *)((char *)di + len);
4278 if (ret == DIR_ITEM_MISMATCH)
4280 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4282 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4283 ref_key->objectid, ref_key->offset,
4284 key->type == BTRFS_DIR_ITEM_KEY ?
4285 "DIR_ITEM" : "DIR_INDEX",
4286 key->objectid, key->offset, namelen, name,
4287 imode_to_type(mode));
4289 btrfs_release_path(&path);
4294 * Traverse the given INODE_REF and call find_dir_item() to find related
4295 * DIR_ITEM/DIR_INDEX.
4297 * @root: the root of the fs/file tree
4298 * @ref_key: the key of the INODE_REF
4299 * @refs: the count of INODE_REF
4300 * @mode: the st_mode of INODE_ITEM
4302 * Return 0 if no error occurred.
4304 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4305 struct extent_buffer *node, int slot, u64 *refs,
4308 struct btrfs_key key;
4309 struct btrfs_inode_ref *ref;
4310 char namebuf[BTRFS_NAME_LEN] = {0};
4318 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4319 total = btrfs_item_size_nr(node, slot);
4322 /* Update inode ref count */
4325 index = btrfs_inode_ref_index(node, ref);
4326 name_len = btrfs_inode_ref_name_len(node, ref);
4327 if (cur + sizeof(*ref) + name_len > total ||
4328 name_len > BTRFS_NAME_LEN) {
4329 warning("root %llu INODE_REF[%llu %llu] name too long",
4330 root->objectid, ref_key->objectid, ref_key->offset);
4332 if (total < cur + sizeof(*ref))
4334 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4339 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4341 /* Check root dir ref name */
4342 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4343 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4344 root->objectid, ref_key->objectid, ref_key->offset,
4346 err |= ROOT_DIR_ERROR;
4349 /* Find related DIR_INDEX */
4350 key.objectid = ref_key->offset;
4351 key.type = BTRFS_DIR_INDEX_KEY;
4353 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4356 /* Find related dir_item */
4357 key.objectid = ref_key->offset;
4358 key.type = BTRFS_DIR_ITEM_KEY;
4359 key.offset = btrfs_name_hash(namebuf, len);
4360 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4363 len = sizeof(*ref) + name_len;
4364 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4374 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4375 * DIR_ITEM/DIR_INDEX.
4377 * @root: the root of the fs/file tree
4378 * @ref_key: the key of the INODE_EXTREF
4379 * @refs: the count of INODE_EXTREF
4380 * @mode: the st_mode of INODE_ITEM
4382 * Return 0 if no error occurred.
4384 static int check_inode_extref(struct btrfs_root *root,
4385 struct btrfs_key *ref_key,
4386 struct extent_buffer *node, int slot, u64 *refs,
4389 struct btrfs_key key;
4390 struct btrfs_inode_extref *extref;
4391 char namebuf[BTRFS_NAME_LEN] = {0};
4401 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4402 total = btrfs_item_size_nr(node, slot);
4405 /* update inode ref count */
4407 name_len = btrfs_inode_extref_name_len(node, extref);
4408 index = btrfs_inode_extref_index(node, extref);
4409 parent = btrfs_inode_extref_parent(node, extref);
4410 if (name_len <= BTRFS_NAME_LEN) {
4413 len = BTRFS_NAME_LEN;
4414 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4415 root->objectid, ref_key->objectid, ref_key->offset);
4417 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4419 /* Check root dir ref name */
4420 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4421 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4422 root->objectid, ref_key->objectid, ref_key->offset,
4424 err |= ROOT_DIR_ERROR;
4427 /* find related dir_index */
4428 key.objectid = parent;
4429 key.type = BTRFS_DIR_INDEX_KEY;
4431 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4434 /* find related dir_item */
4435 key.objectid = parent;
4436 key.type = BTRFS_DIR_ITEM_KEY;
4437 key.offset = btrfs_name_hash(namebuf, len);
4438 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4441 len = sizeof(*extref) + name_len;
4442 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4452 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4453 * DIR_ITEM/DIR_INDEX match.
4455 * @root: the root of the fs/file tree
4456 * @key: the key of the INODE_REF/INODE_EXTREF
4457 * @name: the name in the INODE_REF/INODE_EXTREF
4458 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4459 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4461 * @ext_ref: the EXTENDED_IREF feature
4463 * Return 0 if no error occurred.
4464 * Return >0 for error bitmap
4466 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4467 char *name, int namelen, u64 index,
4468 unsigned int ext_ref)
4470 struct btrfs_path path;
4471 struct btrfs_inode_ref *ref;
4472 struct btrfs_inode_extref *extref;
4473 struct extent_buffer *node;
4474 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4485 btrfs_init_path(&path);
4486 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4488 ret = INODE_REF_MISSING;
4492 node = path.nodes[0];
4493 slot = path.slots[0];
4495 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4496 total = btrfs_item_size_nr(node, slot);
4498 /* Iterate all entry of INODE_REF */
4499 while (cur < total) {
4500 ret = INODE_REF_MISSING;
4502 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4503 ref_index = btrfs_inode_ref_index(node, ref);
4504 if (index != (u64)-1 && index != ref_index)
4507 if (cur + sizeof(*ref) + ref_namelen > total ||
4508 ref_namelen > BTRFS_NAME_LEN) {
4509 warning("root %llu INODE %s[%llu %llu] name too long",
4511 key->type == BTRFS_INODE_REF_KEY ?
4513 key->objectid, key->offset);
4515 if (cur + sizeof(*ref) > total)
4517 len = min_t(u32, total - cur - sizeof(*ref),
4523 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4526 if (len != namelen || strncmp(ref_namebuf, name, len))
4532 len = sizeof(*ref) + ref_namelen;
4533 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4538 /* Skip if not support EXTENDED_IREF feature */
4542 btrfs_release_path(&path);
4543 btrfs_init_path(&path);
4545 dir_id = key->offset;
4546 key->type = BTRFS_INODE_EXTREF_KEY;
4547 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4549 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4551 ret = INODE_REF_MISSING;
4555 node = path.nodes[0];
4556 slot = path.slots[0];
4558 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4560 total = btrfs_item_size_nr(node, slot);
4562 /* Iterate all entry of INODE_EXTREF */
4563 while (cur < total) {
4564 ret = INODE_REF_MISSING;
4566 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4567 ref_index = btrfs_inode_extref_index(node, extref);
4568 parent = btrfs_inode_extref_parent(node, extref);
4569 if (index != (u64)-1 && index != ref_index)
4572 if (parent != dir_id)
4575 if (ref_namelen <= BTRFS_NAME_LEN) {
4578 len = BTRFS_NAME_LEN;
4579 warning("root %llu INODE %s[%llu %llu] name too long",
4581 key->type == BTRFS_INODE_REF_KEY ?
4583 key->objectid, key->offset);
4585 read_extent_buffer(node, ref_namebuf,
4586 (unsigned long)(extref + 1), len);
4588 if (len != namelen || strncmp(ref_namebuf, name, len))
4595 len = sizeof(*extref) + ref_namelen;
4596 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4601 btrfs_release_path(&path);
4606 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4607 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4609 * @root: the root of the fs/file tree
4610 * @key: the key of the INODE_REF/INODE_EXTREF
4611 * @size: the st_size of the INODE_ITEM
4612 * @ext_ref: the EXTENDED_IREF feature
4614 * Return 0 if no error occurred.
4616 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4617 struct extent_buffer *node, int slot, u64 *size,
4618 unsigned int ext_ref)
4620 struct btrfs_dir_item *di;
4621 struct btrfs_inode_item *ii;
4622 struct btrfs_path path;
4623 struct btrfs_key location;
4624 char namebuf[BTRFS_NAME_LEN] = {0};
4637 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4638 * ignore index check.
4640 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4642 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4643 total = btrfs_item_size_nr(node, slot);
4645 while (cur < total) {
4646 data_len = btrfs_dir_data_len(node, di);
4648 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4649 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650 "DIR_ITEM" : "DIR_INDEX",
4651 key->objectid, key->offset, data_len);
4653 name_len = btrfs_dir_name_len(node, di);
4654 if (cur + sizeof(*di) + name_len > total ||
4655 name_len > BTRFS_NAME_LEN) {
4656 warning("root %llu %s[%llu %llu] name too long",
4658 key->type == BTRFS_DIR_ITEM_KEY ?
4659 "DIR_ITEM" : "DIR_INDEX",
4660 key->objectid, key->offset);
4662 if (cur + sizeof(*di) > total)
4664 len = min_t(u32, total - cur - sizeof(*di),
4669 (*size) += name_len;
4671 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4672 filetype = btrfs_dir_type(node, di);
4674 if (key->type == BTRFS_DIR_ITEM_KEY &&
4675 key->offset != btrfs_name_hash(namebuf, len)) {
4677 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4678 root->objectid, key->objectid, key->offset,
4679 namebuf, len, filetype, key->offset,
4680 btrfs_name_hash(namebuf, len));
4683 btrfs_init_path(&path);
4684 btrfs_dir_item_key_to_cpu(node, di, &location);
4686 /* Ignore related ROOT_ITEM check */
4687 if (location.type == BTRFS_ROOT_ITEM_KEY)
4690 /* Check relative INODE_ITEM(existence/filetype) */
4691 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4693 err |= INODE_ITEM_MISSING;
4694 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4695 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4696 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4697 key->offset, location.objectid, name_len,
4702 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4703 struct btrfs_inode_item);
4704 mode = btrfs_inode_mode(path.nodes[0], ii);
4706 if (imode_to_type(mode) != filetype) {
4707 err |= INODE_ITEM_MISMATCH;
4708 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4709 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4710 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4711 key->offset, name_len, namebuf, filetype);
4714 /* Check relative INODE_REF/INODE_EXTREF */
4715 location.type = BTRFS_INODE_REF_KEY;
4716 location.offset = key->objectid;
4717 ret = find_inode_ref(root, &location, namebuf, len,
4720 if (ret & INODE_REF_MISSING)
4721 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4722 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4723 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4724 key->offset, name_len, namebuf, filetype);
4727 btrfs_release_path(&path);
4728 len = sizeof(*di) + name_len + data_len;
4729 di = (struct btrfs_dir_item *)((char *)di + len);
4732 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4733 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4734 root->objectid, key->objectid, key->offset);
4743 * Check file extent datasum/hole, update the size of the file extents,
4744 * check and update the last offset of the file extent.
4746 * @root: the root of fs/file tree.
4747 * @fkey: the key of the file extent.
4748 * @nodatasum: INODE_NODATASUM feature.
4749 * @size: the sum of all EXTENT_DATA items size for this inode.
4750 * @end: the offset of the last extent.
4752 * Return 0 if no error occurred.
4754 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4755 struct extent_buffer *node, int slot,
4756 unsigned int nodatasum, u64 *size, u64 *end)
4758 struct btrfs_file_extent_item *fi;
4761 u64 extent_num_bytes;
4763 u64 csum_found; /* In byte size, sectorsize aligned */
4764 u64 search_start; /* Logical range start we search for csum */
4765 u64 search_len; /* Logical range len we search for csum */
4766 unsigned int extent_type;
4767 unsigned int is_hole;
4772 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4774 /* Check inline extent */
4775 extent_type = btrfs_file_extent_type(node, fi);
4776 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4777 struct btrfs_item *e = btrfs_item_nr(slot);
4778 u32 item_inline_len;
4780 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4781 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4782 compressed = btrfs_file_extent_compression(node, fi);
4783 if (extent_num_bytes == 0) {
4785 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4786 root->objectid, fkey->objectid, fkey->offset);
4787 err |= FILE_EXTENT_ERROR;
4789 if (!compressed && extent_num_bytes != item_inline_len) {
4791 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4792 root->objectid, fkey->objectid, fkey->offset,
4793 extent_num_bytes, item_inline_len);
4794 err |= FILE_EXTENT_ERROR;
4796 *end += extent_num_bytes;
4797 *size += extent_num_bytes;
4801 /* Check extent type */
4802 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4803 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4804 err |= FILE_EXTENT_ERROR;
4805 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4806 root->objectid, fkey->objectid, fkey->offset);
4810 /* Check REG_EXTENT/PREALLOC_EXTENT */
4811 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4812 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4813 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4814 extent_offset = btrfs_file_extent_offset(node, fi);
4815 compressed = btrfs_file_extent_compression(node, fi);
4816 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4819 * Check EXTENT_DATA csum
4821 * For plain (uncompressed) extent, we should only check the range
4822 * we're referring to, as it's possible that part of prealloc extent
4823 * has been written, and has csum:
4825 * |<--- Original large preallocated extent A ---->|
4826 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4829 * For compressed extent, we should check the whole range.
4832 search_start = disk_bytenr + extent_offset;
4833 search_len = extent_num_bytes;
4835 search_start = disk_bytenr;
4836 search_len = disk_num_bytes;
4838 ret = count_csum_range(root, search_start, search_len, &csum_found);
4839 if (csum_found > 0 && nodatasum) {
4840 err |= ODD_CSUM_ITEM;
4841 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4842 root->objectid, fkey->objectid, fkey->offset);
4843 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4844 !is_hole && (ret < 0 || csum_found < search_len)) {
4845 err |= CSUM_ITEM_MISSING;
4846 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4847 root->objectid, fkey->objectid, fkey->offset,
4848 csum_found, search_len);
4849 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4850 err |= ODD_CSUM_ITEM;
4851 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4852 root->objectid, fkey->objectid, fkey->offset, csum_found);
4855 /* Check EXTENT_DATA hole */
4856 if (!no_holes && *end != fkey->offset) {
4857 err |= FILE_EXTENT_ERROR;
4858 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4859 root->objectid, fkey->objectid, fkey->offset);
4862 *end += extent_num_bytes;
4864 *size += extent_num_bytes;
4870 * Check INODE_ITEM and related ITEMs (the same inode number)
4871 * 1. check link count
4872 * 2. check inode ref/extref
4873 * 3. check dir item/index
4875 * @ext_ref: the EXTENDED_IREF feature
4877 * Return 0 if no error occurred.
4878 * Return >0 for error or hit the traversal is done(by error bitmap)
4880 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4881 unsigned int ext_ref)
4883 struct extent_buffer *node;
4884 struct btrfs_inode_item *ii;
4885 struct btrfs_key key;
4894 u64 extent_size = 0;
4896 unsigned int nodatasum;
4901 node = path->nodes[0];
4902 slot = path->slots[0];
4904 btrfs_item_key_to_cpu(node, &key, slot);
4905 inode_id = key.objectid;
4907 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4908 ret = btrfs_next_item(root, path);
4914 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4915 isize = btrfs_inode_size(node, ii);
4916 nbytes = btrfs_inode_nbytes(node, ii);
4917 mode = btrfs_inode_mode(node, ii);
4918 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4919 nlink = btrfs_inode_nlink(node, ii);
4920 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4923 ret = btrfs_next_item(root, path);
4925 /* out will fill 'err' rusing current statistics */
4927 } else if (ret > 0) {
4932 node = path->nodes[0];
4933 slot = path->slots[0];
4934 btrfs_item_key_to_cpu(node, &key, slot);
4935 if (key.objectid != inode_id)
4939 case BTRFS_INODE_REF_KEY:
4940 ret = check_inode_ref(root, &key, node, slot, &refs,
4944 case BTRFS_INODE_EXTREF_KEY:
4945 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4946 warning("root %llu EXTREF[%llu %llu] isn't supported",
4947 root->objectid, key.objectid,
4949 ret = check_inode_extref(root, &key, node, slot, &refs,
4953 case BTRFS_DIR_ITEM_KEY:
4954 case BTRFS_DIR_INDEX_KEY:
4956 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4957 root->objectid, inode_id,
4958 imode_to_type(mode), key.objectid,
4961 ret = check_dir_item(root, &key, node, slot, &size,
4965 case BTRFS_EXTENT_DATA_KEY:
4967 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4968 root->objectid, inode_id, key.objectid,
4971 ret = check_file_extent(root, &key, node, slot,
4972 nodatasum, &extent_size,
4976 case BTRFS_XATTR_ITEM_KEY:
4979 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4980 key.objectid, key.type, key.offset);
4985 /* verify INODE_ITEM nlink/isize/nbytes */
4988 err |= LINK_COUNT_ERROR;
4989 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4990 root->objectid, inode_id, nlink);
4994 * Just a warning, as dir inode nbytes is just an
4995 * instructive value.
4997 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4998 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4999 root->objectid, inode_id,
5000 root->fs_info->nodesize);
5003 if (isize != size) {
5005 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5006 root->objectid, inode_id, isize, size);
5009 if (nlink != refs) {
5010 err |= LINK_COUNT_ERROR;
5011 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5012 root->objectid, inode_id, nlink, refs);
5013 } else if (!nlink) {
5017 if (!nbytes && !no_holes && extent_end < isize) {
5018 err |= NBYTES_ERROR;
5019 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5020 root->objectid, inode_id, isize);
5023 if (nbytes != extent_size) {
5024 err |= NBYTES_ERROR;
5025 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5026 root->objectid, inode_id, nbytes, extent_size);
5033 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5035 struct btrfs_path path;
5036 struct btrfs_key key;
5040 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5041 key.type = BTRFS_INODE_ITEM_KEY;
5044 /* For root being dropped, we don't need to check first inode */
5045 if (btrfs_root_refs(&root->root_item) == 0 &&
5046 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5050 btrfs_init_path(&path);
5052 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5057 err |= INODE_ITEM_MISSING;
5058 error("first inode item of root %llu is missing",
5062 err |= check_inode_item(root, &path, ext_ref);
5067 btrfs_release_path(&path);
5072 * Iterate all item on the tree and call check_inode_item() to check.
5074 * @root: the root of the tree to be checked.
5075 * @ext_ref: the EXTENDED_IREF feature
5077 * Return 0 if no error found.
5078 * Return <0 for error.
5080 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5082 struct btrfs_path path;
5083 struct node_refs nrefs;
5084 struct btrfs_root_item *root_item = &root->root_item;
5090 * We need to manually check the first inode item(256)
5091 * As the following traversal function will only start from
5092 * the first inode item in the leaf, if inode item(256) is missing
5093 * we will just skip it forever.
5095 ret = check_fs_first_inode(root, ext_ref);
5099 memset(&nrefs, 0, sizeof(nrefs));
5100 level = btrfs_header_level(root->node);
5101 btrfs_init_path(&path);
5103 if (btrfs_root_refs(root_item) > 0 ||
5104 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5105 path.nodes[level] = root->node;
5106 path.slots[level] = 0;
5107 extent_buffer_get(root->node);
5109 struct btrfs_key key;
5111 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5112 level = root_item->drop_level;
5113 path.lowest_level = level;
5114 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5121 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5124 /* if ret is negative, walk shall stop */
5130 ret = walk_up_tree_v2(root, &path, &level);
5132 /* Normal exit, reset ret to err */
5139 btrfs_release_path(&path);
5144 * Find the relative ref for root_ref and root_backref.
5146 * @root: the root of the root tree.
5147 * @ref_key: the key of the root ref.
5149 * Return 0 if no error occurred.
5151 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5152 struct extent_buffer *node, int slot)
5154 struct btrfs_path path;
5155 struct btrfs_key key;
5156 struct btrfs_root_ref *ref;
5157 struct btrfs_root_ref *backref;
5158 char ref_name[BTRFS_NAME_LEN] = {0};
5159 char backref_name[BTRFS_NAME_LEN] = {0};
5165 u32 backref_namelen;
5170 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5171 ref_dirid = btrfs_root_ref_dirid(node, ref);
5172 ref_seq = btrfs_root_ref_sequence(node, ref);
5173 ref_namelen = btrfs_root_ref_name_len(node, ref);
5175 if (ref_namelen <= BTRFS_NAME_LEN) {
5178 len = BTRFS_NAME_LEN;
5179 warning("%s[%llu %llu] ref_name too long",
5180 ref_key->type == BTRFS_ROOT_REF_KEY ?
5181 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5184 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5186 /* Find relative root_ref */
5187 key.objectid = ref_key->offset;
5188 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5189 key.offset = ref_key->objectid;
5191 btrfs_init_path(&path);
5192 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5194 err |= ROOT_REF_MISSING;
5195 error("%s[%llu %llu] couldn't find relative ref",
5196 ref_key->type == BTRFS_ROOT_REF_KEY ?
5197 "ROOT_REF" : "ROOT_BACKREF",
5198 ref_key->objectid, ref_key->offset);
5202 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5203 struct btrfs_root_ref);
5204 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5205 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5206 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5208 if (backref_namelen <= BTRFS_NAME_LEN) {
5209 len = backref_namelen;
5211 len = BTRFS_NAME_LEN;
5212 warning("%s[%llu %llu] ref_name too long",
5213 key.type == BTRFS_ROOT_REF_KEY ?
5214 "ROOT_REF" : "ROOT_BACKREF",
5215 key.objectid, key.offset);
5217 read_extent_buffer(path.nodes[0], backref_name,
5218 (unsigned long)(backref + 1), len);
5220 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5221 ref_namelen != backref_namelen ||
5222 strncmp(ref_name, backref_name, len)) {
5223 err |= ROOT_REF_MISMATCH;
5224 error("%s[%llu %llu] mismatch relative ref",
5225 ref_key->type == BTRFS_ROOT_REF_KEY ?
5226 "ROOT_REF" : "ROOT_BACKREF",
5227 ref_key->objectid, ref_key->offset);
5230 btrfs_release_path(&path);
5235 * Check all fs/file tree in low_memory mode.
5237 * 1. for fs tree root item, call check_fs_root_v2()
5238 * 2. for fs tree root ref/backref, call check_root_ref()
5240 * Return 0 if no error occurred.
5242 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5244 struct btrfs_root *tree_root = fs_info->tree_root;
5245 struct btrfs_root *cur_root = NULL;
5246 struct btrfs_path path;
5247 struct btrfs_key key;
5248 struct extent_buffer *node;
5249 unsigned int ext_ref;
5254 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5256 btrfs_init_path(&path);
5257 key.objectid = BTRFS_FS_TREE_OBJECTID;
5259 key.type = BTRFS_ROOT_ITEM_KEY;
5261 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5265 } else if (ret > 0) {
5271 node = path.nodes[0];
5272 slot = path.slots[0];
5273 btrfs_item_key_to_cpu(node, &key, slot);
5274 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5276 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5277 fs_root_objectid(key.objectid)) {
5278 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5279 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5282 key.offset = (u64)-1;
5283 cur_root = btrfs_read_fs_root(fs_info, &key);
5286 if (IS_ERR(cur_root)) {
5287 error("Fail to read fs/subvol tree: %lld",
5293 ret = check_fs_root_v2(cur_root, ext_ref);
5296 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5297 btrfs_free_fs_root(cur_root);
5298 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5299 key.type == BTRFS_ROOT_BACKREF_KEY) {
5300 ret = check_root_ref(tree_root, &key, node, slot);
5304 ret = btrfs_next_item(tree_root, &path);
5314 btrfs_release_path(&path);
5318 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5320 struct list_head *cur = rec->backrefs.next;
5321 struct extent_backref *back;
5322 struct tree_backref *tback;
5323 struct data_backref *dback;
5327 while(cur != &rec->backrefs) {
5328 back = to_extent_backref(cur);
5330 if (!back->found_extent_tree) {
5334 if (back->is_data) {
5335 dback = to_data_backref(back);
5336 fprintf(stderr, "Backref %llu %s %llu"
5337 " owner %llu offset %llu num_refs %lu"
5338 " not found in extent tree\n",
5339 (unsigned long long)rec->start,
5340 back->full_backref ?
5342 back->full_backref ?
5343 (unsigned long long)dback->parent:
5344 (unsigned long long)dback->root,
5345 (unsigned long long)dback->owner,
5346 (unsigned long long)dback->offset,
5347 (unsigned long)dback->num_refs);
5349 tback = to_tree_backref(back);
5350 fprintf(stderr, "Backref %llu parent %llu"
5351 " root %llu not found in extent tree\n",
5352 (unsigned long long)rec->start,
5353 (unsigned long long)tback->parent,
5354 (unsigned long long)tback->root);
5357 if (!back->is_data && !back->found_ref) {
5361 tback = to_tree_backref(back);
5362 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5363 (unsigned long long)rec->start,
5364 back->full_backref ? "parent" : "root",
5365 back->full_backref ?
5366 (unsigned long long)tback->parent :
5367 (unsigned long long)tback->root, back);
5369 if (back->is_data) {
5370 dback = to_data_backref(back);
5371 if (dback->found_ref != dback->num_refs) {
5375 fprintf(stderr, "Incorrect local backref count"
5376 " on %llu %s %llu owner %llu"
5377 " offset %llu found %u wanted %u back %p\n",
5378 (unsigned long long)rec->start,
5379 back->full_backref ?
5381 back->full_backref ?
5382 (unsigned long long)dback->parent:
5383 (unsigned long long)dback->root,
5384 (unsigned long long)dback->owner,
5385 (unsigned long long)dback->offset,
5386 dback->found_ref, dback->num_refs, back);
5388 if (dback->disk_bytenr != rec->start) {
5392 fprintf(stderr, "Backref disk bytenr does not"
5393 " match extent record, bytenr=%llu, "
5394 "ref bytenr=%llu\n",
5395 (unsigned long long)rec->start,
5396 (unsigned long long)dback->disk_bytenr);
5399 if (dback->bytes != rec->nr) {
5403 fprintf(stderr, "Backref bytes do not match "
5404 "extent backref, bytenr=%llu, ref "
5405 "bytes=%llu, backref bytes=%llu\n",
5406 (unsigned long long)rec->start,
5407 (unsigned long long)rec->nr,
5408 (unsigned long long)dback->bytes);
5411 if (!back->is_data) {
5414 dback = to_data_backref(back);
5415 found += dback->found_ref;
5418 if (found != rec->refs) {
5422 fprintf(stderr, "Incorrect global backref count "
5423 "on %llu found %llu wanted %llu\n",
5424 (unsigned long long)rec->start,
5425 (unsigned long long)found,
5426 (unsigned long long)rec->refs);
/*
 * Drain the backref list hanging off an extent record.
 *
 * Loops while rec->backrefs is non-empty, taking the current head of the
 * list and converting it to its extent_backref with to_extent_backref().
 *
 * NOTE(review): the statements that unlink and release each entry, and the
 * return statement, are not shown in the reviewed lines; presumably every
 * node is removed and freed so that the loop terminates - confirm against
 * the full file.
 */
5432 static int free_all_extent_backrefs(struct extent_record *rec)
5434 struct extent_backref *back;
5435 struct list_head *cur;
5436 while (!list_empty(&rec->backrefs)) {
/* Always operate on the first entry still linked into the list. */
5437 cur = rec->backrefs.next;
5438 back = to_extent_backref(cur);
/*
 * Tear down the extent records stored in @extent_cache.
 *
 * For the entry returned by first_cache_extent(): map the cache_extent back
 * to its enclosing extent_record, remove it from the tree, then drop its
 * backrefs through free_all_extent_backrefs().
 *
 * NOTE(review): the loop construct, its exit condition and the release of
 * the record itself are not shown in the reviewed lines - confirm in the
 * full file.
 */
5445 static void free_extent_record_cache(struct cache_tree *extent_cache)
5447 struct cache_extent *cache;
5448 struct extent_record *rec;
5451 cache = first_cache_extent(extent_cache);
5454 rec = container_of(cache, struct extent_record, cache);
/* Unlink from the tree before the contents of the record are released. */
5455 remove_cache_extent(extent_cache, cache);
5456 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it remains to be verified.
 *
 * The record is released only when all of the following hold:
 *  - content_checked and owner_ref_checked are both set,
 *  - extent_item_refs equals refs and refs is non-zero,
 *  - num_duplicates is zero,
 *  - all_backpointers_checked(rec, 0) returns zero,
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 *
 * NOTE(review): freeing of @rec itself and the return value are not shown in
 * the reviewed lines.
 */
5461 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5462 struct extent_record *rec)
5464 if (rec->content_checked && rec->owner_ref_checked &&
5465 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5466 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5467 !rec->bad_full_backref && !rec->crossing_stripes &&
5468 !rec->wrong_chunk_type) {
/* Fully verified: leave the cache tree, drop backrefs, leave rec->list. */
5469 remove_cache_extent(extent_cache, &rec->cache);
5470 free_all_extent_backrefs(rec);
5471 list_del_init(&rec->list);
/*
 * Check whether tree block @buf is referenced by the tree named in its
 * header owner field.
 *
 * Step 1: scan rec->backrefs. Each node is tested for found_ref and
 * full_backref, and a tree backref whose root equals btrfs_header_owner(buf)
 * is looked for.
 * Step 2: otherwise read the root with key (owner, ROOT_ITEM_KEY, -1), take
 * key 0 of @buf, search that root with path.lowest_level set to level + 1 and
 * compare buf->start with the block pointer stored in the parent slot.
 *
 * Returns 0 when the final comparison path sets found, 1 otherwise.
 *
 * NOTE(review): the declarations of level, ret and found, the actions taken
 * by the tests in step 1 (presumably skip or early return), the handling of
 * IS_ERR(ref_root) and of a failing search, and the test choosing between the
 * item key and node key helpers are not shown in the reviewed lines.
 * NOTE(review): BUG_ON(rec->is_root) aborts the program when the record is
 * flagged as a root and no matching backref was found - confirm this is
 * acceptable for damaged filesystems.
 */
5477 static int check_owner_ref(struct btrfs_root *root,
5478 struct extent_record *rec,
5479 struct extent_buffer *buf)
5481 struct extent_backref *node;
5482 struct tree_backref *back;
5483 struct btrfs_root *ref_root;
5484 struct btrfs_key key;
5485 struct btrfs_path path;
5486 struct extent_buffer *parent;
5491 list_for_each_entry(node, &rec->backrefs, list) {
5494 if (!node->found_ref)
5496 if (node->full_backref)
5498 back = to_tree_backref(node);
5499 if (btrfs_header_owner(buf) == back->root)
5502 BUG_ON(rec->is_root);
5504 /* try to find the block by search corresponding fs tree */
5505 key.objectid = btrfs_header_owner(buf);
5506 key.type = BTRFS_ROOT_ITEM_KEY;
5507 key.offset = (u64)-1;
5509 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5510 if (IS_ERR(ref_root))
5513 level = btrfs_header_level(buf);
5515 btrfs_item_key_to_cpu(buf, &key, 0);
5517 btrfs_node_key_to_cpu(buf, &key, 0);
5519 btrfs_init_path(&path);
/* Stop one level above buf so that the parent node ends up in the path. */
5520 path.lowest_level = level + 1;
5521 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5525 parent = path.nodes[level + 1];
/* The parent slot must point at exactly this block. */
5526 if (parent && buf->start == btrfs_node_blockptr(parent,
5527 path.slots[level + 1]))
5530 btrfs_release_path(&path);
5531 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.
 *
 * Walks rec->backrefs from the first entry; each node is converted to a
 * tree_backref, its full_backref flag is tested, and its root is compared
 * with the extent tree objectid.
 *
 * NOTE(review): the list advance, any filtering of data backrefs, and every
 * return statement are not shown in the reviewed lines, so the exact result
 * for each case has to be confirmed in the full file.
 */
5534 static int is_extent_tree_record(struct extent_record *rec)
5536 struct list_head *cur = rec->backrefs.next;
5537 struct extent_backref *node;
5538 struct tree_backref *back;
5541 while(cur != &rec->backrefs) {
5542 node = to_extent_backref(cur);
5546 back = to_tree_backref(node);
5547 if (node->full_backref)
5549 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a corrupt extent record for the range (start, len) when the range
 * is tracked in @extent_cache and passes the is_extent_tree_record() test.
 *
 * The cached record is looked up, its parent_key is converted to CPU byte
 * order, and the result of btrfs_add_corrupt_extent_record() is returned.
 *
 * NOTE(review): the remaining parameters of the signature (start and len are
 * used below), the handling of a failed lookup and the value returned when
 * is_extent_tree_record() yields zero are not shown in the reviewed lines.
 */
5556 static int record_bad_block_io(struct btrfs_fs_info *info,
5557 struct cache_tree *extent_cache,
5560 struct extent_record *rec;
5561 struct cache_extent *cache;
5562 struct btrfs_key key;
5564 cache = lookup_cache_extent(extent_cache, start, len);
5568 rec = container_of(cache, struct extent_record, cache);
5569 if (!is_extent_tree_record(rec))
5572 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5573 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level non-zero): the two btrfs_key_ptr structures are read
 * out and written back crosswise, and btrfs_fixup_low_keys() is called with
 * key 0 of the node.
 * Leaf: keys, data offsets and sizes of both items are read, both payloads
 * are copied into temporary heap buffers and written back crosswise, the
 * offset and size fields of the two item headers are exchanged, and the keys
 * are rewritten through btrfs_set_item_key_unsafe() with path->slots[0]
 * pointed at each slot in turn.
 *
 * NOTE(review): allocation failure handling, release of the temporary
 * buffers, the condition guarding the low key fixup and the return value are
 * not shown in the reviewed lines.
 * NOTE(review): in the leaf branch item1_data holds item1_size bytes but is
 * written with length item2_size, and item2_data holds item2_size bytes but
 * is written with length item1_size. When the two sizes differ one write
 * reads past the end of its source buffer and the other copies a truncated
 * payload - confirm and fix in the full file.
 */
5576 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5577 struct extent_buffer *buf, int slot)
5579 if (btrfs_header_level(buf)) {
5580 struct btrfs_key_ptr ptr1, ptr2;
5582 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5583 sizeof(struct btrfs_key_ptr));
5584 read_extent_buffer(buf, &ptr2,
5585 btrfs_node_key_ptr_offset(slot + 1),
5586 sizeof(struct btrfs_key_ptr));
/* Write each key pointer into the position of the other one. */
5587 write_extent_buffer(buf, &ptr1,
5588 btrfs_node_key_ptr_offset(slot + 1),
5589 sizeof(struct btrfs_key_ptr));
5590 write_extent_buffer(buf, &ptr2,
5591 btrfs_node_key_ptr_offset(slot),
5592 sizeof(struct btrfs_key_ptr));
5594 struct btrfs_disk_key key;
5595 btrfs_node_key(buf, &key, 0);
5596 btrfs_fixup_low_keys(root, path, &key,
5597 btrfs_header_level(buf) + 1);
5600 struct btrfs_item *item1, *item2;
5601 struct btrfs_key k1, k2;
5602 char *item1_data, *item2_data;
5603 u32 item1_offset, item2_offset, item1_size, item2_size;
5605 item1 = btrfs_item_nr(slot);
5606 item2 = btrfs_item_nr(slot + 1);
5607 btrfs_item_key_to_cpu(buf, &k1, slot);
5608 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5609 item1_offset = btrfs_item_offset(buf, item1);
5610 item2_offset = btrfs_item_offset(buf, item2);
5611 item1_size = btrfs_item_size(buf, item1);
5612 item2_size = btrfs_item_size(buf, item2);
5614 item1_data = malloc(item1_size);
5617 item2_data = malloc(item2_size);
5623 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5624 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* NOTE(review): source buffer and length belong to different items here. */
5626 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5627 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5631 btrfs_set_item_offset(buf, item1, item2_offset);
5632 btrfs_set_item_offset(buf, item2, item1_offset);
5633 btrfs_set_item_size(buf, item1, item2_size);
5634 btrfs_set_item_size(buf, item2, item1_size);
/* The key setter works on path->slots[0], so aim it at each slot in turn. */
5636 path->slots[0] = slot;
5637 btrfs_set_item_key_unsafe(root, path, &k2);
5638 path->slots[0] = slot + 1;
5639 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block found at path->nodes[path->lowest_level].
 *
 * Every adjacent pair of keys (i, i + 1) is read, using the node key or the
 * item key helpers, and compared with btrfs_comp_cpu_keys(). swap_values()
 * is called for a pair and the buffer is marked dirty afterwards.
 *
 * NOTE(review): the declarations of i and ret, the test choosing between the
 * node and item helpers, the action taken when the comparison is negative
 * (presumably the pair is already ordered and is skipped), the error handling
 * after swap_values() and the return value are not shown in the reviewed
 * lines.
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, a block
 * with zero items makes nritems - 1 wrap to a huge bound - confirm.
 */
5644 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5646 struct extent_buffer *buf;
5647 struct btrfs_key k1, k2;
5649 int level = path->lowest_level;
5652 buf = path->nodes[level];
5653 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5655 btrfs_node_key_to_cpu(buf, &k1, i);
5656 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5658 btrfs_item_key_to_cpu(buf, &k1, i);
5659 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5661 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5663 ret = swap_values(root, path, buf, i);
5666 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only keys of type DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF are eligible; the type test below covers every other type.
 * For an eligible key a message is printed, the item headers following @slot
 * are moved down by one header with memmove_extent_buffer(), the item count
 * in the header is decremented, btrfs_fixup_low_keys() is called with the key
 * of item 0, and the buffer is marked dirty.
 *
 * Only the array of item headers is moved in the visible lines; the item
 * payload area is not touched here.
 *
 * NOTE(review): the value returned for ineligible keys, the condition that
 * guards the low key fixup (presumably slot 0) and the final return value are
 * not shown in the reviewed lines.
 * NOTE(review): buf->start is passed to a %llu conversion without the cast
 * used for the other arguments - confirm its type matches.
 */
5672 static int delete_bogus_item(struct btrfs_root *root,
5673 struct btrfs_path *path,
5674 struct extent_buffer *buf, int slot)
5676 struct btrfs_key key;
5677 int nritems = btrfs_header_nritems(buf);
5679 btrfs_item_key_to_cpu(buf, &key, slot);
5681 /* These are all the keys we can deal with missing. */
5682 if (key.type != BTRFS_DIR_INDEX_KEY &&
5683 key.type != BTRFS_EXTENT_ITEM_KEY &&
5684 key.type != BTRFS_METADATA_ITEM_KEY &&
5685 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5686 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5689 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5690 (unsigned long long)key.objectid, key.type,
5691 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the header array left by the removed slot. */
5692 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5693 btrfs_item_nr_offset(slot + 1),
5694 sizeof(struct btrfs_item) *
5695 (nritems - slot - 1));
5696 btrfs_set_header_nritems(buf, nritems - 1);
5698 struct btrfs_disk_key disk_key;
5700 btrfs_item_key(buf, &disk_key, 0);
5701 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5703 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout enforced by the checks below:
 *  - item 0 ends at BTRFS_LEAF_DATA_SIZE(root),
 *  - every later item ends where the previous item begins.
 * When an item ends beyond its expected end, delete_bogus_item() is tried and
 * a message is printed on a path whose condition is not visible. Otherwise
 * shift is the gap between the expected and the actual end; the payload is
 * moved up by shift bytes with memmove_extent_buffer(), the item offset is
 * updated and the buffer is marked dirty.
 *
 * Aborts through BUG_ON() when path->lowest_level is non-zero, i.e. when it
 * is not called for a leaf.
 *
 * NOTE(review): the declarations of i and ret, the handling of the
 * delete_bogus_item() result, the test that skips items needing no shift, the
 * second argument line of btrfs_set_item_offset() and the return statement
 * are not shown in the reviewed lines.
 */
5707 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5709 struct extent_buffer *buf;
5713 /* We should only get this for leaves */
5714 BUG_ON(path->lowest_level);
5715 buf = path->nodes[0];
5717 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5718 unsigned int shift = 0, offset;
5720 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5721 BTRFS_LEAF_DATA_SIZE(root)) {
5722 if (btrfs_item_end_nr(buf, i) >
5723 BTRFS_LEAF_DATA_SIZE(root)) {
5724 ret = delete_bogus_item(root, path, buf, i);
5727 fprintf(stderr, "item is off the end of the "
5728 "leaf, can't fix\n");
5732 shift = BTRFS_LEAF_DATA_SIZE(root) -
5733 btrfs_item_end_nr(buf, i);
5734 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5735 btrfs_item_offset_nr(buf, i - 1)) {
5736 if (btrfs_item_end_nr(buf, i) >
5737 btrfs_item_offset_nr(buf, i - 1)) {
5738 ret = delete_bogus_item(root, path, buf, i);
5741 fprintf(stderr, "items overlap, can't fix\n");
5745 shift = btrfs_item_offset_nr(buf, i - 1) -
5746 btrfs_item_end_nr(buf, i);
5751 printf("Shifting item nr %d by %u bytes in block %llu\n",
5752 i, shift, (unsigned long long)buf->start);
5753 offset = btrfs_item_offset_nr(buf, i);
5754 memmove_extent_buffer(buf,
5755 btrfs_leaf_data(buf) + offset + shift,
5756 btrfs_leaf_data(buf) + offset,
5757 btrfs_item_size_nr(buf, i));
5758 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5760 btrfs_mark_buffer_dirty(buf);
5764 * We may have moved things, in which case we want to exit so we don't
5765 * write those changes out. Once we have proper abort functionality in
5766 * progs this can be changed to something nicer.
5773 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5774 * then just return -EIO.
/*
 * Try to repair tree block @buf in every root that references it.
 *
 * Only the statuses BTRFS_TREE_BLOCK_BAD_KEY_ORDER and
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS are handled. The set of referencing roots
 * is obtained from btrfs_find_all_roots() for buf->start. For each root id in
 * that list the root is read with key (id, ROOT_ITEM_KEY, -1), a transaction
 * is started, the path is configured with lowest_level set to the level of
 * @buf and skip_check_block set, and the tree is searched for key 0 of @buf.
 * fix_key_order() or fix_item_offset() is then run depending on @status and
 * the transaction is committed.
 *
 * NOTE(review): the declaration of ret, the value returned for unsupported
 * statuses, all error checks after the calls below, the conditions that
 * select between the three visible btrfs_commit_transaction() calls, the
 * release of the roots list and the final return are not shown in the
 * reviewed lines.
 * NOTE(review): the last argument of btrfs_search_slot() is 1 here and 0 in
 * check_owner_ref(); presumably it requests copy on write of the path -
 * confirm against the btrfs_search_slot() declaration.
 */
5776 static int try_to_fix_bad_block(struct btrfs_root *root,
5777 struct extent_buffer *buf,
5778 enum btrfs_tree_block_status status)
5780 struct btrfs_trans_handle *trans;
5781 struct ulist *roots;
5782 struct ulist_node *node;
5783 struct btrfs_root *search_root;
5784 struct btrfs_path path;
5785 struct ulist_iterator iter;
5786 struct btrfs_key root_key, key;
5789 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5790 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5793 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5797 btrfs_init_path(&path);
5798 ULIST_ITER_INIT(&iter);
5799 while ((node = ulist_next(roots, &iter))) {
/* node->val carries the objectid of one referencing root. */
5800 root_key.objectid = node->val;
5801 root_key.type = BTRFS_ROOT_ITEM_KEY;
5802 root_key.offset = (u64)-1;
5804 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5811 trans = btrfs_start_transaction(search_root, 0);
5812 if (IS_ERR(trans)) {
5813 ret = PTR_ERR(trans);
5817 path.lowest_level = btrfs_header_level(buf);
5818 path.skip_check_block = 1;
5819 if (path.lowest_level)
5820 btrfs_node_key_to_cpu(buf, &key, 0);
5822 btrfs_item_key_to_cpu(buf, &key, 0);
5823 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5826 btrfs_commit_transaction(trans, search_root);
5829 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5830 ret = fix_key_order(search_root, &path);
5831 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5832 ret = fix_item_offset(search_root, &path);
5834 btrfs_commit_transaction(trans, search_root);
5837 btrfs_release_path(&path);
5838 btrfs_commit_transaction(trans, search_root);
5841 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record in @extent_cache.
 *
 * The record covering (buf->start, buf->len) is looked up and receives the
 * header generation, the objectid of the first key (only when the block has
 * items) and the header level. The block is then checked with
 * btrfs_check_leaf() or btrfs_check_node() against rec->parent_key. On a
 * status other than BTRFS_TREE_BLOCK_CLEAN a repair through
 * try_to_fix_bad_block() is attempted and a bad block message is printed when
 * the status is still not clean. Afterwards content_checked is set,
 * owner_ref_checked is set either directly for blocks flagged
 * BTRFS_BLOCK_FLAG_FULL_BACKREF or based on check_owner_ref(), and
 * maybe_free_extent_rec() is given the chance to drop the record.
 *
 * NOTE(review): the declarations of ret and level, the handling of a failed
 * lookup, the test choosing between item and node key helpers, the guard
 * around the repair attempt, the test applied to the check_owner_ref()
 * result and all return statements are not shown in the reviewed lines.
 * NOTE(review): the int result of try_to_fix_bad_block() is stored in a
 * variable of type enum btrfs_tree_block_status and compared with
 * BTRFS_TREE_BLOCK_CLEAN - confirm the two value spaces agree for success.
 */
5845 static int check_block(struct btrfs_root *root,
5846 struct cache_tree *extent_cache,
5847 struct extent_buffer *buf, u64 flags)
5849 struct extent_record *rec;
5850 struct cache_extent *cache;
5851 struct btrfs_key key;
5852 enum btrfs_tree_block_status status;
5856 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5859 rec = container_of(cache, struct extent_record, cache);
5860 rec->generation = btrfs_header_generation(buf);
5862 level = btrfs_header_level(buf);
5863 if (btrfs_header_nritems(buf) > 0) {
5866 btrfs_item_key_to_cpu(buf, &key, 0);
5868 btrfs_node_key_to_cpu(buf, &key, 0);
5870 rec->info_objectid = key.objectid;
5872 rec->info_level = level;
5874 if (btrfs_is_leaf(buf))
5875 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5877 status = btrfs_check_node(root, &rec->parent_key, buf);
5879 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5881 status = try_to_fix_bad_block(root, buf, status);
5882 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5884 fprintf(stderr, "bad block %llu\n",
5885 (unsigned long long)buf->start);
5888 * Signal to callers we need to start the scan over
5889 * again since we'll have cowed blocks.
5894 rec->content_checked = 1;
5895 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5896 rec->owner_ref_checked = 1;
5898 ret = check_owner_ref(root, rec, buf);
5900 rec->owner_ref_checked = 1;
5904 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref matching @parent or @root.
 *
 * Two comparison modes are visible: one tests full backrefs against @parent,
 * the other tests non-full backrefs against @root.
 *
 * NOTE(review): the list advance, the filtering of data backrefs, the
 * selector between the two modes (presumably a non-zero @parent), the
 * actions taken by each test and the return statements are not shown in the
 * reviewed lines.
 */
5908 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5909 u64 parent, u64 root)
5911 struct list_head *cur = rec->backrefs.next;
5912 struct extent_backref *node;
5913 struct tree_backref *back;
5915 while(cur != &rec->backrefs) {
5916 node = to_extent_backref(cur);
5920 back = to_tree_backref(node);
5922 if (!node->full_backref)
5924 if (parent == back->parent)
5927 if (node->full_backref)
5929 if (back->root == root)
/*
 * Allocate a tree backref and append it to rec->backrefs.
 *
 * The embedded extent_backref node is zeroed. One visible branch stores
 * @parent and sets full_backref, the other clears full_backref; the new node
 * is then linked at the tail of the list.
 *
 * NOTE(review): the allocation failure check, the branch condition
 * (presumably a non-zero @parent), the assignment of the root field and the
 * return statement are not shown in the reviewed lines.
 * NOTE(review): only ref->node is cleared by the memset, so any field not
 * assigned explicitly keeps whatever malloc() returned.
 */
5936 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5937 u64 parent, u64 root)
5939 struct tree_backref *ref = malloc(sizeof(*ref));
5943 memset(&ref->node, 0, sizeof(ref->node));
5945 ref->parent = parent;
5946 ref->node.full_backref = 1;
5949 ref->node.full_backref = 0;
5951 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a data backref matching the given
 * identity.
 *
 * Two comparison modes are visible: full backrefs are compared by @parent,
 * non-full backrefs by the triple (@root, @owner, @offset). In the second
 * mode an additional test fires when found_ref is set, the candidate already
 * has found_ref set, and its bytes or disk_bytenr differ from the arguments.
 *
 * NOTE(review): one parameter line of the signature (found_ref is used
 * below), the list advance, the filtering of tree backrefs, the selector
 * between the two modes, the action of each test (the mismatch test
 * presumably rejects the candidate) and the return statements are not shown
 * in the reviewed lines.
 */
5956 static struct data_backref *find_data_backref(struct extent_record *rec,
5957 u64 parent, u64 root,
5958 u64 owner, u64 offset,
5960 u64 disk_bytenr, u64 bytes)
5962 struct list_head *cur = rec->backrefs.next;
5963 struct extent_backref *node;
5964 struct data_backref *back;
5966 while(cur != &rec->backrefs) {
5967 node = to_extent_backref(cur);
5971 back = to_data_backref(node);
5973 if (!node->full_backref)
5975 if (parent == back->parent)
5978 if (node->full_backref)
5980 if (back->root == root && back->owner == owner &&
5981 back->offset == offset) {
5982 if (found_ref && node->found_ref &&
5983 (back->bytes != bytes ||
5984 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref and append it to rec->backrefs.
 *
 * The embedded extent_backref node is zeroed and flagged is_data. One visible
 * branch stores @parent and sets full_backref, the other stores @offset and
 * clears full_backref. bytes is initialised from max_size, the node is linked
 * at the tail of the list, and rec->max_size is raised when max_size exceeds
 * it.
 *
 * NOTE(review): the final parameter line of the signature (max_size is used
 * below), the allocation failure check, the branch condition, the remaining
 * field assignments and the return statement are not shown in the reviewed
 * lines.
 */
5993 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5994 u64 parent, u64 root,
5995 u64 owner, u64 offset,
5998 struct data_backref *ref = malloc(sizeof(*ref));
6002 memset(&ref->node, 0, sizeof(ref->node));
6003 ref->node.is_data = 1;
6006 ref->parent = parent;
6009 ref->node.full_backref = 1;
6013 ref->offset = offset;
6014 ref->node.full_backref = 0;
6016 ref->bytes = max_size;
6019 list_add_tail(&ref->node.list, &rec->backrefs);
/* Keep the record aware of the largest size seen for this extent. */
6020 if (max_size > rec->max_size)
6021 rec->max_size = max_size;
6025 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the block group found for rec->start by
 * btrfs_lookup_first_block_group() has flags that do not fit the extent:
 *  - a data extent (rec->metadata clear) needs BTRFS_BLOCK_GROUP_DATA,
 *  - a metadata extent needs SYSTEM or METADATA,
 *  - when the record has backrefs, only the first one is examined: a data
 *    backref on a metadata extent is wrong, a tree backref rooted in
 *    BTRFS_CHUNK_TREE_OBJECTID requires a SYSTEM block group and any other
 *    root requires a METADATA block group.
 *
 * NOTE(review): the handling of a failed block group lookup, the declaration
 * of bg_type and the early returns between the stages are not shown in the
 * reviewed lines.
 */
6026 static void check_extent_type(struct extent_record *rec)
6028 struct btrfs_block_group_cache *bg_cache;
6030 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6034 /* data extent, check chunk directly*/
6035 if (!rec->metadata) {
6036 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6037 rec->wrong_chunk_type = 1;
6041 /* metadata extent, check the obvious case first */
6042 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6043 BTRFS_BLOCK_GROUP_METADATA))) {
6044 rec->wrong_chunk_type = 1;
6049 * Check SYSTEM extent, as it's also marked as metadata, we can only
6050 * make sure it's a SYSTEM extent by its backref
6052 if (!list_empty(&rec->backrefs)) {
6053 struct extent_backref *node;
6054 struct tree_backref *tback;
6057 node = to_extent_backref(rec->backrefs.next);
6058 if (node->is_data) {
6059 /* tree block shouldn't have data backref */
6060 rec->wrong_chunk_type = 1;
6063 tback = container_of(node, struct tree_backref, node);
6065 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6066 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6068 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6069 if (!(bg_cache->flags & bg_type))
6070 rec->wrong_chunk_type = 1;
6075 * Allocate a new extent record, fill default values from @tmpl and insert into
6076 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6077 * the cache, otherwise it fails.
/*
 * Create an extent record from @tmpl and insert it into @extent_cache.
 *
 * Copied from the template: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key. Reset to defaults: num_duplicates,
 * flag_block_full_backref (FLAG_UNSET), bad_full_backref, crossing_stripes
 * and wrong_chunk_type. nr becomes the larger of tmpl->nr and
 * tmpl->max_size. After a successful insert the global bytes_used counter
 * grows by rec->nr, the stripe crossing check is evaluated and
 * check_extent_type() is run.
 *
 * Aborts through BUG_ON() when tmpl->max_size is zero.
 *
 * NOTE(review): the declaration of ret, the allocation failure check, the
 * handling of a failed insert, the guard in front of the stripe crossing
 * check (presumably metadata only) and the return statement are not shown in
 * the reviewed lines.
 * NOTE(review): the cache entry is sized with tmpl->nr while rec->nr uses
 * the maximum of nr and max_size, so the two can differ - confirm this is
 * intended.
 */
6079 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6080 struct extent_record *tmpl)
6082 struct extent_record *rec;
6085 BUG_ON(tmpl->max_size == 0);
6086 rec = malloc(sizeof(*rec));
6089 rec->start = tmpl->start;
6090 rec->max_size = tmpl->max_size;
6091 rec->nr = max(tmpl->nr, tmpl->max_size);
6092 rec->found_rec = tmpl->found_rec;
6093 rec->content_checked = tmpl->content_checked;
6094 rec->owner_ref_checked = tmpl->owner_ref_checked;
6095 rec->num_duplicates = 0;
6096 rec->metadata = tmpl->metadata;
6097 rec->flag_block_full_backref = FLAG_UNSET;
6098 rec->bad_full_backref = 0;
6099 rec->crossing_stripes = 0;
6100 rec->wrong_chunk_type = 0;
6101 rec->is_root = tmpl->is_root;
6102 rec->refs = tmpl->refs;
6103 rec->extent_item_refs = tmpl->extent_item_refs;
6104 rec->parent_generation = tmpl->parent_generation;
6105 INIT_LIST_HEAD(&rec->backrefs);
6106 INIT_LIST_HEAD(&rec->dups);
6107 INIT_LIST_HEAD(&rec->list);
6108 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6109 rec->cache.start = tmpl->start;
6110 rec->cache.size = tmpl->nr;
6111 ret = insert_cache_extent(extent_cache, &rec->cache);
6116 bytes_used += rec->nr;
6119 rec->crossing_stripes = check_crossing_stripes(global_info,
6120 rec->start, global_info->nodesize);
6121 check_extent_type(rec);
6126 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6128 * - refs - if found, increase refs
6129 * - is_root - if found, set
6130 * - content_checked - if found, set
6131 * - owner_ref_checked - if found, set
6133 * If not found, create a new one, initialize and insert.
/*
 * Merge the information in @tmpl into the extent record covering
 * (tmpl->start, tmpl->nr), or create a new record when none exists.
 *
 * Existing record:
 *  - nr may be reset to the larger of tmpl->nr and tmpl->max_size,
 *  - when tmpl->found_rec is set and either the start differs or the record
 *    was already found, the record is queued on duplicate_extents (if it is
 *    not on a list yet) and a new entry carrying start, max_size, metadata
 *    and extent_item_refs from the template is appended to rec->dups,
 *  - extent_item_refs is taken from the template when no duplicate was
 *    detected; a message is printed if the record already had a value,
 *  - content_checked and owner_ref_checked are sticky once set,
 *  - parent_key is copied, parent_generation is copied when non-zero and
 *    max_size only grows,
 *  - the stripe crossing check and check_extent_type() are evaluated and
 *    maybe_free_extent_rec() may drop the record.
 * No record: add_extent_rec_nolookup() creates one from the template.
 *
 * NOTE(review): the declarations of ret and dup, the reference count update,
 * the guard in front of the nr reset, the allocation failure check for the
 * duplicate entry, several guards and every return statement are not shown in
 * the reviewed lines.
 * NOTE(review): the duplicate entry comes from malloc() and only the fields
 * listed above are assigned in the visible lines - confirm the hidden lines
 * initialise the rest.
 */
6135 static int add_extent_rec(struct cache_tree *extent_cache,
6136 struct extent_record *tmpl)
6138 struct extent_record *rec;
6139 struct cache_extent *cache;
6143 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6145 rec = container_of(cache, struct extent_record, cache);
6149 rec->nr = max(tmpl->nr, tmpl->max_size);
6152 * We need to make sure to reset nr to whatever the extent
6153 * record says was the real size, this way we can compare it to
6156 if (tmpl->found_rec) {
6157 if (tmpl->start != rec->start || rec->found_rec) {
6158 struct extent_record *tmp;
6161 if (list_empty(&rec->list))
6162 list_add_tail(&rec->list,
6163 &duplicate_extents);
6166 * We have to do this song and dance in case we
6167 * find an extent record that falls inside of
6168 * our current extent record but does not have
6169 * the same objectid.
6171 tmp = malloc(sizeof(*tmp));
6174 tmp->start = tmpl->start;
6175 tmp->max_size = tmpl->max_size;
6178 tmp->metadata = tmpl->metadata;
6179 tmp->extent_item_refs = tmpl->extent_item_refs;
6180 INIT_LIST_HEAD(&tmp->list);
6181 list_add_tail(&tmp->list, &rec->dups);
6182 rec->num_duplicates++;
6189 if (tmpl->extent_item_refs && !dup) {
6190 if (rec->extent_item_refs) {
6191 fprintf(stderr, "block %llu rec "
6192 "extent_item_refs %llu, passed %llu\n",
6193 (unsigned long long)tmpl->start,
6194 (unsigned long long)
6195 rec->extent_item_refs,
6196 (unsigned long long)tmpl->extent_item_refs);
6198 rec->extent_item_refs = tmpl->extent_item_refs;
6202 if (tmpl->content_checked)
6203 rec->content_checked = 1;
6204 if (tmpl->owner_ref_checked)
6205 rec->owner_ref_checked = 1;
6206 memcpy(&rec->parent_key, &tmpl->parent_key,
6207 sizeof(tmpl->parent_key));
6208 if (tmpl->parent_generation)
6209 rec->parent_generation = tmpl->parent_generation;
6210 if (rec->max_size < tmpl->max_size)
6211 rec->max_size = tmpl->max_size;
6214 * A metadata extent can't cross stripe_len boundary, otherwise
6215 * kernel scrub won't be able to handle it.
6216 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6220 rec->crossing_stripes = check_crossing_stripes(
6221 global_info, rec->start,
6222 global_info->nodesize);
6223 check_extent_type(rec);
6224 maybe_free_extent_rec(extent_cache, rec);
6228 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the extent at @bytenr.
 *
 * The extent record is looked up with a one byte range; when none exists a
 * zeroed template with start set to @bytenr is inserted through
 * add_extent_rec_nolookup() and the lookup is repeated. The backref matching
 * (@parent, @root) is found or allocated. Depending on the found_ref branch
 * either node.found_ref or node.found_extent_tree is set, and a message is
 * printed when the flag in question was already set. Finally
 * check_extent_type() and maybe_free_extent_rec() are run on the record.
 *
 * NOTE(review): the declaration of ret, the remaining template fields, all
 * error handling, the reaction to rec->start differing from @bytenr, the
 * test on found_ref that separates the two flag branches and the return
 * statements are not shown in the reviewed lines.
 */
6233 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6234 u64 parent, u64 root, int found_ref)
6236 struct extent_record *rec;
6237 struct tree_backref *back;
6238 struct cache_extent *cache;
6241 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6243 struct extent_record tmpl;
6245 memset(&tmpl, 0, sizeof(tmpl));
6246 tmpl.start = bytenr;
6251 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6255 /* really a bug in cache_extent implement now */
6256 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6261 rec = container_of(cache, struct extent_record, cache);
6262 if (rec->start != bytenr) {
6264 * Several cause, from unaligned bytenr to over lapping extents
6269 back = find_tree_backref(rec, parent, root);
6271 back = alloc_tree_backref(rec, parent, root);
6277 if (back->node.found_ref) {
6278 fprintf(stderr, "Extent back ref already exists "
6279 "for %llu parent %llu root %llu \n",
6280 (unsigned long long)bytenr,
6281 (unsigned long long)parent,
6282 (unsigned long long)root);
6284 back->node.found_ref = 1;
6286 if (back->node.found_extent_tree) {
6287 fprintf(stderr, "Extent back ref already exists "
6288 "for %llu parent %llu root %llu \n",
6289 (unsigned long long)bytenr,
6290 (unsigned long long)parent,
6291 (unsigned long long)root);
6293 back->node.found_extent_tree = 1;
6295 check_extent_type(rec);
6296 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up with a one byte range; when none exists a
 * zeroed template with start and max_size set is inserted through
 * add_extent_rec_nolookup() and the lookup is repeated. rec->max_size only
 * grows. The matching backref is found or allocated.
 *
 * One branch (reference seen in a file tree, per the flags it sets): requires
 * num_refs to be 1, requires the stored bytes to equal max_size when the
 * backref was already found, sets node.found_ref, increments the found_ref
 * counter, stores bytes and disk_bytenr, and marks the record as content and
 * owner checked.
 * Other branch: prints a message when found_extent_tree was already set, then
 * stores num_refs and sets found_extent_tree.
 * maybe_free_extent_rec() is run at the end.
 *
 * NOTE(review): the declaration of ret, the remaining template fields, all
 * error handling, the remaining arguments of the find and alloc calls, the
 * test that separates the two branches and the return statements are not
 * shown in the reviewed lines.
 * NOTE(review): both BUG_ON() conditions depend on values that presumably
 * originate from on-disk data, so a damaged filesystem may abort the check
 * here - confirm.
 */
6300 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6301 u64 parent, u64 root, u64 owner, u64 offset,
6302 u32 num_refs, int found_ref, u64 max_size)
6304 struct extent_record *rec;
6305 struct data_backref *back;
6306 struct cache_extent *cache;
6309 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6311 struct extent_record tmpl;
6313 memset(&tmpl, 0, sizeof(tmpl));
6314 tmpl.start = bytenr;
6316 tmpl.max_size = max_size;
6318 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6322 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6327 rec = container_of(cache, struct extent_record, cache);
6328 if (rec->max_size < max_size)
6329 rec->max_size = max_size;
6332 * If found_ref is set then max_size is the real size and must match the
6333 * existing refs. So if we have already found a ref then we need to
6334 * make sure that this ref matches the existing one, otherwise we need
6335 * to add a new backref so we can notice that the backrefs don't match
6336 * and we need to figure out who is telling the truth. This is to
6337 * account for that awful fsync bug I introduced where we'd end up with
6338 * a btrfs_file_extent_item that would have its length include multiple
6339 * prealloc extents or point inside of a prealloc extent.
6341 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6344 back = alloc_data_backref(rec, parent, root, owner, offset,
6350 BUG_ON(num_refs != 1);
6351 if (back->node.found_ref)
6352 BUG_ON(back->bytes != max_size);
6353 back->node.found_ref = 1;
6354 back->found_ref += 1;
6355 back->bytes = max_size;
6356 back->disk_bytenr = bytenr;
6358 rec->content_checked = 1;
6359 rec->owner_ref_checked = 1;
6361 if (back->node.found_extent_tree) {
6362 fprintf(stderr, "Extent back ref already exists "
6363 "for %llu parent %llu root %llu "
6364 "owner %llu offset %llu num_refs %lu\n",
6365 (unsigned long long)bytenr,
6366 (unsigned long long)parent,
6367 (unsigned long long)root,
6368 (unsigned long long)owner,
6369 (unsigned long long)offset,
6370 (unsigned long)num_refs);
6372 back->num_refs = num_refs;
6373 back->node.found_extent_tree = 1;
6375 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range (bytenr, size) for processing.
 *
 * The range is first added to @seen and then to @pending.
 *
 * NOTE(review): the declaration of ret, the test applied to the result of
 * the first insert (presumably an already seen range is not queued again)
 * and the return statements are not shown in the reviewed lines.
 * NOTE(review): the result of the insert into @pending is ignored in the
 * visible lines.
 */
6379 static int add_pending(struct cache_tree *pending,
6380 struct cache_tree *seen, u64 bytenr, u32 size)
6383 ret = add_cache_extent(seen, bytenr, size);
6386 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits with the next batch of block ranges to process.
 *
 * Candidate sources, in the order they are consulted in the visible lines:
 *  1. the first entry of @reada, copied into bits[0],
 *  2. entries of @nodes starting at a position derived from @last (moved back
 *     by 32768 when @last is larger than that), falling back to the start of
 *     @nodes,
 *  3. entries of @pending from its start.
 * Batches from source 2 or 3 are collected by walking consecutive cache
 * extents until the tree ends or @bits_nr entries are stored. When more than
 * 8 slots remain free, entries of @pending that start no more than 32768
 * bytes after the end of the previously taken range are appended as well.
 *
 * NOTE(review): the last parameter line of the signature, the declaration and
 * updates of ret, every null check on cache, the loop around the final
 * append and all return statements are not shown in the reviewed lines.
 * NOTE(review): @last is u64 but node_start is unsigned long, which truncates
 * the value on ABIs where unsigned long is 32 bits wide.
 */
6390 static int pick_next_pending(struct cache_tree *pending,
6391 struct cache_tree *reada,
6392 struct cache_tree *nodes,
6393 u64 last, struct block_info *bits, int bits_nr,
6396 unsigned long node_start = last;
6397 struct cache_extent *cache;
6400 cache = search_cache_extent(reada, 0);
6402 bits[0].start = cache->start;
6403 bits[0].size = cache->size;
6408 if (node_start > 32768)
6409 node_start -= 32768;
6411 cache = search_cache_extent(nodes, node_start);
6413 cache = search_cache_extent(nodes, 0);
6416 cache = search_cache_extent(pending, 0);
6421 bits[ret].start = cache->start;
6422 bits[ret].size = cache->size;
6423 cache = next_cache_extent(cache);
6425 } while (cache && ret < bits_nr);
6431 bits[ret].start = cache->start;
6432 bits[ret].size = cache->size;
6433 cache = next_cache_extent(cache);
6435 } while (cache && ret < bits_nr);
6437 if (bits_nr - ret > 8) {
6438 u64 lookup = bits[0].start + bits[0].size;
6439 struct cache_extent *next;
6440 next = search_cache_extent(pending, lookup);
6442 if (next->start - lookup > 32768)
6444 bits[ret].start = next->start;
6445 bits[ret].size = next->size;
6446 lookup = next->start + next->size;
6450 next = next_cache_extent(next);
/*
 * Release callback for one chunk record stored in a cache tree.
 *
 * Maps @cache back to its chunk_record and unlinks the record from both of
 * its lists (list and dextents).
 *
 * NOTE(review): the statement that frees the record is not shown in the
 * reviewed lines.
 */
6458 static void free_chunk_record(struct cache_extent *cache)
6460 struct chunk_record *rec;
6462 rec = container_of(cache, struct chunk_record, cache);
6463 list_del_init(&rec->list);
6464 list_del_init(&rec->dextents);
/*
 * Release every chunk record held in @chunk_cache by handing
 * free_chunk_record() to cache_tree_free_extents().
 */
6468 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6470 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Release callback for one device record stored in an rb tree.
 *
 * Maps @node back to its enclosing device_record.
 *
 * NOTE(review): the statement that frees the record is not shown in the
 * reviewed lines.
 */
6473 static void free_device_record(struct rb_node *node)
6475 struct device_record *rec;
6477 rec = container_of(node, struct device_record, node);
/*
 * NOTE(review): presumably expands to the definition of a helper that frees a
 * whole device_cache tree using free_device_record() for each node - confirm
 * against the macro definition, which is not part of the reviewed lines.
 */
6481 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to the block group tree: insert its cache extent into
 * tree->tree and append the record to the tree->block_groups list.
 *
 * NOTE(review): the declaration of ret, the handling of a failed insert
 * (presumably an early return before the list append) and the return
 * statement are not shown in the reviewed lines.
 */
6483 int insert_block_group_record(struct block_group_tree *tree,
6484 struct block_group_record *bg_rec)
6488 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6492 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Release callback for one block group record stored in a cache tree.
 *
 * Maps @cache back to its block_group_record and unlinks it from its list.
 *
 * NOTE(review): the statement that frees the record is not shown in the
 * reviewed lines.
 */
6496 static void free_block_group_record(struct cache_extent *cache)
6498 struct block_group_record *rec;
6500 rec = container_of(cache, struct block_group_record, cache);
6501 list_del_init(&rec->list);
/*
 * Release every block group record held in @tree by handing
 * free_block_group_record() to cache_tree_free_extents().
 */
6505 void free_block_group_tree(struct block_group_tree *tree)
6507 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to the device extent tree.
 *
 * The cache extent is inserted with insert_cache_extent2() and the record is
 * appended to both orphan lists of the tree (no_chunk_orphans and
 * no_device_orphans).
 *
 * NOTE(review): the declaration of ret, the handling of a failed insert and
 * the return statement are not shown in the reviewed lines.
 */
6510 int insert_device_extent_record(struct device_extent_tree *tree,
6511 struct device_extent_record *de_rec)
6516 * Device extent is a bit different from the other extents, because
6517 * the extents which belong to the different devices may have the
6518 * same start and size, so we need use the special extent cache
6519 * search/insert functions.
6521 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6525 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6526 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Release callback for one device extent record stored in a cache tree.
 *
 * Maps @cache back to its device_extent_record and unlinks it from
 * chunk_list and device_list when it is still linked there.
 *
 * NOTE(review): the statement that frees the record is not shown in the
 * reviewed lines.
 */
6530 static void free_device_extent_record(struct cache_extent *cache)
6532 struct device_extent_record *rec;
6534 rec = container_of(cache, struct device_extent_record, cache);
6535 if (!list_empty(&rec->chunk_list))
6536 list_del_init(&rec->chunk_list);
6537 if (!list_empty(&rec->device_list))
6538 list_del_init(&rec->device_list);
/*
 * Release every device extent record held in @tree by handing
 * free_device_extent_record() to cache_tree_free_extents().
 */
6542 void free_device_extent_tree(struct device_extent_tree *tree)
6544 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6547 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert a v0 extent reference item at @slot of @leaf into a backref.
 *
 * The item key supplies the extent start (key.objectid) and the parent
 * (key.offset). A reference whose objectid is below BTRFS_FIRST_FREE_OBJECTID
 * is recorded through add_tree_backref(); any other reference is recorded
 * through add_data_backref() with the v0 reference count as num_refs.
 *
 * Only compiled when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * NOTE(review): the declaration of ret, the remaining arguments of the
 * add_tree_backref() call and the return statement are not shown in the
 * reviewed lines.
 */
6548 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6549 struct extent_buffer *leaf, int slot)
6551 struct btrfs_extent_ref_v0 *ref0;
6552 struct btrfs_key key;
6555 btrfs_item_key_to_cpu(leaf, &key, slot);
6556 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6557 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6558 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6561 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6562 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with room for the stripe array
 * (btrfs_chunk_record_size(num_stripes)). The cache range is
 * (key->offset, chunk length); generation comes from the leaf header; the
 * key fields are copied verbatim; the remaining fields are read from the
 * chunk item, followed by devid, offset and device uuid of every stripe.
 *
 * A message is printed when the allocation fails.
 *
 * NOTE(review): the slot parameter line, the declarations of num_stripes and
 * i, the statement following the allocation failure message, the length
 * argument of the uuid copy and the return statement are not shown in the
 * reviewed lines.
 */
6568 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6569 struct btrfs_key *key,
6572 struct btrfs_chunk *ptr;
6573 struct chunk_record *rec;
6576 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6577 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6579 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6581 fprintf(stderr, "memory allocation failed\n");
6585 INIT_LIST_HEAD(&rec->list);
6586 INIT_LIST_HEAD(&rec->dextents);
6589 rec->cache.start = key->offset;
6590 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6592 rec->generation = btrfs_header_generation(leaf);
6594 rec->objectid = key->objectid;
6595 rec->type = key->type;
6596 rec->offset = key->offset;
6598 rec->length = rec->cache.size;
6599 rec->owner = btrfs_chunk_owner(leaf, ptr);
6600 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6601 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6602 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6603 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6604 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6605 rec->num_stripes = num_stripes;
6606 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6608 for (i = 0; i < rec->num_stripes; ++i) {
6609 rec->stripes[i].devid =
6610 btrfs_stripe_devid_nr(leaf, ptr, i);
6611 rec->stripes[i].offset =
6612 btrfs_stripe_offset_nr(leaf, ptr, i);
6613 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6614 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first passed to btrfs_check_chunk_valid(); an invalid chunk is
 * reported with its offset and length. A chunk_record is then built with
 * btrfs_new_chunk_record() and inserted into the cache; a message is printed
 * when the insert fails.
 *
 * NOTE(review): the slot parameter line, the declaration of ret, the last
 * argument of the validity check, the tests on both results, the cleanup of
 * a record that could not be inserted and the return statements are not
 * shown in the reviewed lines.
 * NOTE(review): rec is dereferenced right after btrfs_new_chunk_record()
 * with no visible null check - confirm the helper cannot return NULL.
 */
6621 static int process_chunk_item(struct cache_tree *chunk_cache,
6622 struct btrfs_key *key, struct extent_buffer *eb,
6625 struct chunk_record *rec;
6626 struct btrfs_chunk *chunk;
6629 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6631 * Do extra check for this chunk item,
6633 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6634 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6635 * and owner<->key_type check.
6637 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6640 error("chunk(%llu, %llu) is not valid, ignore it",
6641 key->offset, btrfs_chunk_length(eb, chunk));
6644 rec = btrfs_new_chunk_record(eb, key, slot);
6645 ret = insert_cache_extent(chunk_cache, &rec->cache);
6647 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6648 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the rb tree @dev_cache, ordered by device_record_compare.
 *
 * The record takes its generation from the leaf header, its key fields from
 * @key, and devid, total_byte and byte_used from the dev item. Messages are
 * printed when the allocation fails and when the insert reports an existing
 * device.
 *
 * NOTE(review): the declaration of ret, the statements following both
 * messages and the return statement are not shown in the reviewed lines.
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * later from the dev item; the first assignment is dead unless hidden lines
 * read it in between.
 * NOTE(review): the record comes from malloc(), so any field not assigned in
 * the visible lines is uninitialised unless hidden lines set it.
 */
6655 static int process_device_item(struct rb_root *dev_cache,
6656 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6658 struct btrfs_dev_item *ptr;
6659 struct device_record *rec;
6662 ptr = btrfs_item_ptr(eb,
6663 slot, struct btrfs_dev_item);
6665 rec = malloc(sizeof(*rec));
6667 fprintf(stderr, "memory allocation failed\n");
6671 rec->devid = key->offset;
6672 rec->generation = btrfs_header_generation(eb);
6674 rec->objectid = key->objectid;
6675 rec->type = key->type;
6676 rec->offset = key->offset;
6678 rec->devid = btrfs_device_id(eb, ptr);
6679 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6680 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6682 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6684 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is allocated zeroed. The cache range is
 * (key->objectid, key->offset); generation comes from the leaf header; the
 * key fields are copied verbatim; flags are read from the item; the list
 * head is initialised.
 *
 * A message is printed when the allocation fails.
 *
 * NOTE(review): the slot parameter line, the statement following the
 * allocation failure message and the return statement are not shown in the
 * reviewed lines.
 */
6691 struct block_group_record *
6692 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6695 struct btrfs_block_group_item *ptr;
6696 struct block_group_record *rec;
6698 rec = calloc(1, sizeof(*rec));
6700 fprintf(stderr, "memory allocation failed\n");
6704 rec->cache.start = key->objectid;
6705 rec->cache.size = key->offset;
6707 rec->generation = btrfs_header_generation(leaf);
6709 rec->objectid = key->objectid;
6710 rec->type = key->type;
6711 rec->offset = key->offset;
6713 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6714 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6716 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block group record for the item at @slot of @eb and insert it into
 * @block_group_cache; a message with objectid and offset is printed when the
 * insert fails.
 *
 * NOTE(review): the declaration of ret, the test on the insert result, the
 * cleanup of a record that could not be inserted and the return statement
 * are not shown in the reviewed lines.
 */
6721 static int process_block_group_item(struct block_group_tree *block_group_cache,
6722 struct btrfs_key *key,
6723 struct extent_buffer *eb, int slot)
6725 struct block_group_record *rec;
6728 rec = btrfs_new_block_group_record(eb, key, slot);
6729 ret = insert_block_group_record(block_group_cache, rec);
6731 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6732 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is allocated zeroed. The cache entry is keyed by
 * (key->objectid, key->offset) with the extent length as its size;
 * generation comes from the leaf header; the key fields are copied verbatim;
 * the chunk objectid and length are read from the item; both list heads are
 * initialised.
 *
 * A message is printed when the allocation fails.
 *
 * NOTE(review): the statement following the allocation failure message, the
 * left-hand side of the chunk offset assignment and the return statement are
 * not shown in the reviewed lines.
 */
6739 struct device_extent_record *
6740 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6741 struct btrfs_key *key, int slot)
6743 struct device_extent_record *rec;
6744 struct btrfs_dev_extent *ptr;
6746 rec = calloc(1, sizeof(*rec));
6748 fprintf(stderr, "memory allocation failed\n");
6752 rec->cache.objectid = key->objectid;
6753 rec->cache.start = key->offset;
6755 rec->generation = btrfs_header_generation(leaf);
6757 rec->objectid = key->objectid;
6758 rec->type = key->type;
6759 rec->offset = key->offset;
6761 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6762 rec->chunk_objecteid =
6763 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6765 btrfs_dev_extent_chunk_offset(leaf, ptr);
6766 rec->length = btrfs_dev_extent_length(leaf, ptr);
6767 rec->cache.size = rec->length;
6769 INIT_LIST_HEAD(&rec->chunk_list);
6770 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record from the item at `slot` of `eb` and insert it
 * into `dev_extent_cache`.
 *
 * NOTE(review): the return type line, the last parameter, the condition
 * guarding the "existed" message and the return path are not visible in
 * this excerpt.
 */
6776 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6777 struct btrfs_key *key, struct extent_buffer *eb,
6780 struct device_extent_record *rec;
6783 rec = btrfs_new_device_extent_record(eb, key, slot);
6784 ret = insert_device_extent_record(dev_extent_cache, rec);
6787 "Device extent[%llu, %llu, %llu] existed.\n",
6788 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item (EXTENT_ITEM or METADATA_ITEM key) at `slot` of `eb`:
 * add an extent record for it to `extent_cache`, then walk the inline
 * references stored after the item and add a tree or data backref for each.
 *
 * Extents with an unaligned bytenr or an implausible length are reported
 * through error().
 *
 * NOTE(review): several declarations, the loop/switch headers and all return
 * statements are not visible in this excerpt.
 */
6795 static int process_extent_item(struct btrfs_root *root,
6796 struct cache_tree *extent_cache,
6797 struct extent_buffer *eb, int slot)
6799 struct btrfs_extent_item *ei;
6800 struct btrfs_extent_inline_ref *iref;
6801 struct btrfs_extent_data_ref *dref;
6802 struct btrfs_shared_data_ref *sref;
6803 struct btrfs_key key;
6804 struct extent_record tmpl;
6809 u32 item_size = btrfs_item_size_nr(eb, slot);
6815 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * For a METADATA_ITEM key the length is the filesystem nodesize; the other
 * visible assignment takes the length from key.offset.
 */
6817 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6819 num_bytes = root->fs_info->nodesize;
6821 num_bytes = key.offset;
/* The extent start must be sector aligned. */
6824 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6825 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6826 key.objectid, root->fs_info->sectorsize);
/*
 * An item smaller than struct btrfs_extent_item is handled separately: with
 * BTRFS_COMPAT_EXTENT_TREE_V0 the ref count is read from the v0 layout and
 * the record is added without walking inline refs.
 */
6829 if (item_size < sizeof(*ei)) {
6830 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6831 struct btrfs_extent_item_v0 *ei0;
6832 BUG_ON(item_size != sizeof(*ei0));
6833 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6834 refs = btrfs_extent_refs_v0(eb, ei0);
6838 memset(&tmpl, 0, sizeof(tmpl));
6839 tmpl.start = key.objectid;
6840 tmpl.nr = num_bytes;
6841 tmpl.extent_item_refs = refs;
6842 tmpl.metadata = metadata;
6844 tmpl.max_size = num_bytes;
6846 return add_extent_rec(extent_cache, &tmpl);
/* Regular layout: read the ref count and test the TREE_BLOCK flag of the item. */
6849 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6850 refs = btrfs_extent_refs(eb, ei);
6851 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata extents must be exactly one node long. */
6855 if (metadata && num_bytes != root->fs_info->nodesize) {
6856 error("ignore invalid metadata extent, length %llu does not equal to %u",
6857 num_bytes, root->fs_info->nodesize);
/* Data extents must be a multiple of the sector size. */
6860 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6861 error("ignore invalid data extent, length %llu is not aligned to %u",
6862 num_bytes, root->fs_info->sectorsize);
/* Record the extent itself; the return value of add_extent_rec() is not used here. */
6866 memset(&tmpl, 0, sizeof(tmpl));
6867 tmpl.start = key.objectid;
6868 tmpl.nr = num_bytes;
6869 tmpl.extent_item_refs = refs;
6870 tmpl.metadata = metadata;
6872 tmpl.max_size = num_bytes;
6873 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item. A tree block stored under
 * an EXTENT_ITEM key has a btrfs_tree_block_info in between, which is
 * skipped.
 */
6875 ptr = (unsigned long)(ei + 1);
6876 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6877 key.type == BTRFS_EXTENT_ITEM_KEY)
6878 ptr += sizeof(struct btrfs_tree_block_info);
/* `end` marks the first byte past the item. */
6880 end = (unsigned long)ei + item_size;
/* Decode one inline ref: its type and its offset field. */
6882 iref = (struct btrfs_extent_inline_ref *)ptr;
6883 type = btrfs_extent_inline_ref_type(eb, iref);
6884 offset = btrfs_extent_inline_ref_offset(eb, iref);
6886 case BTRFS_TREE_BLOCK_REF_KEY:
6887 ret = add_tree_backref(extent_cache, key.objectid,
6891 "add_tree_backref failed (extent items tree block): %s",
6894 case BTRFS_SHARED_BLOCK_REF_KEY:
6895 ret = add_tree_backref(extent_cache, key.objectid,
6899 "add_tree_backref failed (extent items shared block): %s",
6902 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref's offset field. */
6903 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6904 add_data_backref(extent_cache, key.objectid, 0,
6905 btrfs_extent_data_ref_root(eb, dref),
6906 btrfs_extent_data_ref_objectid(eb,
6908 btrfs_extent_data_ref_offset(eb, dref),
6909 btrfs_extent_data_ref_count(eb, dref),
6912 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the inline ref header. */
6913 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6914 add_data_backref(extent_cache, key.objectid, offset,
6916 btrfs_shared_data_ref_count(eb, sref),
6920 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6921 key.objectid, key.type, num_bytes);
/* Step over this ref; its size depends on the ref type. */
6924 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [offset, offset + bytes) of block group `cache` is
 * described by a free space entry with exactly that offset and length, and
 * unlink the entry once it has been matched.
 *
 * Before the lookup, the range is trimmed around any superblock mirror
 * stripes that btrfs_rmap_block() maps into the block group. When a stripe
 * lands in the middle of the range, the left part is checked by a recursive
 * call and the loop continues with the right part.
 *
 * NOTE(review): several declarations, branch bodies and the return
 * statements are not visible in this excerpt.
 */
6931 static int check_cache_range(struct btrfs_root *root,
6932 struct btrfs_block_group_cache *cache,
6933 u64 offset, u64 bytes)
6935 struct btrfs_free_space *entry;
/* Look at every superblock mirror and the logical addresses it maps to. */
6941 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6942 bytenr = btrfs_sb_offset(i);
6943 ret = btrfs_rmap_block(root->fs_info,
6944 cache->key.objectid, bytenr, 0,
6945 &logical, &nr, &stripe_len);
/* Stripe ends at or before the range start: no overlap. */
6950 if (logical[nr] + stripe_len <= offset)
/* Stripe starts at or after the range end: no overlap. */
6952 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start: drop the stripe from the front. */
6954 if (logical[nr] == offset) {
6955 if (stripe_len >= bytes) {
6959 bytes -= stripe_len;
6960 offset += stripe_len;
/* Stripe starts before the range: keep only what lies past the stripe end. */
6961 } else if (logical[nr] < offset) {
6962 if (logical[nr] + stripe_len >=
6967 bytes = (offset + bytes) -
6968 (logical[nr] + stripe_len);
6969 offset = logical[nr] + stripe_len;
6972 * Could be tricky, the super may land in the
6973 * middle of the area we're checking. First
6974 * check the easiest case, it's at the end.
6976 if (logical[nr] + stripe_len >=
6978 bytes = logical[nr] - offset;
6982 /* Check the left side */
6983 ret = check_cache_range(root, cache,
6985 logical[nr] - offset);
6991 /* Now we continue with the right side */
6992 bytes = (offset + bytes) -
6993 (logical[nr] + stripe_len);
6994 offset = logical[nr] + stripe_len;
/* Whatever is left must match one free space entry exactly. */
7001 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7003 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7004 offset, offset+bytes);
7008 if (entry->offset != offset) {
7009 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7014 if (entry->bytes != bytes) {
7015 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7016 bytes, entry->bytes, offset);
/* Remove the matched entry so leftover entries can be detected by the caller. */
7020 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Walk the extent tree across the range of block group `cache` and pass
 * every gap between consecutive extent items to check_cache_range(), so the
 * loaded free space entries are compared against the space the extent tree
 * leaves unallocated. After the walk, entries still present in the free
 * space ctl are reported on stderr.
 *
 * NOTE(review): some declarations, loop control and the return statements
 * are not visible in this excerpt.
 */
7025 static int verify_space_cache(struct btrfs_root *root,
7026 struct btrfs_block_group_cache *cache)
7028 struct btrfs_path path;
7029 struct extent_buffer *leaf;
7030 struct btrfs_key key;
/* All lookups below go through the extent tree root. */
7034 root = root->fs_info->extent_root;
/* Start at the block group start, but never below BTRFS_SUPER_INFO_OFFSET. */
7036 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7038 btrfs_init_path(&path);
7039 key.objectid = last;
7041 key.type = BTRFS_EXTENT_ITEM_KEY;
7042 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Move on to the next leaf once the slot runs past the current one. */
7047 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7048 ret = btrfs_next_leaf(root, &path);
7056 leaf = path.nodes[0];
7057 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Keys at or past the end of the block group are outside the range of interest. */
7058 if (key.objectid >= cache->key.offset + cache->key.objectid)
/* Only extent and metadata items describe allocated space. */
7060 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7061 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * No gap before this extent: just advance `last` past it. EXTENT_ITEM keys
 * carry the length in key.offset; the other assignment uses nodesize.
 */
7066 if (last == key.objectid) {
7067 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7068 last = key.objectid + key.offset;
7070 last = key.objectid + root->fs_info->nodesize;
/* There is a gap [last, key.objectid): it must be present in the cache. */
7075 ret = check_cache_range(root, cache, last,
7076 key.objectid - last);
7079 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7080 last = key.objectid + key.offset;
7082 last = key.objectid + root->fs_info->nodesize;
/* Check the tail between the last extent and the end of the block group. */
7086 if (last < cache->key.objectid + cache->key.offset)
7087 ret = check_cache_range(root, cache, last,
7088 cache->key.objectid +
7089 cache->key.offset - last);
7092 btrfs_release_path(&path);
/* Every matched entry was unlinked, so anything left is unexpected. */
7095 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7096 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of block groups, looked up starting
 * after the primary superblock. For each block group the free space is
 * loaded either from the free space tree (when the FREE_SPACE_TREE compat_ro
 * bit is set) or through load_free_space_cache(), then checked with
 * verify_space_cache().
 *
 * Returns -EINVAL when `error` is non-zero, 0 otherwise.
 *
 * NOTE(review): the loop header, several branch bodies and the place where
 * `error` is updated are not visible in this excerpt.
 */
7104 static int check_space_cache(struct btrfs_root *root)
7106 struct btrfs_block_group_cache *cache;
7107 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* A cache generation that is set (not -1) but differs from the super generation is stale. */
7111 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7112 btrfs_super_generation(root->fs_info->super_copy) !=
7113 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7114 printf("cache and super generation don't match, space cache "
7115 "will be invalidated\n");
/* Start the progress task for this phase when progress reporting is enabled. */
7119 if (ctx.progress_enabled) {
7120 ctx.tp = TASK_FREE_SPACE;
7121 task_start(ctx.info);
7125 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup continues right after this block group. */
7129 start = cache->key.objectid + cache->key.offset;
/* Create the free space ctl on first use. */
7130 if (!cache->free_space_ctl) {
7131 if (btrfs_init_free_space_ctl(cache,
7132 root->fs_info->sectorsize)) {
7137 btrfs_remove_free_space_cache(cache);
/* Free space tree path: super stripes are excluded while the tree is loaded. */
7140 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7141 ret = exclude_super_stripes(root, cache);
7143 fprintf(stderr, "could not exclude super stripes: %s\n",
7148 ret = load_free_space_tree(root->fs_info, cache);
7149 free_excluded_extents(root, cache);
7151 fprintf(stderr, "could not load free space tree: %s\n",
/* Otherwise the free space is loaded through load_free_space_cache(). */
7158 ret = load_free_space_cache(root->fs_info, cache);
7163 ret = verify_space_cache(root, cache);
7165 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7166 cache->key.objectid);
7171 task_stop(ctx.info);
7173 return error ? -EINVAL : 0;
/*
 * Read the data of [bytenr, bytenr + num_bytes) and compare the checksum of
 * every sectorsize piece with the expected value read from `eb`, where the
 * expected checksums start at `leaf_offset`. A mismatch is printed together
 * with the mirror number, and the number of copies is queried so another
 * mirror can be considered.
 *
 * NOTE(review): several declarations, the error paths, the mirror retry
 * body and the return statements are not visible in this excerpt.
 */
7176 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7177 u64 num_bytes, unsigned long leaf_offset,
7178 struct extent_buffer *eb) {
7180 struct btrfs_fs_info *fs_info = root->fs_info;
7182 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7184 unsigned long csum_offset;
7188 u64 data_checked = 0;
/* The length is tested for being a multiple of the sector size. */
7194 if (num_bytes % fs_info->sectorsize)
/* One buffer holds the whole range. */
7197 data = malloc(num_bytes);
7201 while (offset < num_bytes) {
7204 read_len = num_bytes - offset;
7205 /* read as much space once a time */
7206 ret = read_extent_data(fs_info, data + offset,
7207 bytenr + offset, &read_len, mirror);
7211 /* verify every 4k data's checksum */
7212 while (data_checked < read_len) {
/* `tmp` is the byte offset of the current sector within the whole range. */
7214 tmp = offset + data_checked;
7216 csum = btrfs_csum_data((char *)data + tmp,
7217 csum, fs_info->sectorsize);
7218 btrfs_csum_final(csum, (u8 *)&csum);
/* One stored checksum of csum_size bytes exists per sector. */
7220 csum_offset = leaf_offset +
7221 tmp / fs_info->sectorsize * csum_size;
7222 read_extent_buffer(eb, (char *)&csum_expected,
7223 csum_offset, csum_size);
7224 /* try another mirror */
7225 if (csum != csum_expected) {
7226 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7227 mirror, bytenr + tmp,
7228 csum, csum_expected);
7229 num_copies = btrfs_num_copies(root->fs_info,
7231 if (mirror < num_copies - 1) {
7236 data_checked += fs_info->sectorsize;
/*
 * Check that the range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree. The search starts at
 * (bytenr, EXTENT_ITEM, -1), steps backwards to a candidate item, then walks
 * forward, shrinking the range by every extent that overlaps it. If bytes
 * remain uncovered and no other error occurred, a message is printed.
 *
 * When an extent sits in the middle of the range, the part to its right is
 * checked by a recursive call and the left part is kept for this call.
 *
 * NOTE(review): the last parameter, loop headers, several branch bodies and
 * the return statements are not visible in this excerpt.
 */
7245 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7248 struct btrfs_path path;
7249 struct extent_buffer *leaf;
7250 struct btrfs_key key;
7253 btrfs_init_path(&path);
7254 key.objectid = bytenr;
7255 key.type = BTRFS_EXTENT_ITEM_KEY;
7256 key.offset = (u64)-1;
7259 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7262 fprintf(stderr, "Error looking up extent record %d\n", ret);
7263 btrfs_release_path(&path);
/* Step back one item, moving to the previous leaf when already at slot 0. */
7266 if (path.slots[0] > 0) {
7269 ret = btrfs_prev_leaf(root, &path);
7272 } else if (ret > 0) {
7279 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7282 * Block group items come before extent items if they have the same
7283 * bytenr, so walk back one more just in case. Dear future traveller,
7284 * first congrats on mastering time travel. Now if it's not too much
7285 * trouble could you go back to 2006 and tell Chris to make the
7286 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7287 * EXTENT_ITEM_KEY please?
7289 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7290 if (path.slots[0] > 0) {
7293 ret = btrfs_prev_leaf(root, &path);
7296 } else if (ret > 0) {
7301 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Forward walk: move on to the next leaf once the slot runs past the current one. */
7305 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7306 ret = btrfs_next_leaf(root, &path);
7308 fprintf(stderr, "Error going to next leaf "
7310 btrfs_release_path(&path);
7316 leaf = path.nodes[0];
7317 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items that are not EXTENT_ITEMs get their own branch. */
7318 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
7322 if (key.objectid + key.offset < bytenr) {
/* Extent starts past the end of the range. */
7326 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the range start: consume it from the front. */
7329 if (key.objectid == bytenr) {
7330 if (key.offset >= num_bytes) {
7334 num_bytes -= key.offset;
7335 bytenr += key.offset;
/* Extent starts before the range: keep only what lies past its end. */
7336 } else if (key.objectid < bytenr) {
7337 if (key.objectid + key.offset >= bytenr + num_bytes) {
7341 num_bytes = (bytenr + num_bytes) -
7342 (key.objectid + key.offset);
7343 bytenr = key.objectid + key.offset;
/* Extent starts inside the range and ends before the range does. */
7345 if (key.objectid + key.offset < bytenr + num_bytes) {
7346 u64 new_start = key.objectid + key.offset;
7347 u64 new_bytes = bytenr + num_bytes - new_start;
7350 * Weird case, the extent is in the middle of
7351 * our range, we'll have to search one side
7352 * and then the other. Not sure if this happens
7353 * in real life, but no harm in coding it up
7354 * anyway just in case.
7356 btrfs_release_path(&path);
7357 ret = check_extent_exists(root, new_start,
7360 fprintf(stderr, "Right section didn't "
/* Continue with the part to the left of the extent. */
7364 num_bytes = key.objectid - bytenr;
7367 num_bytes = key.objectid - bytenr;
/* Bytes left over with no earlier error: nothing covers that part of the range. */
7374 if (num_bytes && !ret) {
7375 fprintf(stderr, "There are no extents for csum range "
7376 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7380 btrfs_release_path(&path);
/*
 * Walk the csum tree. For every EXTENT_CSUM item, the covered data length is
 * derived from the item size; when check_data_csum is set the data itself is
 * verified with check_extent_csums(). Runs of adjacent csum items are
 * accumulated in (offset, num_bytes), and when a run ends it is passed to
 * check_extent_exists() to confirm an extent record backs it.
 *
 * NOTE(review): some declarations, the loop header, the skip_csum_check
 * label and the return statements are not visible in this excerpt.
 */
7384 static int check_csums(struct btrfs_root *root)
7386 struct btrfs_path path;
7387 struct extent_buffer *leaf;
7388 struct btrfs_key key;
7389 u64 offset = 0, num_bytes = 0;
7390 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7394 unsigned long leaf_offset;
/* Everything below operates on the csum tree root. */
7396 root = root->fs_info->csum_root;
7397 if (!extent_buffer_uptodate(root->node)) {
7398 fprintf(stderr, "No valid csum tree found\n");
7402 btrfs_init_path(&path);
7403 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7404 key.type = BTRFS_EXTENT_CSUM_KEY;
7406 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7408 fprintf(stderr, "Error searching csum tree %d\n", ret);
7409 btrfs_release_path(&path);
/* No exact match and not at the first slot (the action taken is not visible here). */
7413 if (ret > 0 && path.slots[0])
/* Move on to the next leaf once the slot runs past the current one. */
7418 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7419 ret = btrfs_next_leaf(root, &path);
7421 fprintf(stderr, "Error going to next leaf "
7428 leaf = path.nodes[0];
7430 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7431 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size bytes of the item cover one sector of data. */
7436 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7437 csum_size) * root->fs_info->sectorsize;
/* Reading and checksumming the data is done only when check_data_csum is set. */
7438 if (!check_data_csum)
7439 goto skip_csum_check;
7440 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7441 ret = check_extent_csums(root, key.offset, data_len,
7447 offset = key.offset;
/* This item does not start where the current run ends: verify the run, then restart at this item. */
7448 } else if (key.offset != offset + num_bytes) {
7449 ret = check_extent_exists(root, offset, num_bytes);
7451 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7452 "there is no extent record\n",
7453 offset, offset+num_bytes);
7456 offset = key.offset;
/* Extend the current run by the data this item covers. */
7459 num_bytes += data_len;
7463 btrfs_release_path(&path);
/*
 * Compare `key` against `drop_key` field by field: objectid first, then
 * type, then offset.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the result is non-zero when `key` sorts before `drop_key`.
 * Confirm against the full source.
 */
7467 static int is_dropped_key(struct btrfs_key *key,
7468 struct btrfs_key *drop_key) {
7469 if (key->objectid < drop_key->objectid)
/* Same objectid: the type decides. */
7471 else if (key->objectid == drop_key->objectid) {
7472 if (key->type < drop_key->type)
/* Same objectid and type: the offset decides. */
7474 else if (key->type == drop_key->type) {
7475 if (key->offset < drop_key->offset)
7483 * Here are the rules for FULL_BACKREF.
7485 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7486 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7488 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7489 * if it happened after the relocation occurred since we'll have dropped the
7490 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7491 * have no real way to know for sure.
7493 * We process the blocks one root at a time, and we start from the lowest root
7494 * objectid and go to the highest. So we can just lookup the owner backref for
7495 * the record and if we don't find it then we know it doesn't exist and we have
7498 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7499 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7500 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether tree block `buf`, reached through the root described by
 * `ri`, should have BTRFS_BLOCK_FLAG_FULL_BACKREF, and OR that bit into
 * *flags when it should. The decision is compared with the value already
 * stored in the extent record (flag_block_full_backref); a disagreement
 * sets rec->bad_full_backref.
 *
 * NOTE(review): the last parameter, the jump targets taken by the tests
 * below and the return statements are not visible in this excerpt.
 */
7502 static int calc_extent_flag(struct cache_tree *extent_cache,
7503 struct extent_buffer *buf,
7504 struct root_item_record *ri,
7507 struct extent_record *rec;
7508 struct cache_extent *cache;
7509 struct tree_backref *tback;
7512 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7513 /* we have added this extent before */
7517 rec = container_of(cache, struct extent_record, cache);
7520 * Except file/reloc tree, we can not have
7523 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The block is the root node recorded in `ri`. */
7528 if (buf->start == ri->bytenr)
/* The block header carries the RELOC flag. */
7531 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* The header owner is the root currently being processed. */
7534 owner = btrfs_header_owner(buf);
7535 if (owner == ri->objectid)
/* Look for a tree backref from the header owner with parent 0. */
7538 tback = find_tree_backref(rec, 0, owner);
/* A stored decision other than 0 conflicts with this outcome. */
7543 if (rec->flag_block_full_backref != FLAG_UNSET &&
7544 rec->flag_block_full_backref != 0)
7545 rec->bad_full_backref = 1;
/* FULL_BACKREF outcome: a stored decision other than 1 conflicts with it. */
7548 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7549 if (rec->flag_block_full_backref != FLAG_UNSET &&
7550 rec->flag_block_full_backref != 1)
7551 rec->bad_full_backref = 1;
/*
 * Print one line to stderr saying that `key_type` was found in a root where
 * it does not belong; the key type and the root id are printed through
 * print_key_type() and print_objectid().
 */
7555 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7557 fprintf(stderr, "Invalid key type(");
7558 print_key_type(stderr, 0, key_type);
7559 fprintf(stderr, ") found in root(");
7560 print_objectid(stderr, rootid, 0);
7561 fprintf(stderr, ")\n");
7565 * Check if the key is valid with its extent buffer.
7567 * This is a early check in case invalid key exists in a extent buffer
7568 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type `key_type` may appear in the tree identified by
 * `rootid`. Each group of case labels below is followed by a test on
 * `rootid`; a mismatch is reported with report_mismatch_key_root().
 *
 * NOTE(review): the switch header, the jump statements and the return
 * statements are not visible in this excerpt.
 */
7571 static int check_type_with_root(u64 rootid, u8 key_type)
7574 /* Only valid in chunk tree */
7575 case BTRFS_DEV_ITEM_KEY:
7576 case BTRFS_CHUNK_ITEM_KEY:
7577 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7580 /* valid in csum and log tree */
/*
 * NOTE(review): the label below is a tree objectid constant, while every
 * other label in this function is a key type. This looks like it was meant
 * to be a key type; confirm the intended constant and condition before
 * changing it.
 */
7581 case BTRFS_CSUM_TREE_OBJECTID:
7582 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* Extent, metadata and block group items are tested against the extent tree. */
7586 case BTRFS_EXTENT_ITEM_KEY:
7587 case BTRFS_METADATA_ITEM_KEY:
7588 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7589 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* Root items are tested against the root tree. */
7592 case BTRFS_ROOT_ITEM_KEY:
7593 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* Dev extents are tested against the dev tree. */
7596 case BTRFS_DEV_EXTENT_KEY:
7597 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7603 report_mismatch_key_root(key_type, rootid);
/*
 * Process one pending tree block.
 *
 * A batch of candidates is chosen with pick_next_pending(); the first one is
 * removed from the pending/reada/nodes trees and read from disk. Its
 * FULL_BACKREF state is determined (from the extent tree, or computed with
 * calc_extent_flag()), the block is checked with check_block(), and then:
 *   - for a leaf, every item is dispatched by key type to the matching
 *     process_*() / add_*_backref() helper;
 *   - for a node, every child pointer gets an extent record and a tree
 *     backref and is queued with add_pending().
 * Finally the global byte counters are updated and the buffer is released.
 *
 * NOTE(review): several parameters, local declarations, branch conditions,
 * `continue` statements and the return statements are not visible in this
 * excerpt.
 */
7607 static int run_next_block(struct btrfs_root *root,
7608 struct block_info *bits,
7611 struct cache_tree *pending,
7612 struct cache_tree *seen,
7613 struct cache_tree *reada,
7614 struct cache_tree *nodes,
7615 struct cache_tree *extent_cache,
7616 struct cache_tree *chunk_cache,
7617 struct rb_root *dev_cache,
7618 struct block_group_tree *block_group_cache,
7619 struct device_extent_tree *dev_extent_cache,
7620 struct root_item_record *ri)
7622 struct btrfs_fs_info *fs_info = root->fs_info;
7623 struct extent_buffer *buf;
7624 struct extent_record *rec = NULL;
7635 struct btrfs_key key;
7636 struct cache_extent *cache;
/* Fill `bits` with the next candidate blocks. */
7639 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7640 bits_nr, &reada_bits);
/* Track the picked blocks in `reada` and issue readahead for them (the guarding condition is not visible here). */
7645 for(i = 0; i < nritems; i++) {
7646 ret = add_cache_extent(reada, bits[i].start,
7651 /* fixme, get the parent transid */
7652 readahead_tree_block(fs_info, bits[i].start, 0);
/* The first picked block is the one processed in this call. */
7655 *last = bits[0].start;
7656 bytenr = bits[0].start;
7657 size = bits[0].size;
/* Remove the block from each of the three queues. */
7659 cache = lookup_cache_extent(pending, bytenr, size);
7661 remove_cache_extent(pending, cache);
7664 cache = lookup_cache_extent(reada, bytenr, size);
7666 remove_cache_extent(reada, cache);
7669 cache = lookup_cache_extent(nodes, bytenr, size);
7671 remove_cache_extent(nodes, cache);
/* An existing extent record supplies the generation used for the read below. */
7674 cache = lookup_cache_extent(extent_cache, bytenr, size);
7676 rec = container_of(cache, struct extent_record, cache);
7677 gen = rec->parent_generation;
7680 /* fixme, get the real parent transid */
7681 buf = read_tree_block(root->fs_info, bytenr, gen);
/* A block that cannot be read is recorded as a bad block. */
7682 if (!extent_buffer_uptodate(buf)) {
7683 record_bad_block_io(root->fs_info,
7684 extent_cache, bytenr, size);
7688 nritems = btrfs_header_nritems(buf);
/*
 * Unless the extent tree is being rebuilt (init_extent_tree), the flags are
 * looked up in the extent tree; calc_extent_flag() is called in both
 * branches shown below, and a failure of it sets FULL_BACKREF.
 */
7691 if (!init_extent_tree) {
7692 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7693 btrfs_header_level(buf), 1, NULL,
7696 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7698 fprintf(stderr, "Couldn't calc extent flags\n");
7699 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7704 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7706 fprintf(stderr, "Couldn't calc extent flags\n");
7707 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Cross-check the FULL_BACKREF bit against what the block header and root say. */
7711 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7713 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7714 ri->objectid == btrfs_header_owner(buf)) {
7716 * Ok we got to this block from it's original owner and
7717 * we have FULL_BACKREF set. Relocation can leave
7718 * converted blocks over so this is altogether possible,
7719 * however it's not possible if the generation > the
7720 * last snapshot, so check for this case.
7722 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7723 btrfs_header_generation(buf) > ri->last_snapshot) {
7724 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7725 rec->bad_full_backref = 1;
/* Here the bit is forced on when the root is the reloc tree or the header has the RELOC flag (first half of the condition is not visible). */
7730 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7731 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7732 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7733 rec->bad_full_backref = 1;
/* Store the final decision in the extent record. */
7737 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7738 rec->flag_block_full_backref = 1;
7742 rec->flag_block_full_backref = 0;
7744 owner = btrfs_header_owner(buf);
7747 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account unused space, then dispatch every item by key type. */
7751 if (btrfs_is_leaf(buf)) {
7752 btree_space_waste += btrfs_leaf_free_space(root, buf);
7753 for (i = 0; i < nritems; i++) {
7754 struct btrfs_file_extent_item *fi;
7755 btrfs_item_key_to_cpu(buf, &key, i);
7757 * Check key type against the leaf owner.
7758 * Could filter quite a lot of early error if
7761 if (check_type_with_root(btrfs_header_owner(buf),
7763 fprintf(stderr, "ignoring invalid key\n");
7766 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7767 process_extent_item(root, extent_cache, buf,
7771 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7772 process_extent_item(root, extent_cache, buf,
/* Csum items contribute their item size to a counter (the target is not visible here). */
7776 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7778 btrfs_item_size_nr(buf, i);
7781 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7782 process_chunk_item(chunk_cache, &key, buf, i);
7785 if (key.type == BTRFS_DEV_ITEM_KEY) {
7786 process_device_item(dev_cache, &key, buf, i);
7789 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7790 process_block_group_item(block_group_cache,
7794 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7795 process_device_extent_item(dev_extent_cache,
7800 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7801 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7802 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree block ref: key.offset is passed as the root argument, with parent 0. */
7809 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7810 ret = add_tree_backref(extent_cache,
7811 key.objectid, 0, key.offset, 0);
7814 "add_tree_backref failed (leaf tree block): %s",
/* Keyed shared block ref: key.offset is passed as the parent argument, with root 0. */
7818 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7819 ret = add_tree_backref(extent_cache,
7820 key.objectid, key.offset, 0, 0);
7823 "add_tree_backref failed (leaf shared block): %s",
7827 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7828 struct btrfs_extent_data_ref *ref;
7829 ref = btrfs_item_ptr(buf, i,
7830 struct btrfs_extent_data_ref);
7831 add_data_backref(extent_cache,
7833 btrfs_extent_data_ref_root(buf, ref),
7834 btrfs_extent_data_ref_objectid(buf,
7836 btrfs_extent_data_ref_offset(buf, ref),
7837 btrfs_extent_data_ref_count(buf, ref),
7838 0, root->fs_info->sectorsize);
7841 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7842 struct btrfs_shared_data_ref *ref;
7843 ref = btrfs_item_ptr(buf, i,
7844 struct btrfs_shared_data_ref);
7845 add_data_backref(extent_cache,
7846 key.objectid, key.offset, 0, 0, 0,
7847 btrfs_shared_data_ref_count(buf, ref),
7848 0, root->fs_info->sectorsize);
/* Some orphan items are queued on the global delete_items list (the exact filter is not fully visible). */
7851 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7852 struct bad_item *bad;
7854 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7858 bad = malloc(sizeof(struct bad_item));
7861 INIT_LIST_HEAD(&bad->list);
7862 memcpy(&bad->key, &key,
7863 sizeof(struct btrfs_key));
7864 bad->root_id = owner;
7865 list_add_tail(&bad->list, &delete_items);
/* The lines that follow read the item as a file extent item. */
7868 if (key.type != BTRFS_EXTENT_DATA_KEY)
7870 fi = btrfs_item_ptr(buf, i,
7871 struct btrfs_file_extent_item);
/* Inline extents and extents with disk bytenr 0 are tested for here. */
7872 if (btrfs_file_extent_type(buf, fi) ==
7873 BTRFS_FILE_EXTENT_INLINE)
7875 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
/* Account allocated and referenced data bytes, then record the data backref. */
7878 data_bytes_allocated +=
7879 btrfs_file_extent_disk_num_bytes(buf, fi);
7880 if (data_bytes_allocated < root->fs_info->sectorsize) {
7883 data_bytes_referenced +=
7884 btrfs_file_extent_num_bytes(buf, fi);
7885 add_data_backref(extent_cache,
7886 btrfs_file_extent_disk_bytenr(buf, fi),
7887 parent, owner, key.objectid, key.offset -
7888 btrfs_file_extent_offset(buf, fi), 1, 1,
7889 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: walk the child pointers. */
7893 struct btrfs_key first_key;
7895 first_key.objectid = 0;
7898 btrfs_item_key_to_cpu(buf, &first_key, 0);
7899 level = btrfs_header_level(buf);
7900 for (i = 0; i < nritems; i++) {
7901 struct extent_record tmpl;
7903 ptr = btrfs_node_blockptr(buf, i);
7904 size = root->fs_info->nodesize;
7905 btrfs_node_key_to_cpu(buf, &key, i);
/* At the root's drop level, keys are compared against the drop key. */
7907 if ((level == ri->drop_level)
7908 && is_dropped_key(&key, &ri->drop_key)) {
/* Create the extent record for the child, remembering the parent key and pointer generation. */
7913 memset(&tmpl, 0, sizeof(tmpl));
7914 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7915 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7920 tmpl.max_size = size;
7921 ret = add_extent_rec(extent_cache, &tmpl);
7925 ret = add_tree_backref(extent_cache, ptr, parent,
7929 "add_tree_backref failed (non-leaf block): %s",
/* The child is queued on either `nodes` or `pending` (the deciding condition is not visible here). */
7935 add_pending(nodes, seen, ptr, size);
7937 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots in the node count as wasted btree space. */
7940 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7941 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting by block owner. */
7943 total_btree_bytes += buf->len;
7944 if (fs_root_objectid(btrfs_header_owner(buf)))
7945 total_fs_tree_bytes += buf->len;
7946 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7947 total_extent_tree_bytes += buf->len;
7949 free_extent_buffer(buf);
/*
 * Queue the root block `buf` for processing and register it in
 * `extent_cache`: the block is queued on `nodes` when its level is above 0,
 * otherwise on `pending`; an extent record is added for it; and a tree
 * backref is added whose form depends on `objectid` and on the header's
 * backref revision.
 *
 * NOTE(review): the last parameter(s), part of the template setup, the
 * trailing arguments of both add_tree_backref() calls and the return
 * statement are not visible in this excerpt.
 */
7953 static int add_root_to_pending(struct extent_buffer *buf,
7954 struct cache_tree *extent_cache,
7955 struct cache_tree *pending,
7956 struct cache_tree *seen,
7957 struct cache_tree *nodes,
7960 struct extent_record tmpl;
7963 if (btrfs_header_level(buf) > 0)
7964 add_pending(nodes, seen, buf->start, buf->len);
7966 add_pending(pending, seen, buf->start, buf->len);
/* Describe the root block itself as an extent. */
7968 memset(&tmpl, 0, sizeof(tmpl));
7969 tmpl.start = buf->start;
7974 tmpl.max_size = buf->len;
7975 add_extent_rec(extent_cache, &tmpl);
/*
 * For the reloc tree, or a header older than the mixed backref revision,
 * the backref is added with the block's own start as the parent argument;
 * otherwise parent is 0 and `objectid` is passed as the root.
 */
7977 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7978 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7979 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7982 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7987 /* as we fix the tree, we might be deleting blocks that
7988 * we're tracking for repair. This hook makes sure we
7989 * remove any backrefs for blocks as we are fixing them.
// Hook that keeps the in-memory extent cache (fs_info->fsck_extent_cache)
// consistent when an extent reference is dropped: the matching data or tree
// backref on the extent record has its counters reduced, and the record is
// then passed to maybe_free_extent_rec().
//
// An `owner` of at least BTRFS_FIRST_FREE_OBJECTID is treated as data.
//
// NOTE(review): the last parameter, several branch conditions, the frees
// that presumably follow list_del() and the return statements are not
// visible in this excerpt.
7991 static int free_extent_hook(struct btrfs_trans_handle *trans,
7992 struct btrfs_root *root,
7993 u64 bytenr, u64 num_bytes, u64 parent,
7994 u64 root_objectid, u64 owner, u64 offset,
7997 struct extent_record *rec;
7998 struct cache_extent *cache;
8000 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8002 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8003 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8007 rec = container_of(cache, struct extent_record, cache);
// Data extent: locate the data backref that matches this reference.
8009 struct data_backref *back;
8010 back = find_data_backref(rec, parent, root_objectid, owner,
8011 offset, 1, bytenr, num_bytes);
// Drop the references that were found by walking the trees.
8014 if (back->node.found_ref) {
8015 back->found_ref -= refs_to_drop;
8017 rec->refs -= refs_to_drop;
// Drop the references that were found in the extent tree.
8019 if (back->node.found_extent_tree) {
8020 back->num_refs -= refs_to_drop;
8021 if (rec->extent_item_refs)
8022 rec->extent_item_refs -= refs_to_drop;
// Clear the found bits once the matching counter reaches zero.
8024 if (back->found_ref == 0)
8025 back->node.found_ref = 0;
8026 if (back->num_refs == 0)
8027 back->node.found_extent_tree = 0;
8029 if (!back->node.found_extent_tree && back->node.found_ref) {
8030 list_del(&back->node.list);
// Tree block: locate the tree backref that matches this reference.
8034 struct tree_backref *back;
8035 back = find_tree_backref(rec, parent, root_objectid);
8038 if (back->node.found_ref) {
8041 back->node.found_ref = 0;
8043 if (back->node.found_extent_tree) {
8044 if (rec->extent_item_refs)
8045 rec->extent_item_refs--;
8046 back->node.found_extent_tree = 0;
8048 if (!back->node.found_extent_tree && back->node.found_ref) {
8049 list_del(&back->node.list);
// Hand the record to maybe_free_extent_rec() now that its counters changed.
8053 maybe_free_extent_rec(extent_cache, rec);
// Delete extent-tree items whose objectid is `bytenr` and whose type is an
// extent/metadata item or one of the backref item types. The search starts
// from (bytenr, <type>, -1) and works backwards. When an EXTENT_ITEM or
// METADATA_ITEM is removed, the block group accounting is updated for the
// freed bytes.
//
// NOTE(review): the last parameter, the loop header, the initial key type,
// several branch bodies and the return statements are not visible in this
// excerpt.
8058 static int delete_extent_records(struct btrfs_trans_handle *trans,
8059 struct btrfs_root *root,
8060 struct btrfs_path *path,
8063 struct btrfs_key key;
8064 struct btrfs_key found_key;
8065 struct extent_buffer *leaf;
8070 key.objectid = bytenr;
8072 key.offset = (u64)-1;
8075 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8082 if (path->slots[0] == 0)
8088 leaf = path->nodes[0];
8089 slot = path->slots[0];
8091 btrfs_item_key_to_cpu(leaf, &found_key, slot);
// Items with a different objectid do not belong to this extent.
8092 if (found_key.objectid != bytenr)
// Not one of the listed item types: release the path and move the search
// key just below the item that was found.
8095 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8096 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8097 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8098 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8099 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8100 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8101 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8102 btrfs_release_path(path);
// Type 0 cannot be decremented, so the offset is decremented instead.
8103 if (found_key.type == 0) {
8104 if (found_key.offset == 0)
8106 key.offset = found_key.offset - 1;
8107 key.type = found_key.type;
8109 key.type = found_key.type - 1;
8110 key.offset = (u64)-1;
8114 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8115 found_key.objectid, found_key.type, found_key.offset);
8117 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8120 btrfs_release_path(path);
// For the extent item itself, the freed length is key.offset for an
// EXTENT_ITEM and nodesize for a METADATA_ITEM.
8122 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8123 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8124 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8125 found_key.offset : root->fs_info->nodesize;
8127 ret = btrfs_update_block_group(trans, root, bytenr,
8134 btrfs_release_path(path);
8139 * for a single backref, this will allocate a new extent
8140 * and add the backref to it.
// Insert the extent item for `rec` into the extent tree (in the branch that
// builds a new item) and then add the reference described by `back` with
// btrfs_inc_extent_ref(): once per found reference for a data backref, once
// for a tree backref.
//
// For a tree block the new item also carries a btrfs_tree_block_info whose
// key is built in the local `copy_key` from rec->info_objectid and whose
// level is rec->info_level; `flags` is OR-ed into the extent flags.
//
// NOTE(review): several conditions, declarations, the error jumps and the
// return statement are not visible in this excerpt.
8142 static int record_extent(struct btrfs_trans_handle *trans,
8143 struct btrfs_fs_info *info,
8144 struct btrfs_path *path,
8145 struct extent_record *rec,
8146 struct extent_backref *back,
8147 int allocated, u64 flags)
8150 struct btrfs_root *extent_root = info->extent_root;
8151 struct extent_buffer *leaf;
8152 struct btrfs_key ins_key;
8153 struct btrfs_extent_item *ei;
8154 struct data_backref *dback;
8155 struct btrfs_tree_block_info *bi;
8158 rec->max_size = max_t(u64, rec->max_size,
// The item holds a btrfs_extent_item, plus a tree block info in one branch.
8162 u32 item_size = sizeof(*ei);
8165 item_size += sizeof(*bi);
// The new item is keyed (start, EXTENT_ITEM, max_size).
8167 ins_key.objectid = rec->start;
8168 ins_key.offset = rec->max_size;
8169 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8171 ret = btrfs_insert_empty_item(trans, extent_root, path,
8172 &ins_key, item_size);
8176 leaf = path->nodes[0];
8177 ei = btrfs_item_ptr(leaf, path->slots[0],
8178 struct btrfs_extent_item);
// The ref count starts at 0; references are added further down.
8180 btrfs_set_extent_refs(leaf, ei, 0);
8181 btrfs_set_extent_generation(leaf, ei, rec->generation);
8183 if (back->is_data) {
8184 btrfs_set_extent_flags(leaf, ei,
8185 BTRFS_EXTENT_FLAG_DATA);
8187 struct btrfs_disk_key copy_key;
// The tree block info sits directly after the extent item.
8189 bi = (struct btrfs_tree_block_info *)(ei + 1);
8190 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8193 btrfs_set_disk_key_objectid(&copy_key,
8194 rec->info_objectid);
8195 btrfs_set_disk_key_type(&copy_key, 0);
8196 btrfs_set_disk_key_offset(&copy_key, 0);
8198 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8199 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8201 btrfs_set_extent_flags(leaf, ei,
8202 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
// Mark the leaf dirty and account the new extent in its block group.
8205 btrfs_mark_buffer_dirty(leaf);
8206 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8207 rec->max_size, 1, 0);
8210 btrfs_release_path(path);
// Data backref: a full backref uses the recorded parent.
8213 if (back->is_data) {
8217 dback = to_data_backref(back);
8218 if (back->full_backref)
8219 parent = dback->parent;
// One btrfs_inc_extent_ref() call per reference that was found.
8223 for (i = 0; i < dback->found_ref; i++) {
8224 /* if parent != 0, we're doing a full backref
8225 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8226 * just makes the backref allocator create a data
8229 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8230 rec->start, rec->max_size,
8234 BTRFS_FIRST_FREE_OBJECTID :
8240 fprintf(stderr, "adding new data backref"
8241 " on %llu %s %llu owner %llu"
8242 " offset %llu found %d\n",
8243 (unsigned long long)rec->start,
8244 back->full_backref ?
8246 back->full_backref ?
8247 (unsigned long long)parent :
8248 (unsigned long long)dback->root,
8249 (unsigned long long)dback->owner,
8250 (unsigned long long)dback->offset,
// Tree backref: a single reference is added.
8254 struct tree_backref *tback;
8256 tback = to_tree_backref(back);
8257 if (back->full_backref)
8258 parent = tback->parent;
8262 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8263 rec->start, rec->max_size,
8264 parent, tback->root, 0, 0);
8265 fprintf(stderr, "adding new tree backref on "
8266 "start %llu len %llu parent %llu root %llu\n",
8267 rec->start, rec->max_size, parent, tback->root);
8270 btrfs_release_path(path);
/*
 * Search the list @entries for the extent_entry whose bytenr and bytes
 * fields both equal the requested @bytenr and @bytes.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so the return statements are not visible here; presumably
 * the matching entry is returned, or NULL when nothing matches -- confirm
 * against the complete file.
 */
8274 static struct extent_entry *find_entry(struct list_head *entries,
8275 u64 bytenr, u64 bytes)
8277 struct extent_entry *entry = NULL;
8279 list_for_each_entry(entry, entries, list) {
8280 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Walk @entries and pick the entry that the largest number of backrefs
 * agree on, comparing entries by their count field.
 *
 * An entry whose broken count equals its total count is not trusted.  When
 * the current candidate and another entry have the same count the winner is
 * ambiguous, so the scan keeps going instead of settling on either one.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so several statements (including the return) are not
 * visible here; confirm details against the complete file.
 */
8287 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8289 struct extent_entry *entry, *best = NULL, *prev = NULL;
8291 list_for_each_entry(entry, entries, list) {
8293 * If there are as many broken entries as entries then we know
8294 * not to trust this particular entry.
8296 if (entry->broken == entry->count)
8300 * Special case, when there are only two entries and 'best' is
8310 * If our current entry == best then we can't be sure our best
8311 * is really the best, so we need to keep searching.
8313 if (best && best->count == entry->count) {
8319 /* Prev == entry, not good enough, have to keep searching */
8320 if (!prev->broken && prev->count == entry->count)
8324 best = (prev->count > entry->count) ? prev : entry;
8325 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with the extent described
 * by @entry (the caller, verify_backrefs(), passes its chosen best entry).
 *
 * @info:  filesystem being repaired
 * @path:  scratch path; released before the function returns
 * @dback: data backref giving the root, owner and offset of the file extent
 * @entry: bytenr/bytes pair the file extent should end up pointing at
 *
 * The file extent is first located with a read-only search, then a
 * transaction is started and the same key is searched again with COW so the
 * item can be modified.  The final return is "ret ? ret : err", where err
 * holds the result of the transaction commit.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so some statements (error checks, gotos, labels) are not
 * visible here; confirm details against the complete file.
 */
8333 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8334 struct data_backref *dback, struct extent_entry *entry)
8336 struct btrfs_trans_handle *trans;
8337 struct btrfs_root *root;
8338 struct btrfs_file_extent_item *fi;
8339 struct extent_buffer *leaf;
8340 struct btrfs_key key;
/* Resolve the root that the backref says owns the file extent. */
8344 key.objectid = dback->root;
8345 key.type = BTRFS_ROOT_ITEM_KEY;
8346 key.offset = (u64)-1;
8347 root = btrfs_read_fs_root(info, &key);
8349 fprintf(stderr, "Couldn't find root for our ref\n");
8354 * The backref points to the original offset of the extent if it was
8355 * split, so we need to search down to the offset we have and then walk
8356 * forward until we find the backref we're looking for.
8358 key.objectid = dback->owner;
8359 key.type = BTRFS_EXTENT_DATA_KEY;
8360 key.offset = dback->offset;
8361 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8363 fprintf(stderr, "Error looking up ref %d\n", ret);
8368 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8369 ret = btrfs_next_leaf(root, path);
8371 fprintf(stderr, "Couldn't find our ref, next\n");
8375 leaf = path->nodes[0];
8376 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8377 if (key.objectid != dback->owner ||
8378 key.type != BTRFS_EXTENT_DATA_KEY) {
8379 fprintf(stderr, "Couldn't find our ref, search\n");
8382 fi = btrfs_item_ptr(leaf, path->slots[0],
8383 struct btrfs_file_extent_item);
8384 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8385 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8387 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8392 btrfs_release_path(path);
8394 trans = btrfs_start_transaction(root, 1);
8396 return PTR_ERR(trans);
8399 * Ok we have the key of the file extent we want to fix, now we can cow
8400 * down to the thing and fix it.
8402 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8404 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8405 key.objectid, key.type, key.offset, ret);
8409 fprintf(stderr, "Well that's odd, we just found this key "
8410 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8415 leaf = path->nodes[0];
8416 fi = btrfs_item_ptr(leaf, path->slots[0],
8417 struct btrfs_file_extent_item);
8419 if (btrfs_file_extent_compression(leaf, fi) &&
8420 dback->disk_bytenr != entry->bytenr) {
8421 fprintf(stderr, "Ref doesn't match the record start and is "
8422 "compressed, please take a btrfs-image of this file "
8423 "system and send it to a btrfs developer so they can "
8424 "complete this functionality for bytenr %Lu\n",
8425 dback->disk_bytenr);
8430 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8431 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8432 } else if (dback->disk_bytenr > entry->bytenr) {
8433 u64 off_diff, offset;
8435 off_diff = dback->disk_bytenr - entry->bytenr;
8436 offset = btrfs_file_extent_offset(leaf, fi);
8437 if (dback->disk_bytenr + offset +
8438 btrfs_file_extent_num_bytes(leaf, fi) >
8439 entry->bytenr + entry->bytes) {
8440 fprintf(stderr, "Ref is past the entry end, please "
8441 "take a btrfs-image of this file system and "
8442 "send it to a btrfs developer, ref %Lu\n",
8443 dback->disk_bytenr);
8448 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8449 btrfs_set_file_extent_offset(leaf, fi, offset);
8450 } else if (dback->disk_bytenr < entry->bytenr) {
8453 offset = btrfs_file_extent_offset(leaf, fi);
8454 if (dback->disk_bytenr + offset < entry->bytenr) {
8455 fprintf(stderr, "Ref is before the entry start, please"
8456 " take a btrfs-image of this file system and "
8457 "send it to a btrfs developer, ref %Lu\n",
8458 dback->disk_bytenr);
8463 offset += dback->disk_bytenr;
8464 offset -= entry->bytenr;
8465 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8466 btrfs_set_file_extent_offset(leaf, fi, offset);
8469 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8472 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8473 * only do this if we aren't using compression, otherwise it's a
8476 if (!btrfs_file_extent_compression(leaf, fi))
8477 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8479 printf("ram bytes may be wrong?\n");
8480 btrfs_mark_buffer_dirty(leaf);
8482 err = btrfs_commit_transaction(trans, root);
8483 btrfs_release_path(path);
8484 return ret ? ret : err;
/*
 * Check that the data backrefs of @rec agree with each other (and with the
 * extent record) on the extent bytenr and size, and repair the file extents
 * that disagree.
 *
 * @info: filesystem being repaired
 * @path: scratch path handed on to repair_ref()
 * @rec:  extent record whose backrefs are examined
 *
 * Each non-full data backref with found_ref != 0 is grouped into a local
 * list of extent_entry items keyed by (disk_bytenr, bytes).  The winner is
 * picked with find_most_right_entry(); the extent record itself
 * (rec->start, rec->nr) is consulted when the backrefs alone cannot decide.
 * Every backref that does not match the winner is then passed to
 * repair_ref().  The temporary entries are unlinked from the list at the
 * end.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so some statements (counters, gotos, returns) are not
 * visible here; confirm details against the complete file.
 */
8487 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8488 struct extent_record *rec)
8490 struct extent_backref *back;
8491 struct data_backref *dback;
8492 struct extent_entry *entry, *best = NULL;
8495 int broken_entries = 0;
8500 * Metadata is easy and the backrefs should always agree on bytenr and
8501 * size, if not we've got bigger issues.
8506 list_for_each_entry(back, &rec->backrefs, list) {
8507 if (back->full_backref || !back->is_data)
8510 dback = to_data_backref(back);
8513 * We only pay attention to backrefs that we found a real
8516 if (dback->found_ref == 0)
8520 * For now we only catch when the bytes don't match, not the
8521 * bytenr. We can easily do this at the same time, but I want
8522 * to have a fs image to test on before we just add repair
8523 * functionality willy-nilly so we know we won't screw up the
8527 entry = find_entry(&entries, dback->disk_bytenr,
8530 entry = malloc(sizeof(struct extent_entry));
8535 memset(entry, 0, sizeof(*entry));
8536 entry->bytenr = dback->disk_bytenr;
8537 entry->bytes = dback->bytes;
8538 list_add_tail(&entry->list, &entries);
8543 * If we only have on entry we may think the entries agree when
8544 * in reality they don't so we have to do some extra checking.
8546 if (dback->disk_bytenr != rec->start ||
8547 dback->bytes != rec->nr || back->broken)
8558 /* Yay all the backrefs agree, carry on good sir */
8559 if (nr_entries <= 1 && !mismatch)
8562 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8563 "%Lu\n", rec->start);
8566 * First we want to see if the backrefs can agree amongst themselves who
8567 * is right, so figure out which one of the entries has the highest
8570 best = find_most_right_entry(&entries);
8573 * Ok so we may have an even split between what the backrefs think, so
8574 * this is where we use the extent ref to see what it thinks.
8577 entry = find_entry(&entries, rec->start, rec->nr);
8578 if (!entry && (!broken_entries || !rec->found_rec)) {
8579 fprintf(stderr, "Backrefs don't agree with each other "
8580 "and extent record doesn't agree with anybody,"
8581 " so we can't fix bytenr %Lu bytes %Lu\n",
8582 rec->start, rec->nr);
8585 } else if (!entry) {
8587 * Ok our backrefs were broken, we'll assume this is the
8588 * correct value and add an entry for this range.
8590 entry = malloc(sizeof(struct extent_entry));
8595 memset(entry, 0, sizeof(*entry));
8596 entry->bytenr = rec->start;
8597 entry->bytes = rec->nr;
8598 list_add_tail(&entry->list, &entries);
8602 best = find_most_right_entry(&entries);
8604 fprintf(stderr, "Backrefs and extent record evenly "
8605 "split on who is right, this is going to "
8606 "require user input to fix bytenr %Lu bytes "
8607 "%Lu\n", rec->start, rec->nr);
8614 * I don't think this can happen currently as we'll abort() if we catch
8615 * this case higher up, but in case somebody removes that we still can't
8616 * deal with it properly here yet, so just bail out of that's the case.
8618 if (best->bytenr != rec->start) {
8619 fprintf(stderr, "Extent start and backref starts don't match, "
8620 "please use btrfs-image on this file system and send "
8621 "it to a btrfs developer so they can make fsck fix "
8622 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8623 rec->start, rec->nr);
8629 * Ok great we all agreed on an extent record, let's go find the real
8630 * references and fix up the ones that don't match.
8632 list_for_each_entry(back, &rec->backrefs, list) {
8633 if (back->full_backref || !back->is_data)
8636 dback = to_data_backref(back);
8639 * Still ignoring backrefs that don't have a real ref attached
8642 if (dback->found_ref == 0)
8645 if (dback->bytes == best->bytes &&
8646 dback->disk_bytenr == best->bytenr)
8649 ret = repair_ref(info, path, dback, best);
8655 * Ok we messed with the actual refs, which means we need to drop our
8656 * entire cache and go back and rescan. I know this is a huge pain and
8657 * adds a lot of extra work, but it's the only way to be safe. Once all
8658 * the backrefs agree we may not need to do anything to the extent
8663 while (!list_empty(&entries)) {
8664 entry = list_entry(entries.next, struct extent_entry, list);
8665 list_del_init(&entry->list);
/*
 * Sort out a record that was queued on the duplicate_extents list.
 *
 * @extent_cache: cache tree holding all extent records
 * @rec:          record taken from the duplicate list
 *
 * Nothing is reshuffled when @rec itself was found in the extent tree
 * (found_rec) or has more than one duplicate.  Otherwise @rec is removed
 * from the cache and the first record on its dups list ("good") takes its
 * place: good inherits the ref count and backrefs of @rec, absorbs any
 * overlapping records still in the cache, and is inserted into the cache.
 *
 * The last visible return yields 0 when good still has duplicates that need
 * deleting and 1 when it has none.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so some statements (loops, early returns, frees) are not
 * visible here; confirm details against the complete file.
 */
8671 static int process_duplicates(struct cache_tree *extent_cache,
8672 struct extent_record *rec)
8674 struct extent_record *good, *tmp;
8675 struct cache_extent *cache;
8679 * If we found a extent record for this extent then return, or if we
8680 * have more than one duplicate we are likely going to need to delete
8683 if (rec->found_rec || rec->num_duplicates > 1)
8686 /* Shouldn't happen but just in case */
8687 BUG_ON(!rec->num_duplicates);
8690 * So this happens if we end up with a backref that doesn't match the
8691 * actual extent entry. So either the backref is bad or the extent
8692 * entry is bad. Either way we want to have the extent_record actually
8693 * reflect what we found in the extent_tree, so we need to take the
8694 * duplicate out and use that as the extent_record since the only way we
8695 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8697 remove_cache_extent(extent_cache, &rec->cache);
8699 good = to_extent_record(rec->dups.next);
8700 list_del_init(&good->list);
8701 INIT_LIST_HEAD(&good->backrefs);
8702 INIT_LIST_HEAD(&good->dups);
8703 good->cache.start = good->start;
8704 good->cache.size = good->nr;
8705 good->content_checked = 0;
8706 good->owner_ref_checked = 0;
8707 good->num_duplicates = 0;
8708 good->refs = rec->refs;
8709 list_splice_init(&rec->backrefs, &good->backrefs);
8711 cache = lookup_cache_extent(extent_cache, good->start,
8715 tmp = container_of(cache, struct extent_record, cache);
8718 * If we find another overlapping extent and it's found_rec is
8719 * set then it's a duplicate and we need to try and delete
8722 if (tmp->found_rec || tmp->num_duplicates > 0) {
8723 if (list_empty(&good->list))
8724 list_add_tail(&good->list,
8725 &duplicate_extents);
8726 good->num_duplicates += tmp->num_duplicates + 1;
8727 list_splice_init(&tmp->dups, &good->dups);
8728 list_del_init(&tmp->list);
8729 list_add_tail(&tmp->list, &good->dups);
8730 remove_cache_extent(extent_cache, &tmp->cache);
8735 * Ok we have another non extent item backed extent rec, so lets
8736 * just add it to this extent and carry on like we did above.
8738 good->refs += tmp->refs;
8739 list_splice_init(&tmp->backrefs, &good->backrefs);
8740 remove_cache_extent(extent_cache, &tmp->cache);
8743 ret = insert_cache_extent(extent_cache, &good->cache);
8746 return good->num_duplicates ? 0 : 1;
/*
 * Delete the duplicate extent items recorded for @rec from the extent tree.
 *
 * @root: any root of the filesystem; the extent root is taken from its
 *        fs_info before the transaction is started
 * @rec:  record whose dups list holds the overlapping extent records
 *
 * The record that covers all the others is kept; the rest are moved to a
 * local delete list.  Inside one transaction on the extent root, every
 * listed record that has found_rec set has its
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) item searched for and deleted.  Both
 * the delete list and the remaining dups list are emptied before returning.
 *
 * The final return is "ret ? ret : nr_del": an error code if one was set,
 * otherwise the value of nr_del.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so some statements (assignments to good, counters, frees,
 * labels) are not visible here; confirm details against the complete file.
 */
8749 static int delete_duplicate_records(struct btrfs_root *root,
8750 struct extent_record *rec)
8752 struct btrfs_trans_handle *trans;
8753 LIST_HEAD(delete_list);
8754 struct btrfs_path path;
8755 struct extent_record *tmp, *good, *n;
8758 struct btrfs_key key;
8760 btrfs_init_path(&path);
8763 /* Find the record that covers all of the duplicates. */
8764 list_for_each_entry(tmp, &rec->dups, list) {
8765 if (good->start < tmp->start)
8767 if (good->nr > tmp->nr)
8770 if (tmp->start + tmp->nr < good->start + good->nr) {
8771 fprintf(stderr, "Ok we have overlapping extents that "
8772 "aren't completely covered by each other, this "
8773 "is going to require more careful thought. "
8774 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8775 tmp->start, tmp->nr, good->start, good->nr);
8782 list_add_tail(&rec->list, &delete_list);
8784 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8787 list_move_tail(&tmp->list, &delete_list);
/* All deletions below go through the extent tree in one transaction. */
8790 root = root->fs_info->extent_root;
8791 trans = btrfs_start_transaction(root, 1);
8792 if (IS_ERR(trans)) {
8793 ret = PTR_ERR(trans);
8797 list_for_each_entry(tmp, &delete_list, list) {
8798 if (tmp->found_rec == 0)
8800 key.objectid = tmp->start;
8801 key.type = BTRFS_EXTENT_ITEM_KEY;
8802 key.offset = tmp->nr;
8804 /* Shouldn't happen but just in case */
8805 if (tmp->metadata) {
8806 fprintf(stderr, "Well this shouldn't happen, extent "
8807 "record overlaps but is metadata? "
8808 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8812 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8818 ret = btrfs_del_item(trans, root, &path);
8821 btrfs_release_path(&path);
8824 err = btrfs_commit_transaction(trans, root);
8828 while (!list_empty(&delete_list)) {
8829 tmp = to_extent_record(delete_list.next);
8830 list_del_init(&tmp->list);
8836 while (!list_empty(&rec->dups)) {
8837 tmp = to_extent_record(rec->dups.next);
8838 list_del_init(&tmp->list);
8842 btrfs_release_path(&path);
8844 if (!ret && !nr_del)
8845 rec->num_duplicates = 0;
8847 return ret ? ret : nr_del;
/*
 * Try to attach real file extents to the data backrefs of @rec that were
 * not matched during the scan.
 *
 * @info:         filesystem being checked
 * @path:         scratch path; released after each lookup
 * @extent_cache: cache tree of extent records, used to see whether the file
 *                extent already belongs to another record
 * @rec:          extent record whose backrefs are examined
 *
 * For each non-full data backref with found_ref == 0, the backref root is
 * read and the (owner, BTRFS_EXTENT_DATA_KEY, offset) key is searched in
 * it.  When the backref is adopted, its found_ref is incremented and its
 * disk_bytenr/bytes are overwritten with the values read from the file
 * extent item.
 *
 * A root that does not exist (-ENOENT) only skips that backref; any other
 * error from reading the root is returned.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so some statements (continue/return paths, the marking of
 * the backref as untrusted) are not visible here; confirm details against
 * the complete file.
 */
8850 static int find_possible_backrefs(struct btrfs_fs_info *info,
8851 struct btrfs_path *path,
8852 struct cache_tree *extent_cache,
8853 struct extent_record *rec)
8855 struct btrfs_root *root;
8856 struct extent_backref *back;
8857 struct data_backref *dback;
8858 struct cache_extent *cache;
8859 struct btrfs_file_extent_item *fi;
8860 struct btrfs_key key;
8864 list_for_each_entry(back, &rec->backrefs, list) {
8865 /* Don't care about full backrefs (poor unloved backrefs) */
8866 if (back->full_backref || !back->is_data)
8869 dback = to_data_backref(back);
8871 /* We found this one, we don't need to do a lookup */
8872 if (dback->found_ref)
8875 key.objectid = dback->root;
8876 key.type = BTRFS_ROOT_ITEM_KEY;
8877 key.offset = (u64)-1;
8879 root = btrfs_read_fs_root(info, &key);
8881 /* No root, definitely a bad ref, skip */
8882 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8884 /* Other err, exit */
8886 return PTR_ERR(root);
8888 key.objectid = dback->owner;
8889 key.type = BTRFS_EXTENT_DATA_KEY;
8890 key.offset = dback->offset;
8891 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8893 btrfs_release_path(path);
8896 /* Didn't find it, we can carry on */
8901 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8902 struct btrfs_file_extent_item);
8903 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8904 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8905 btrfs_release_path(path);
8906 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8908 struct extent_record *tmp;
8909 tmp = container_of(cache, struct extent_record, cache);
8912 * If we found an extent record for the bytenr for this
8913 * particular backref then we can't add it to our
8914 * current extent record. We only want to add backrefs
8915 * that don't have a corresponding extent item in the
8916 * extent tree since they likely belong to this record
8917 * and we need to fix it if it doesn't match bytenrs.
8923 dback->found_ref += 1;
8924 dback->disk_bytenr = bytenr;
8925 dback->bytes = bytes;
8928 * Set this so the verify backref code knows not to trust the
8929 * values in this backref.
8938 * Record orphan data ref into corresponding root.
8940 * Return 0 if the extent item contains data ref and recorded.
8941 * Return 1 if the extent item contains no useful data ref
8942 * On that case, it may contains only shared_dataref or metadata backref
8943 * or the file extent exists(this should be handled by the extent bytenr
8945 * Return <0 if something goes wrong.
8947 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8948 struct extent_record *rec)
8950 struct btrfs_key key;
8951 struct btrfs_root *dest_root;
8952 struct extent_backref *back;
8953 struct data_backref *dback;
8954 struct orphan_data_extent *orphan;
8955 struct btrfs_path path;
8956 int recorded_data_ref = 0;
8961 btrfs_init_path(&path);
8962 list_for_each_entry(back, &rec->backrefs, list) {
8963 if (back->full_backref || !back->is_data ||
8964 !back->found_extent_tree)
8966 dback = to_data_backref(back);
8967 if (dback->found_ref)
8969 key.objectid = dback->root;
8970 key.type = BTRFS_ROOT_ITEM_KEY;
8971 key.offset = (u64)-1;
8973 dest_root = btrfs_read_fs_root(fs_info, &key);
8975 /* For non-exist root we just skip it */
8976 if (IS_ERR(dest_root) || !dest_root)
8979 key.objectid = dback->owner;
8980 key.type = BTRFS_EXTENT_DATA_KEY;
8981 key.offset = dback->offset;
8983 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8984 btrfs_release_path(&path);
8986 * For ret < 0, it's OK since the fs-tree may be corrupted,
8987 * we need to record it for inode/file extent rebuild.
8988 * For ret > 0, we record it only for file extent rebuild.
8989 * For ret == 0, the file extent exists but only bytenr
8990 * mismatch, let the original bytenr fix routine to handle,
8996 orphan = malloc(sizeof(*orphan));
9001 INIT_LIST_HEAD(&orphan->list);
9002 orphan->root = dback->root;
9003 orphan->objectid = dback->owner;
9004 orphan->offset = dback->offset;
9005 orphan->disk_bytenr = rec->cache.start;
9006 orphan->disk_len = rec->cache.size;
9007 list_add(&dest_root->orphan_data_extents, &orphan->list);
9008 recorded_data_ref = 1;
9011 btrfs_release_path(&path);
9013 return !recorded_data_ref;
/*
 * When an incorrect extent item is found, this deletes all of the existing
 * entries for it and recreates them based on what the tree scan found.
 */
/*
 * Rebuild the extent tree entries of @rec from the backrefs found during
 * the scan.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache tree of extent records, passed to
 *                find_possible_backrefs()
 * @rec:          extent record to fix
 *
 * Visible steps: for a data extent whose ref count differs from the extent
 * item, find_possible_backrefs() is tried first; verify_backrefs() makes the
 * backrefs agree; a transaction is started on the extent root; the existing
 * records are deleted; the corrupt block cache is consulted for the range;
 * then record_extent() is called for the backrefs that have found_ref set,
 * and the transaction is committed.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so some statements (error checks, gotos, labels, the
 * return) are not visible here; confirm details against the complete file.
 */
9023 static int fixup_extent_refs(struct btrfs_fs_info *info,
9024 struct cache_tree *extent_cache,
9025 struct extent_record *rec)
9027 struct btrfs_trans_handle *trans = NULL;
9029 struct btrfs_path path;
9030 struct list_head *cur = rec->backrefs.next;
9031 struct cache_extent *cache;
9032 struct extent_backref *back;
9036 if (rec->flag_block_full_backref)
9037 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9039 btrfs_init_path(&path);
9040 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9042 * Sometimes the backrefs themselves are so broken they don't
9043 * get attached to any meaningful rec, so first go back and
9044 * check any of our backrefs that we couldn't find and throw
9045 * them into the list if we find the backref so that
9046 * verify_backrefs can figure out what to do.
9048 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9053 /* step one, make sure all of the backrefs agree */
9054 ret = verify_backrefs(info, &path, rec);
9058 trans = btrfs_start_transaction(info->extent_root, 1);
9059 if (IS_ERR(trans)) {
9060 ret = PTR_ERR(trans);
9064 /* step two, delete all the existing records */
9065 ret = delete_extent_records(trans, info->extent_root, &path,
9071 /* was this block corrupt? If so, don't add references to it */
9072 cache = lookup_cache_extent(info->corrupt_blocks,
9073 rec->start, rec->max_size);
9079 /* step three, recreate all the refs we did find */
9080 while(cur != &rec->backrefs) {
9081 back = to_extent_backref(cur);
9085 * if we didn't find any references, don't create a
9088 if (!back->found_ref)
9091 rec->bad_full_backref = 0;
9092 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9100 int err = btrfs_commit_transaction(trans, info->extent_root);
9106 fprintf(stderr, "Repaired extent references for %llu\n",
9107 (unsigned long long)rec->start);
9109 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of @rec so
 * that it matches rec->flag_block_full_backref.
 *
 * @fs_info: filesystem being repaired
 * @rec:     extent record whose on-disk flags are wrong
 *
 * The item is looked up by rec->start, using a METADATA_ITEM key with the
 * tree level as offset for metadata records and an EXTENT_ITEM key with
 * max_size as offset otherwise.  The search is done with COW inside a
 * transaction, the flags are rewritten, the leaf is marked dirty and the
 * transaction is committed.  The transaction is also committed on the
 * visible failure paths.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so some statements (conditions, else branches, returns)
 * are not visible here; confirm details against the complete file.
 */
9113 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9114 struct extent_record *rec)
9116 struct btrfs_trans_handle *trans;
9117 struct btrfs_root *root = fs_info->extent_root;
9118 struct btrfs_path path;
9119 struct btrfs_extent_item *ei;
9120 struct btrfs_key key;
9124 key.objectid = rec->start;
9125 if (rec->metadata) {
9126 key.type = BTRFS_METADATA_ITEM_KEY;
9127 key.offset = rec->info_level;
9129 key.type = BTRFS_EXTENT_ITEM_KEY;
9130 key.offset = rec->max_size;
9133 trans = btrfs_start_transaction(root, 0);
9135 return PTR_ERR(trans);
9137 btrfs_init_path(&path);
9138 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9140 btrfs_release_path(&path);
9141 btrfs_commit_transaction(trans, root);
9144 fprintf(stderr, "Didn't find extent for %llu\n",
9145 (unsigned long long)rec->start);
9146 btrfs_release_path(&path);
9147 btrfs_commit_transaction(trans, root);
9151 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9152 struct btrfs_extent_item);
9153 flags = btrfs_extent_flags(path.nodes[0], ei);
9154 if (rec->flag_block_full_backref) {
9155 fprintf(stderr, "setting full backref on %llu\n",
9156 (unsigned long long)key.objectid);
9157 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9159 fprintf(stderr, "clearing full backref on %llu\n",
9160 (unsigned long long)key.objectid);
9161 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9163 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9164 btrfs_mark_buffer_dirty(path.nodes[0]);
9165 btrfs_release_path(&path);
9166 ret = btrfs_commit_transaction(trans, root);
9168 fprintf(stderr, "Repaired extent flags for %llu\n",
9169 (unsigned long long)rec->start);
/* Right now we only prune from the extent allocation tree. */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * @trans:   running transaction
 * @info:    filesystem being repaired
 * @corrupt: description of the corrupt block (key, level, cache.start)
 *
 * The search stops one level above the corrupt block (lowest_level is set
 * to corrupt->level + 1).  The slot returned by the search is checked
 * first; if its block pointer is not the corrupt block, every slot of that
 * node is scanned.  The pointer is removed with btrfs_del_ptr().
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so some statements (error checks, gotos, labels, the
 * return) are not visible here; confirm details against the complete file.
 */
9175 static int prune_one_block(struct btrfs_trans_handle *trans,
9176 struct btrfs_fs_info *info,
9177 struct btrfs_corrupt_block *corrupt)
9180 struct btrfs_path path;
9181 struct extent_buffer *eb;
9185 int level = corrupt->level + 1;
9187 btrfs_init_path(&path);
9189 /* we want to stop at the parent to our busted block */
9190 path.lowest_level = level;
9192 ret = btrfs_search_slot(trans, info->extent_root,
9193 &corrupt->key, &path, -1, 1);
9198 eb = path.nodes[level];
9205 * hopefully the search gave us the block we want to prune,
9206 * lets try that first
9208 slot = path.slots[level];
9209 found = btrfs_node_blockptr(eb, slot);
9210 if (found == corrupt->cache.start)
9213 nritems = btrfs_header_nritems(eb);
9215 /* the search failed, lets scan this node and hope we find it */
9216 for (slot = 0; slot < nritems; slot++) {
9217 found = btrfs_node_blockptr(eb, slot);
9218 if (found == corrupt->cache.start)
9222 * we couldn't find the bad block. TODO, search all the nodes for pointers
9225 if (eb == info->extent_root->node) {
9230 btrfs_release_path(&path);
9235 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9236 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9239 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks from the extent tree.
 *
 * @info: filesystem being repaired
 *
 * Each cached corrupt block is handed to prune_one_block() and then removed
 * from the cache; the return value of prune_one_block() is not used.  A
 * transaction on the extent root is started for the work and committed at
 * the end; the commit result is returned on that path, and a failure to
 * start the transaction is returned as its PTR_ERR value.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so the loop construct and some conditions are not visible
 * here; confirm details against the complete file.
 */
9243 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9245 struct btrfs_trans_handle *trans = NULL;
9246 struct cache_extent *cache;
9247 struct btrfs_corrupt_block *corrupt;
9250 cache = search_cache_extent(info->corrupt_blocks, 0);
9254 trans = btrfs_start_transaction(info->extent_root, 1);
9256 return PTR_ERR(trans);
9258 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9259 prune_one_block(trans, info, corrupt);
9260 remove_cache_extent(info->corrupt_blocks, cache);
9263 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop cached free-space state so it is rebuilt after the repair changed
 * the extent tree.
 *
 * @fs_info: filesystem being repaired
 *
 * Ranges marked EXTENT_DIRTY in fs_info->free_space_cache are located and
 * cleared, then the block groups are visited in order of start offset
 * (each step continues from the end of the previous block group).
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so the loop constructs and whatever is done to each
 * block group are not visible here; confirm details against the complete
 * file.
 */
9267 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9269 struct btrfs_block_group_cache *cache;
9274 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9275 &start, &end, EXTENT_DIRTY);
9278 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9283 cache = btrfs_lookup_first_block_group(fs_info, start);
9288 start = cache->key.objectid + cache->key.offset;
/*
 * Walk every extent record in @extent_cache, report the inconsistencies
 * found during the scan and, in repair mode, try to fix them.
 *
 * @root:         any root of the filesystem (fs_info is taken from it)
 * @extent_cache: cache tree of extent records built by the scan
 *
 * Visible phases:
 *  - the ranges of all extent records and of all corrupt blocks are marked
 *    dirty in fs_info->excluded_extents, corrupt blocks are pruned and the
 *    cached block groups are reset;
 *  - in repair mode the duplicate_extents list is drained through
 *    process_duplicates() and delete_duplicate_records();
 *  - each record is checked for duplicates, ref count mismatch, backpointer
 *    mismatch, failed owner ref check, bad full backref, metadata crossing
 *    a stripe boundary and chunk type mismatch; fixup_extent_refs() and
 *    fixup_extent_flags() are called for the repairable cases;
 *  - at the end block accounting is fixed inside a transaction on the
 *    extent root.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so some statements (loop headers, error bookkeeping,
 * gotos, returns) are not visible here; confirm details against the
 * complete file.
 */
9292 static int check_extent_refs(struct btrfs_root *root,
9293 struct cache_tree *extent_cache)
9295 struct extent_record *rec;
9296 struct cache_extent *cache;
9302 * if we're doing a repair, we have to make sure
9303 * we don't allocate from the problem extents.
9304 * In the worst case, this will be all the
9307 cache = search_cache_extent(extent_cache, 0);
9309 rec = container_of(cache, struct extent_record, cache);
9310 set_extent_dirty(root->fs_info->excluded_extents,
9312 rec->start + rec->max_size - 1);
9313 cache = next_cache_extent(cache);
9316 /* pin down all the corrupted blocks too */
9317 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9319 set_extent_dirty(root->fs_info->excluded_extents,
9321 cache->start + cache->size - 1);
9322 cache = next_cache_extent(cache);
9324 prune_corrupt_blocks(root->fs_info);
9325 reset_cached_block_groups(root->fs_info);
9328 reset_cached_block_groups(root->fs_info);
9331 * We need to delete any duplicate entries we find first otherwise we
9332 * could mess up the extent tree when we have backrefs that actually
9333 * belong to a different extent item and not the weird duplicate one.
9335 while (repair && !list_empty(&duplicate_extents)) {
9336 rec = to_extent_record(duplicate_extents.next);
9337 list_del_init(&rec->list);
9339 /* Sometimes we can find a backref before we find an actual
9340 * extent, so we need to process it a little bit to see if there
9341 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9342 * if this is a backref screwup. If we need to delete stuff
9343 * process_duplicates() will return 0, otherwise it will return
9346 if (process_duplicates(extent_cache, rec))
9348 ret = delete_duplicate_records(root, rec);
9352 * delete_duplicate_records will return the number of entries
9353 * deleted, so if it's greater than 0 then we know we actually
9354 * did something and we need to remove.
9367 cache = search_cache_extent(extent_cache, 0);
9370 rec = container_of(cache, struct extent_record, cache);
9371 if (rec->num_duplicates) {
9372 fprintf(stderr, "extent item %llu has multiple extent "
9373 "items\n", (unsigned long long)rec->start);
9377 if (rec->refs != rec->extent_item_refs) {
9378 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9379 (unsigned long long)rec->start,
9380 (unsigned long long)rec->nr);
9381 fprintf(stderr, "extent item %llu, found %llu\n",
9382 (unsigned long long)rec->extent_item_refs,
9383 (unsigned long long)rec->refs);
9384 ret = record_orphan_data_extents(root->fs_info, rec);
9390 if (all_backpointers_checked(rec, 1)) {
9391 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9392 (unsigned long long)rec->start,
9393 (unsigned long long)rec->nr);
9397 if (!rec->owner_ref_checked) {
9398 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9399 (unsigned long long)rec->start,
9400 (unsigned long long)rec->nr);
9405 if (repair && fix) {
9406 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9412 if (rec->bad_full_backref) {
9413 fprintf(stderr, "bad full backref, on [%llu]\n",
9414 (unsigned long long)rec->start);
9416 ret = fixup_extent_flags(root->fs_info, rec);
9424 * Although it's not a extent ref's problem, we reuse this
9425 * routine for error reporting.
9426 * No repair function yet.
9428 if (rec->crossing_stripes) {
9430 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9431 rec->start, rec->start + rec->max_size);
9435 if (rec->wrong_chunk_type) {
9437 "bad extent [%llu, %llu), type mismatch with chunk\n",
9438 rec->start, rec->start + rec->max_size);
9442 remove_cache_extent(extent_cache, cache);
9443 free_all_extent_backrefs(rec);
9444 if (!init_extent_tree && repair && (!cur_err || fix))
9445 clear_extent_dirty(root->fs_info->excluded_extents,
9447 rec->start + rec->max_size - 1);
9452 if (ret && ret != -EAGAIN) {
9453 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9456 struct btrfs_trans_handle *trans;
9458 root = root->fs_info->extent_root;
9459 trans = btrfs_start_transaction(root, 1);
9460 if (IS_ERR(trans)) {
9461 ret = PTR_ERR(trans);
9465 ret = btrfs_fix_block_accounting(trans, root);
9468 ret = btrfs_commit_transaction(trans, root);
9477 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9481 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9482 stripe_size = length;
9483 stripe_size /= num_stripes;
9484 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9485 stripe_size = length * 2;
9486 stripe_size /= num_stripes;
9487 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9488 stripe_size = length;
9489 stripe_size /= (num_stripes - 1);
9490 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9491 stripe_size = length;
9492 stripe_size /= (num_stripes - 2);
9494 stripe_size = length;
/*
 * Check a chunk against its block group and device extent references.
 *
 * Return 0 if all refs seem valid.
 * Return 1 if only part of the refs seem valid and a later pass has to
 * rebuild the rest (e.g. a missing block group, which requires searching
 * the extent tree).
 * Return -1 if essential refs are missing and cannot be rebuilt.
 */
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache.
 *
 * @chunk_rec:         chunk to verify
 * @block_group_cache: tree of block group records
 * @dev_extent_cache:  tree of device extent records
 * (the fourth parameter is not visible in this listing; check_chunks()
 *  passes its silent flag in that position)
 *
 * Block group check: a block group record is looked up for the chunk and
 * its offset/objectid/flags are compared with the chunk length/offset/type.
 * On a match the record is unlinked from its list and stored in
 * chunk_rec->bg_rec.
 *
 * Device extent check: the expected stripe length is computed with
 * calc_stripe_length(), then for every stripe the device extent at
 * (devid, offset) is looked up and compared on objectid, offset,
 * chunk_offset and length.  Matching device extents are moved onto
 * chunk_rec->dextents.
 *
 * NOTE(review): this listing carries embedded line numbers and has gaps in
 * that numbering, so some statements (error bookkeeping, else branches, the
 * return) are not visible here; confirm details against the complete file.
 */
9506 static int check_chunk_refs(struct chunk_record *chunk_rec,
9507 struct block_group_tree *block_group_cache,
9508 struct device_extent_tree *dev_extent_cache,
9511 struct cache_extent *block_group_item;
9512 struct block_group_record *block_group_rec;
9513 struct cache_extent *dev_extent_item;
9514 struct device_extent_record *dev_extent_rec;
9518 int metadump_v2 = 0;
9522 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9525 if (block_group_item) {
9526 block_group_rec = container_of(block_group_item,
9527 struct block_group_record,
9529 if (chunk_rec->length != block_group_rec->offset ||
9530 chunk_rec->offset != block_group_rec->objectid ||
9532 chunk_rec->type_flags != block_group_rec->flags)) {
9535 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9536 chunk_rec->objectid,
9541 chunk_rec->type_flags,
9542 block_group_rec->objectid,
9543 block_group_rec->type,
9544 block_group_rec->offset,
9545 block_group_rec->offset,
9546 block_group_rec->objectid,
9547 block_group_rec->flags);
9550 list_del_init(&block_group_rec->list);
9551 chunk_rec->bg_rec = block_group_rec;
9556 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9557 chunk_rec->objectid,
9562 chunk_rec->type_flags);
/* Per-device length each stripe of this chunk should occupy. */
9569 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9570 chunk_rec->num_stripes);
9571 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9572 devid = chunk_rec->stripes[i].devid;
9573 offset = chunk_rec->stripes[i].offset;
9574 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9575 devid, offset, length);
9576 if (dev_extent_item) {
9577 dev_extent_rec = container_of(dev_extent_item,
9578 struct device_extent_record,
9580 if (dev_extent_rec->objectid != devid ||
9581 dev_extent_rec->offset != offset ||
9582 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9583 dev_extent_rec->length != length) {
9586 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9587 chunk_rec->objectid,
9590 chunk_rec->stripes[i].devid,
9591 chunk_rec->stripes[i].offset,
9592 dev_extent_rec->objectid,
9593 dev_extent_rec->offset,
9594 dev_extent_rec->length);
9597 list_move(&dev_extent_rec->chunk_list,
9598 &chunk_rec->dextents);
9603 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9604 chunk_rec->objectid,
9607 chunk_rec->stripes[i].devid,
9608 chunk_rec->stripes[i].offset);
9615 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk record in @chunk_cache and then walk
 * the leftover block groups and device extents.
 *
 * @good:    if non-NULL, chunks whose check returned 0 are appended here
 * @rebuild: if non-NULL, chunks whose check returned > 0 are appended here
 * @bad:     chunks are appended here under a condition that is not visible
 *           in this view (NOTE(review): presumably err < 0 -- confirm)
 * @silent:  passed through to check_chunk_refs()
 *
 * After the chunk walk, the entries on block_group_cache->block_groups and
 * on dev_extent_cache->no_chunk_orphans are iterated; the messages below
 * describe them as having no matching chunk.
 */
9616 int check_chunks(struct cache_tree *chunk_cache,
9617 struct block_group_tree *block_group_cache,
9618 struct device_extent_tree *dev_extent_cache,
9619 struct list_head *good, struct list_head *bad,
9620 struct list_head *rebuild, int silent)
9622 struct cache_extent *chunk_item;
9623 struct chunk_record *chunk_rec;
9624 struct block_group_record *bg_rec;
9625 struct device_extent_record *dext_rec;
9629 chunk_item = first_cache_extent(chunk_cache);
9630 while (chunk_item) {
9631 chunk_rec = container_of(chunk_item, struct chunk_record,
9633 err = check_chunk_refs(chunk_rec, block_group_cache,
9634 dev_extent_cache, silent);
/* Sort the chunk onto one of the optional result lists according to err */
9637 if (err == 0 && good)
9638 list_add_tail(&chunk_rec->list, good);
9639 if (err > 0 && rebuild)
9640 list_add_tail(&chunk_rec->list, rebuild);
9642 list_add_tail(&chunk_rec->list, bad);
9643 chunk_item = next_cache_extent(chunk_item);
9646 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9649 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9657 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9661 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of the cached device extents belonging to @dev_rec and
 * compare the total with dev_rec->byte_used.
 *
 * The walk starts at the cache entry returned by search_cache_extent2() for
 * (dev_rec->devid, 0) and advances with next_cache_extent().  The code below
 * also removes each visited extent from its device_list.
 * NOTE(review): the objectid comparison is presumably the loop exit for
 * extents of another device; its body and the return statements are not
 * visible in this view -- confirm.
 */
9672 static int check_device_used(struct device_record *dev_rec,
9673 struct device_extent_tree *dext_cache)
9675 struct cache_extent *cache;
9676 struct device_extent_record *dev_extent_rec;
9679 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9681 dev_extent_rec = container_of(cache,
9682 struct device_extent_record,
9684 if (dev_extent_rec->objectid != dev_rec->devid)
9687 list_del_init(&dev_extent_rec->device_list);
9688 total_byte += dev_extent_rec->length;
9689 cache = next_cache_extent(cache);
/* The accumulated extent length must equal the byte_used value of the device record */
9692 if (total_byte != dev_rec->byte_used) {
9694 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9695 total_byte, dev_rec->byte_used, dev_rec->objectid,
9696 dev_rec->type, dev_rec->offset);
9703 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Call check_device_used() for every device record in the @dev_cache rbtree
 * (rb_first / rb_next walk), then iterate
 * dev_extent_cache->no_device_orphans; the message below describes those
 * device extents as having no device.
 * NOTE(review): how the per-device results are combined into the return
 * value is not visible in this view.
 */
9704 static int check_devices(struct rb_root *dev_cache,
9705 struct device_extent_tree *dev_extent_cache)
9707 struct rb_node *dev_node;
9708 struct device_record *dev_rec;
9709 struct device_extent_record *dext_rec;
9713 dev_node = rb_first(dev_cache);
9715 dev_rec = container_of(dev_node, struct device_record, node);
9716 err = check_device_used(dev_rec, dev_extent_cache);
9720 dev_node = rb_next(dev_node);
9722 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9725 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9726 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @drop_key is copied into the record with memcpy().  Callers in this file
 * also pass NULL for it (see check_chunks_and_extents()), so the copy is
 * presumably guarded by a check that is not visible in this view -- confirm.
 * NOTE(review): handling of a failed malloc() is likewise not visible here.
 */
9733 static int add_root_item_to_list(struct list_head *head,
9734 u64 objectid, u64 bytenr, u64 last_snapshot,
9735 u8 level, u8 drop_level,
9736 struct btrfs_key *drop_key)
9739 struct root_item_record *ri_rec;
9740 ri_rec = malloc(sizeof(*ri_rec));
9743 ri_rec->bytenr = bytenr;
9744 ri_rec->objectid = objectid;
9745 ri_rec->level = level;
9746 ri_rec->drop_level = drop_level;
9747 ri_rec->last_snapshot = last_snapshot;
9749 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9750 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list of root_item_record entries: the first entry is unlinked with
 * list_del_init() repeatedly until the list is empty.
 * NOTE(review): the release of the record itself is not visible in this
 * view; presumably it is freed after unlinking -- confirm.
 */
9755 static void free_root_item_list(struct list_head *list)
9757 struct root_item_record *ri_rec;
9759 while (!list_empty(list)) {
9760 ri_rec = list_first_entry(list, struct root_item_record,
9762 list_del_init(&ri_rec->list);
/*
 * Process every root_item_record queued on @list.
 *
 * For each record the tree block at rec->bytenr is read, registered through
 * add_root_to_pending() with rec->objectid, and run_next_block() is invoked
 * with that record; afterwards the buffer is released and the record is
 * unlinked from the list.  After the list loop run_next_block() is invoked
 * again with a NULL record.
 *
 * The cache and tree arguments are passed through unchanged to
 * add_root_to_pending() and run_next_block().
 *
 * NOTE(review): the conditions around the run_next_block() calls (see the
 * comment about rebuilding the extent tree) and the return paths are not
 * visible in this view.
 */
9767 static int deal_root_from_list(struct list_head *list,
9768 struct btrfs_root *root,
9769 struct block_info *bits,
9771 struct cache_tree *pending,
9772 struct cache_tree *seen,
9773 struct cache_tree *reada,
9774 struct cache_tree *nodes,
9775 struct cache_tree *extent_cache,
9776 struct cache_tree *chunk_cache,
9777 struct rb_root *dev_cache,
9778 struct block_group_tree *block_group_cache,
9779 struct device_extent_tree *dev_extent_cache)
9784 while (!list_empty(list)) {
9785 struct root_item_record *rec;
9786 struct extent_buffer *buf;
9787 rec = list_entry(list->next,
9788 struct root_item_record, list);
/* Read the root node of this tree; a buffer that is not uptodate is released again */
9790 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9791 if (!extent_buffer_uptodate(buf)) {
9792 free_extent_buffer(buf);
9796 ret = add_root_to_pending(buf, extent_cache, pending,
9797 seen, nodes, rec->objectid);
9801 * To rebuild extent tree, we need deal with snapshot
9802 * one by one, otherwise we deal with node firstly which
9803 * can maximize readahead.
9806 ret = run_next_block(root, bits, bits_nr, &last,
9807 pending, seen, reada, nodes,
9808 extent_cache, chunk_cache,
9809 dev_cache, block_group_cache,
9810 dev_extent_cache, rec);
9814 free_extent_buffer(buf);
9815 list_del(&rec->list);
/* Same call as above but without a root item record */
9821 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9822 reada, nodes, extent_cache, chunk_cache,
9823 dev_cache, block_group_cache,
9824 dev_extent_cache, NULL);
/*
 * Top-level pass that walks the trees and cross-checks chunks, extents and
 * devices.
 *
 * Steps visible below:
 *  - initialise the local caches and lists, and point @fs_info at some of
 *    them (excluded_extents, fsck_extent_cache, free_extent_hook,
 *    corrupt_blocks)
 *  - allocate the block_info array and start the progress task when
 *    ctx.progress_enabled is set
 *  - queue the tree root and the chunk root on normal_trees
 *  - walk the items of the tree root; each ROOT_ITEM is queued on
 *    normal_trees when its drop_progress objectid is 0, otherwise on
 *    dropping_trees together with its drop level and a converted key
 *  - process both lists with deal_root_from_list()
 *  - run check_chunks(), check_extent_refs() and check_devices()
 *  - stop the task, release the caches and clear the @fs_info pointers
 *
 * NOTE(review): the second run of free_*() calls at the end presumably
 * belongs to the -EAGAIN restart path mentioned in the comment before the
 * first deal_root_from_list() call; labels, jumps and the conditions around
 * the @fs_info assignments are not visible in this view -- confirm.
 */
9834 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
9836 struct rb_root dev_cache;
9837 struct cache_tree chunk_cache;
9838 struct block_group_tree block_group_cache;
9839 struct device_extent_tree dev_extent_cache;
9840 struct cache_tree extent_cache;
9841 struct cache_tree seen;
9842 struct cache_tree pending;
9843 struct cache_tree reada;
9844 struct cache_tree nodes;
9845 struct extent_io_tree excluded_extents;
9846 struct cache_tree corrupt_blocks;
9847 struct btrfs_path path;
9848 struct btrfs_key key;
9849 struct btrfs_key found_key;
9851 struct block_info *bits;
9853 struct extent_buffer *leaf;
9855 struct btrfs_root_item ri;
9856 struct list_head dropping_trees;
9857 struct list_head normal_trees;
9858 struct btrfs_root *root1;
9859 struct btrfs_root *root;
9863 root = fs_info->fs_root;
9864 dev_cache = RB_ROOT;
9865 cache_tree_init(&chunk_cache);
9866 block_group_tree_init(&block_group_cache);
9867 device_extent_tree_init(&dev_extent_cache);
9869 cache_tree_init(&extent_cache);
9870 cache_tree_init(&seen);
9871 cache_tree_init(&pending);
9872 cache_tree_init(&nodes);
9873 cache_tree_init(&reada);
9874 cache_tree_init(&corrupt_blocks);
9875 extent_io_tree_init(&excluded_extents);
9876 INIT_LIST_HEAD(&dropping_trees);
9877 INIT_LIST_HEAD(&normal_trees);
9880 fs_info->excluded_extents = &excluded_extents;
9881 fs_info->fsck_extent_cache = &extent_cache;
9882 fs_info->free_extent_hook = free_extent_hook;
9883 fs_info->corrupt_blocks = &corrupt_blocks;
9887 bits = malloc(bits_nr * sizeof(struct block_info));
9893 if (ctx.progress_enabled) {
9894 ctx.tp = TASK_EXTENTS;
9895 task_start(ctx.info);
/* The tree root and the chunk root are queued explicitly, with last_snapshot 0 and no drop key */
9899 root1 = fs_info->tree_root;
9900 level = btrfs_header_level(root1->node);
9901 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9902 root1->node->start, 0, level, 0, NULL);
9905 root1 = fs_info->chunk_root;
9906 level = btrfs_header_level(root1->node);
9907 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9908 root1->node->start, 0, level, 0, NULL);
9911 btrfs_init_path(&path);
9914 key.type = BTRFS_ROOT_ITEM_KEY;
9915 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
9919 leaf = path.nodes[0];
9920 slot = path.slots[0];
9921 if (slot >= btrfs_header_nritems(path.nodes[0])) {
/* NOTE(review): root here is fs_info->fs_root while the path was searched in fs_info->tree_root -- confirm this is intended */
9922 ret = btrfs_next_leaf(root, &path);
9925 leaf = path.nodes[0];
9926 slot = path.slots[0];
9928 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9929 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9930 unsigned long offset;
9933 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9934 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9935 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A drop_progress objectid of 0 selects the normal list; anything else goes to the dropping list */
9936 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9937 level = btrfs_root_level(&ri);
9938 ret = add_root_item_to_list(&normal_trees,
9940 btrfs_root_bytenr(&ri),
9941 last_snapshot, level,
9946 level = btrfs_root_level(&ri);
9947 objectid = found_key.objectid;
9948 btrfs_disk_key_to_cpu(&found_key,
9950 ret = add_root_item_to_list(&dropping_trees,
9952 btrfs_root_bytenr(&ri),
9953 last_snapshot, level,
9954 ri.drop_level, &found_key);
9961 btrfs_release_path(&path);
9964 * check_block can return -EAGAIN if it fixes something, please keep
9965 * this in mind when dealing with return values from these functions, if
9966 * we get -EAGAIN we want to fall through and restart the loop.
9968 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9969 &seen, &reada, &nodes, &extent_cache,
9970 &chunk_cache, &dev_cache, &block_group_cache,
9977 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9978 &pending, &seen, &reada, &nodes,
9979 &extent_cache, &chunk_cache, &dev_cache,
9980 &block_group_cache, &dev_extent_cache);
/* Cross-checks on the records collected by the tree walk */
9987 ret = check_chunks(&chunk_cache, &block_group_cache,
9988 &dev_extent_cache, NULL, NULL, NULL, 0);
9995 ret = check_extent_refs(root, &extent_cache);
10002 ret = check_devices(&dev_cache, &dev_extent_cache);
10007 task_stop(ctx.info);
10009 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10010 extent_io_tree_cleanup(&excluded_extents);
10011 fs_info->fsck_extent_cache = NULL;
10012 fs_info->free_extent_hook = NULL;
10013 fs_info->corrupt_blocks = NULL;
10014 fs_info->excluded_extents = NULL;
10017 free_chunk_cache_tree(&chunk_cache);
10018 free_device_cache_tree(&dev_cache);
10019 free_block_group_tree(&block_group_cache);
10020 free_device_extent_tree(&dev_extent_cache);
10021 free_extent_cache_tree(&seen);
10022 free_extent_cache_tree(&pending);
10023 free_extent_cache_tree(&reada);
10024 free_extent_cache_tree(&nodes);
10025 free_root_item_list(&normal_trees);
10026 free_root_item_list(&dropping_trees);
/* Second cleanup sequence; unlike the one above it also frees the extent record cache and does not clear the fs_info pointers */
10029 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10030 free_extent_cache_tree(&seen);
10031 free_extent_cache_tree(&pending);
10032 free_extent_cache_tree(&reada);
10033 free_extent_cache_tree(&nodes);
10034 free_chunk_cache_tree(&chunk_cache);
10035 free_block_group_tree(&block_group_cache);
10036 free_device_cache_tree(&dev_cache);
10037 free_device_extent_tree(&dev_extent_cache);
10038 free_extent_record_cache(&extent_cache);
10039 free_root_item_list(&normal_trees);
10040 free_root_item_list(&dropping_trees);
10041 extent_io_tree_cleanup(&excluded_extents);
10046 * Check backrefs of a tree block given by @bytenr or @eb.
10048 * @root: the root containing the @bytenr or @eb
10049 * @eb: tree block extent buffer, can be NULL
10050 * @bytenr: bytenr of the tree block to search
10051 * @level: tree level of the tree block
10052 * @owner: owner of the tree block
10054 * Return >0 for any error found and output error message
10055 * Return 0 for no error found
/*
 * Look up the extent tree item for the tree block at @bytenr and verify that
 * a backref for this root or @owner exists.
 *
 * The search key is METADATA_ITEM when the SKINNY_METADATA incompat flag is
 * set and EXTENT_ITEM otherwise, with offset (u64)-1, followed by
 * btrfs_previous_extent_item().  The level is taken from the key offset for
 * a METADATA_ITEM and from the btrfs_tree_block_info for an EXTENT_ITEM.
 * The extent flags, generation, level and (for non-fs trees) ref count are
 * compared, then the inline refs are scanned for a TREE_BLOCK_REF whose
 * offset equals root->objectid or @owner, or a SHARED_BLOCK_REF whose parent
 * is checked by a recursive call with @level + 1.  A keyed TREE_BLOCK_REF
 * item with offset root->objectid is searched as the fallback described in
 * the comment near the end.
 *
 * Error bits used below: BACKREF_MISSING and BACKREF_MISMATCH.  The final
 * backref-lost message is printed only when @eb is non-NULL.
 * NOTE(review): the guard around the flag/generation/level checks, which
 * read @eb, is not visible in this view -- confirm they are skipped when @eb
 * is NULL (recursive calls pass NULL).
 */
10057 static int check_tree_block_ref(struct btrfs_root *root,
10058 struct extent_buffer *eb, u64 bytenr,
10059 int level, u64 owner)
10061 struct btrfs_key key;
10062 struct btrfs_root *extent_root = root->fs_info->extent_root;
10063 struct btrfs_path path;
10064 struct btrfs_extent_item *ei;
10065 struct btrfs_extent_inline_ref *iref;
10066 struct extent_buffer *leaf;
10072 u32 nodesize = root->fs_info->nodesize;
10075 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root; used in the SHARED_BLOCK_REF branch below */
10080 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10081 btrfs_header_bytenr(root->node) == bytenr)
10082 tree_reloc_root = 1;
10084 btrfs_init_path(&path);
10085 key.objectid = bytenr;
10086 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10087 key.type = BTRFS_METADATA_ITEM_KEY;
10089 key.type = BTRFS_EXTENT_ITEM_KEY;
10090 key.offset = (u64)-1;
10092 /* Search for the backref in extent tree */
10093 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10095 err |= BACKREF_MISSING;
10098 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10100 err |= BACKREF_MISSING;
10104 leaf = path.nodes[0];
10105 slot = path.slots[0];
10106 btrfs_item_key_to_cpu(leaf, &key, slot);
10108 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM: level is the key offset and inline refs follow the extent item directly */
10110 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10111 skinny_level = (int)key.offset;
10112 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10114 struct btrfs_tree_block_info *info;
10116 info = (struct btrfs_tree_block_info *)(ei + 1);
10117 skinny_level = btrfs_tree_block_level(leaf, info);
10118 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10125 if (!(btrfs_extent_flags(leaf, ei) &
10126 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10128 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10129 key.objectid, nodesize,
10130 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10131 err = BACKREF_MISMATCH;
10133 header_gen = btrfs_header_generation(eb);
10134 extent_gen = btrfs_extent_generation(leaf, ei);
10135 if (header_gen != extent_gen) {
10137 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10138 key.objectid, nodesize, header_gen,
10140 err = BACKREF_MISMATCH;
10142 if (level != skinny_level) {
10144 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10145 key.objectid, nodesize, level, skinny_level);
10146 err = BACKREF_MISMATCH;
10148 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10150 "extent[%llu %u] is referred by other roots than %llu",
10151 key.objectid, nodesize, root->objectid);
10152 err = BACKREF_MISMATCH;
10157 * Iterate the extent/metadata item to find the exact backref
10159 item_size = btrfs_item_size_nr(leaf, slot);
10160 ptr = (unsigned long)iref;
10161 end = (unsigned long)ei + item_size;
10162 while (ptr < end) {
10163 iref = (struct btrfs_extent_inline_ref *)ptr;
10164 type = btrfs_extent_inline_ref_type(leaf, iref);
10165 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10167 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10168 (offset == root->objectid || offset == owner)) {
10170 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10172 * Backref of tree reloc root points to itself, no need
10173 * to check backref any more.
10175 if (tree_reloc_root)
10178 /* Check if the backref points to valid referencer */
10179 found_ref = !check_tree_block_ref(root, NULL,
10180 offset, level + 1, owner);
10185 ptr += btrfs_extent_inline_ref_size(type);
10189 * Inlined extent item doesn't have what we need, check
10190 * TREE_BLOCK_REF_KEY
10193 btrfs_release_path(&path);
10194 key.objectid = bytenr;
10195 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10196 key.offset = root->objectid;
10198 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10203 err |= BACKREF_MISSING;
10205 btrfs_release_path(&path);
10206 if (eb && (err & BACKREF_MISSING))
10207 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10208 bytenr, nodesize, owner, level);
10213 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10215 * Return >0 for any error found and output error message
10216 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item at @slot of leaf @eb against the extent tree.
 *
 * Inline file extents and extents with disk_bytenr 0 get no further checks
 * (see the comment below).  disk_num_bytes and num_bytes are tested for
 * sectorsize alignment (BYTES_UNALIGNED); the global counters
 * data_bytes_allocated and data_bytes_referenced are updated next to those
 * tests (NOTE(review): whether only for aligned values is not visible in
 * this view).
 *
 * The extent item keyed (disk_bytenr, EXTENT_ITEM, disk_num_bytes) is then
 * searched.  Its flags are tested for BTRFS_EXTENT_FLAG_DATA
 * (BACKREF_MISMATCH otherwise) and its inline refs are scanned for an
 * EXTENT_DATA_REF whose root equals the leaf owner or root->objectid, or a
 * SHARED_DATA_REF accepted by check_tree_block_ref().  When no inline ref
 * matched, a keyed EXTENT_DATA_REF item and then a keyed SHARED_DATA_REF
 * item (offset eb->start) are searched.  BACKREF_MISSING is set when no data
 * backref was found, and a message is printed for it.
 */
10218 static int check_extent_data_item(struct btrfs_root *root,
10219 struct extent_buffer *eb, int slot)
10221 struct btrfs_file_extent_item *fi;
10222 struct btrfs_path path;
10223 struct btrfs_root *extent_root = root->fs_info->extent_root;
10224 struct btrfs_key fi_key;
10225 struct btrfs_key dbref_key;
10226 struct extent_buffer *leaf;
10227 struct btrfs_extent_item *ei;
10228 struct btrfs_extent_inline_ref *iref;
10229 struct btrfs_extent_data_ref *dref;
10232 u64 disk_num_bytes;
10233 u64 extent_num_bytes;
10240 int found_dbackref = 0;
10244 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10245 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10247 /* Nothing to check for hole and inline data extents */
10248 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10249 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10252 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10253 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10254 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10256 /* Check unaligned disk_num_bytes and num_bytes */
10257 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10259 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10260 fi_key.objectid, fi_key.offset, disk_num_bytes,
10261 root->fs_info->sectorsize);
10262 err |= BYTES_UNALIGNED;
10264 data_bytes_allocated += disk_num_bytes;
10266 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10268 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10269 fi_key.objectid, fi_key.offset, extent_num_bytes,
10270 root->fs_info->sectorsize);
10271 err |= BYTES_UNALIGNED;
10273 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the leaf header; accepted below as an alternative to root->objectid */
10275 owner = btrfs_header_owner(eb);
10277 /* Check the extent item of the file extent in extent tree */
10278 btrfs_init_path(&path);
10279 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10280 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10281 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10283 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on slot refers to the extent tree leaf, not to the @slot argument */
10287 leaf = path.nodes[0];
10288 slot = path.slots[0];
10289 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10291 extent_flags = btrfs_extent_flags(leaf, ei);
10293 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10295 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10296 disk_bytenr, disk_num_bytes,
10297 BTRFS_EXTENT_FLAG_DATA);
10298 err |= BACKREF_MISMATCH;
10301 /* Check data backref inside that extent item */
10302 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10303 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10304 ptr = (unsigned long)iref;
10305 end = (unsigned long)ei + item_size;
10306 while (ptr < end) {
10307 iref = (struct btrfs_extent_inline_ref *)ptr;
10308 type = btrfs_extent_inline_ref_type(leaf, iref);
10309 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10311 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10312 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10313 if (ref_root == owner || ref_root == root->objectid)
10314 found_dbackref = 1;
10315 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10316 found_dbackref = !check_tree_block_ref(root, NULL,
10317 btrfs_extent_inline_ref_offset(leaf, iref),
10321 if (found_dbackref)
10323 ptr += btrfs_extent_inline_ref_size(type);
10326 if (!found_dbackref) {
10327 btrfs_release_path(&path);
10329 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10330 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10331 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10332 dbref_key.offset = hash_extent_data_ref(root->objectid,
10333 fi_key.objectid, fi_key.offset);
10335 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10336 &dbref_key, &path, 0, 0);
10338 found_dbackref = 1;
10342 btrfs_release_path(&path);
10345 * Neither inlined nor EXTENT_DATA_REF found, try
10346 * SHARED_DATA_REF as last chance.
10348 dbref_key.objectid = disk_bytenr;
10349 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10350 dbref_key.offset = eb->start;
10352 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10353 &dbref_key, &path, 0, 0);
10355 found_dbackref = 1;
10361 if (!found_dbackref)
10362 err |= BACKREF_MISSING;
10363 btrfs_release_path(&path);
10364 if (err & BACKREF_MISSING) {
10365 error("data extent[%llu %llu] backref lost",
10366 disk_bytenr, disk_num_bytes);
10372 * Get real tree block level for the case like shared block
10373 * Return >= 0 as tree level
10374 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources: the
 * extent tree item (key offset of a METADATA_ITEM, or the
 * btrfs_tree_block_info that follows an EXTENT_ITEM) and the header of the
 * block itself, which is read with the generation stored in the extent item.
 * The two levels are compared and the header level is returned.
 *
 * The extent item must carry BTRFS_EXTENT_FLAG_TREE_BLOCK.
 * NOTE(review): the outcome of a level mismatch and the concrete error
 * values are on lines not visible in this view; the comment above this
 * function states that a negative value means error.
 */
10376 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10378 struct extent_buffer *eb;
10379 struct btrfs_path path;
10380 struct btrfs_key key;
10381 struct btrfs_extent_item *ei;
10388 /* Search extent tree for extent generation and level */
10389 key.objectid = bytenr;
10390 key.type = BTRFS_METADATA_ITEM_KEY;
10391 key.offset = (u64)-1;
10393 btrfs_init_path(&path);
10394 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10397 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10405 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10406 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10407 struct btrfs_extent_item);
10408 flags = btrfs_extent_flags(path.nodes[0], ei);
10409 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10414 /* Get transid for later read_tree_block() check */
10415 transid = btrfs_extent_generation(path.nodes[0], ei);
10417 /* Get backref level as one source */
10418 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10419 backref_level = key.offset;
10421 struct btrfs_tree_block_info *info;
10423 info = (struct btrfs_tree_block_info *)(ei + 1);
10424 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10426 btrfs_release_path(&path);
10428 /* Get level from tree block as an alternative source */
10429 eb = read_tree_block(fs_info, bytenr, transid);
10430 if (!extent_buffer_uptodate(eb)) {
10431 free_extent_buffer(eb);
10434 header_level = btrfs_header_level(eb);
10435 free_extent_buffer(eb);
10437 if (header_level != backref_level)
10439 return header_level;
10442 btrfs_release_path(&path);
10447 * Check if a tree block backref is valid (points to a valid tree block)
10448 * if level == -1, level will be resolved
10449 * Return >0 for any error found and print error message
/*
 * Verify that root @root_id really references the tree block at @bytenr.
 *
 * The root is loaded with btrfs_read_fs_root(), the block is read and its
 * first key is taken (node key or item key), and the root is searched for
 * that key with path.lowest_level set to @level.  The node found at that
 * level is compared with the expected bytenr and level; a difference sets
 * REFERENCER_MISMATCH.  REFERENCER_MISSING is set on the paths that follow
 * the level query, the root read, the block read and the key search (their
 * conditions are only partly visible in this view).  A block with no items
 * at level 0 gets no key search (see the comment about an empty tree).
 *
 * query_tree_block_level() supplies the level for the level == -1 case
 * named in the comment below.
 */
10451 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10452 u64 bytenr, int level)
10454 struct btrfs_root *root;
10455 struct btrfs_key key;
10456 struct btrfs_path path;
10457 struct extent_buffer *eb;
10458 struct extent_buffer *node;
10459 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10463 /* Query level for level == -1 special case */
10465 level = query_tree_block_level(fs_info, bytenr);
10467 err |= REFERENCER_MISSING;
10471 key.objectid = root_id;
10472 key.type = BTRFS_ROOT_ITEM_KEY;
10473 key.offset = (u64)-1;
10475 root = btrfs_read_fs_root(fs_info, &key);
10476 if (IS_ERR(root)) {
10477 err |= REFERENCER_MISSING;
10481 /* Read out the tree block to get item/node key */
10482 eb = read_tree_block(fs_info, bytenr, 0);
10483 if (!extent_buffer_uptodate(eb)) {
10484 err |= REFERENCER_MISSING;
10485 free_extent_buffer(eb);
10489 /* Empty tree, no need to check key */
10490 if (!btrfs_header_nritems(eb) && !level) {
10491 free_extent_buffer(eb);
/* key is reused here: it now receives the first key stored in the block */
10496 btrfs_node_key_to_cpu(eb, &key, 0);
10498 btrfs_item_key_to_cpu(eb, &key, 0);
10500 free_extent_buffer(eb);
10502 btrfs_init_path(&path);
10503 path.lowest_level = level;
10504 /* Search with the first key, to ensure we can reach it */
10505 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10507 err |= REFERENCER_MISSING;
10511 node = path.nodes[level];
10512 if (btrfs_header_bytenr(node) != bytenr) {
10514 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10515 bytenr, nodesize, bytenr,
10516 btrfs_header_bytenr(node));
10517 err |= REFERENCER_MISMATCH;
10519 if (btrfs_header_level(node) != level) {
10521 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10522 bytenr, nodesize, level,
10523 btrfs_header_level(node));
10524 err |= REFERENCER_MISMATCH;
10528 btrfs_release_path(&path);
/* Two message variants follow, one without and one with the level; the selecting condition is not visible here */
10530 if (err & REFERENCER_MISSING) {
10532 error("extent [%llu %d] lost referencer (owner: %llu)",
10533 bytenr, nodesize, root_id);
10536 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10537 bytenr, nodesize, root_id, level);
10544 * Check if tree block @eb is tree reloc root.
10545 * Return 0 if it's not or any problem happens
10546 * Return 1 if it's a tree reloc root
/*
 * Load the tree reloc root keyed (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM,
 * owner of @eb) through btrfs_read_fs_root_no_cache() and compare the bytenr
 * of its root node with the bytenr of @eb.  The loaded root is released with
 * btrfs_free_fs_root() afterwards.
 * The return values (0 / 1) are documented in the comment above this
 * function; the statements that set them are not visible in this view.
 */
10548 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10549 struct extent_buffer *eb)
10551 struct btrfs_root *tree_reloc_root;
10552 struct btrfs_key key;
10553 u64 bytenr = btrfs_header_bytenr(eb);
10554 u64 owner = btrfs_header_owner(eb);
10557 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10558 key.offset = owner;
10559 key.type = BTRFS_ROOT_ITEM_KEY;
10561 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10562 if (IS_ERR(tree_reloc_root))
10565 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10567 btrfs_free_fs_root(tree_reloc_root);
10572 * Check referencer for shared block backref
10573 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: read the tree block at @parent and look for
 * a block pointer equal to @bytenr among its entries.
 *
 * @parent == @bytenr is handled separately through is_tree_reloc_root().
 * The header level of the parent is compared with @level + 1 before the
 * pointer scan.  query_tree_block_level() is used to obtain the level (per
 * the comment above this function, for level == -1; the condition itself is
 * not visible in this view).
 *
 * Returns REFERENCER_MISSING, after printing a message, when no parent was
 * found.  NOTE(review): the success return is not visible here.
 */
10575 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10576 u64 parent, u64 bytenr, int level)
10578 struct extent_buffer *eb;
10580 int found_parent = 0;
10583 eb = read_tree_block(fs_info, parent, 0);
10584 if (!extent_buffer_uptodate(eb))
10588 level = query_tree_block_level(fs_info, bytenr);
10592 /* It's possible it's a tree reloc root */
10593 if (parent == bytenr) {
10594 if (is_tree_reloc_root(fs_info, eb))
10599 if (level + 1 != btrfs_header_level(eb))
10602 nr = btrfs_header_nritems(eb);
10603 for (i = 0; i < nr; i++) {
10604 if (bytenr == btrfs_node_blockptr(eb, i)) {
10610 free_extent_buffer(eb);
10611 if (!found_parent) {
10613 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10614 bytenr, fs_info->nodesize, parent, level);
10615 return REFERENCER_MISSING;
10621 * Check referencer for normal (inlined) data ref
10622 * If len == 0, it will be resolved by searching in extent tree
/*
 * Compare @count with the number of matching file extents of inode
 * @objectid in root @root_id that point at the data extent @bytenr.
 *
 * First the extent tree is searched for the EXTENT_ITEM of @bytenr
 * (NOTE(review): per the comment above this function this is where a zero
 * @len is resolved; the condition and assignment are not visible in this
 * view).  Then the root is loaded and the EXTENT_DATA items of the inode are
 * iterated with btrfs_next_item().  An item is tested on its disk bytenr
 * against @bytenr, its disk num bytes against @len, and on
 * key.offset minus the file extent offset (the right-hand side of that last
 * comparison and the counting statement are not visible here).
 *
 * Returns REFERENCER_MISSING, after printing a message, when found_count
 * differs from @count.
 */
10624 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10625 u64 root_id, u64 objectid, u64 offset,
10626 u64 bytenr, u64 len, u32 count)
10628 struct btrfs_root *root;
10629 struct btrfs_root *extent_root = fs_info->extent_root;
10630 struct btrfs_key key;
10631 struct btrfs_path path;
10632 struct extent_buffer *leaf;
10633 struct btrfs_file_extent_item *fi;
10634 u32 found_count = 0;
10639 key.objectid = bytenr;
10640 key.type = BTRFS_EXTENT_ITEM_KEY;
10641 key.offset = (u64)-1;
10643 btrfs_init_path(&path);
10644 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10647 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10650 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10651 if (key.objectid != bytenr ||
10652 key.type != BTRFS_EXTENT_ITEM_KEY)
10655 btrfs_release_path(&path);
10657 key.objectid = root_id;
10658 key.type = BTRFS_ROOT_ITEM_KEY;
10659 key.offset = (u64)-1;
10660 btrfs_init_path(&path);
10662 root = btrfs_read_fs_root(fs_info, &key);
10666 key.objectid = objectid;
10667 key.type = BTRFS_EXTENT_DATA_KEY;
10669 * It can be nasty as data backref offset is
10670 * file offset - file extent offset, which is smaller or
10671 * equal to original backref offset. The only special case is
10672 * overflow. So we need to special check and do further search.
/* Start the search at 0 when the top bit of offset is set, otherwise at offset itself */
10674 key.offset = offset & (1ULL << 63) ? 0 : offset;
10676 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10681 * Search afterwards to get correct one
10682 * NOTE: As we must do a comprehensive check on the data backref to
10683 * make sure the dref count also matches, we must iterate all file
10684 * extents for that inode.
10687 leaf = path.nodes[0];
10688 slot = path.slots[0];
10690 if (slot >= btrfs_header_nritems(leaf))
10692 btrfs_item_key_to_cpu(leaf, &key, slot);
10693 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10695 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10697 * Except normal disk bytenr and disk num bytes, we still
10698 * need to do extra check on dbackref offset as
10699 * dbackref offset = file_offset - file_extent_offset
10701 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10702 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10703 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10708 ret = btrfs_next_item(root, &path);
10713 btrfs_release_path(&path);
10714 if (found_count != count) {
10716 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10717 bytenr, len, root_id, objectid, offset, count, found_count);
10718 return REFERENCER_MISSING;
10724 * Check if the referencer of a shared data backref exists
/*
 * Read the tree block at @parent and scan its items for an EXTENT_DATA file
 * extent whose disk bytenr equals @bytenr.  The key type test and the
 * inline-extent test below are presumably skips (their bodies are not
 * visible in this view -- confirm).
 *
 * Returns REFERENCER_MISSING, after printing a message, when no such item
 * was found.  NOTE(review): the success return is not visible here.
 */
10726 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10727 u64 parent, u64 bytenr)
10729 struct extent_buffer *eb;
10730 struct btrfs_key key;
10731 struct btrfs_file_extent_item *fi;
10733 int found_parent = 0;
10736 eb = read_tree_block(fs_info, parent, 0);
10737 if (!extent_buffer_uptodate(eb))
10740 nr = btrfs_header_nritems(eb);
10741 for (i = 0; i < nr; i++) {
10742 btrfs_item_key_to_cpu(eb, &key, i);
10743 if (key.type != BTRFS_EXTENT_DATA_KEY)
10746 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10747 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10750 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10757 free_extent_buffer(eb);
10758 if (!found_parent) {
10759 error("shared extent %llu referencer lost (parent: %llu)",
10761 return REFERENCER_MISSING;
10767 * This function will check a given extent item, including its backref and
10768 * itself (like crossing stripe boundary and type)
10770 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one extent item at @slot of @eb, including each of its inline
 * backrefs.
 *
 * The global bytes_used is increased by key.offset for an EXTENT_ITEM key
 * and by nodesize otherwise.  An item smaller than struct btrfs_extent_item
 * is reported as unsupported.  Metadata that check_crossing_stripes()
 * flags sets CROSSING_STRIPE_BOUNDARY.  For metadata stored under an
 * EXTENT_ITEM key the level comes from the btrfs_tree_block_info that
 * follows the extent item; the other visible branch takes it from
 * key.offset.
 *
 * The inline refs are then walked and dispatched on their type to
 * check_tree_block_backref(), check_shared_block_backref(),
 * check_extent_data_backref() or check_shared_data_backref().  An unknown
 * type sets UNKNOWN_TYPE, and running past the end of the item sets
 * ITEM_SIZE_MISMATCH.
 * NOTE(review): the loop construct and the way the per-ref results are
 * merged into the return value are not visible in this view.
 */
10772 static int check_extent_item(struct btrfs_fs_info *fs_info,
10773 struct extent_buffer *eb, int slot)
10775 struct btrfs_extent_item *ei;
10776 struct btrfs_extent_inline_ref *iref;
10777 struct btrfs_extent_data_ref *dref;
10781 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10782 u32 item_size = btrfs_item_size_nr(eb, slot);
10787 struct btrfs_key key;
10791 btrfs_item_key_to_cpu(eb, &key, slot);
10792 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10793 bytes_used += key.offset;
10795 bytes_used += nodesize;
10797 if (item_size < sizeof(*ei)) {
10799 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10800 * old thing when on disk format is still un-determined.
10801 * No need to care about it anymore
10803 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10807 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10808 flags = btrfs_extent_flags(eb, ei);
10810 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10812 if (metadata && check_crossing_stripes(global_info, key.objectid,
10814 error("bad metadata [%llu, %llu) crossing stripe boundary",
10815 key.objectid, key.objectid + nodesize);
10816 err |= CROSSING_STRIPE_BOUNDARY;
10819 ptr = (unsigned long)(ei + 1);
10821 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10822 /* Old EXTENT_ITEM metadata */
10823 struct btrfs_tree_block_info *info;
10825 info = (struct btrfs_tree_block_info *)ptr;
10826 level = btrfs_tree_block_level(eb, info);
10827 ptr += sizeof(struct btrfs_tree_block_info);
10829 /* New METADATA_ITEM */
10830 level = key.offset;
10832 end = (unsigned long)ei + item_size;
10835 /* Reached extent item end normally */
10839 /* Beyond extent item end, wrong item size */
10841 err |= ITEM_SIZE_MISMATCH;
10842 error("extent item at bytenr %llu slot %d has wrong size",
10847 /* Now check every backref in this extent item */
10848 iref = (struct btrfs_extent_inline_ref *)ptr;
10849 type = btrfs_extent_inline_ref_type(eb, iref);
10850 offset = btrfs_extent_inline_ref_offset(eb, iref);
10852 case BTRFS_TREE_BLOCK_REF_KEY:
10853 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10857 case BTRFS_SHARED_BLOCK_REF_KEY:
10858 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10862 case BTRFS_EXTENT_DATA_REF_KEY:
10863 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10864 ret = check_extent_data_backref(fs_info,
10865 btrfs_extent_data_ref_root(eb, dref),
10866 btrfs_extent_data_ref_objectid(eb, dref),
10867 btrfs_extent_data_ref_offset(eb, dref),
10868 key.objectid, key.offset,
10869 btrfs_extent_data_ref_count(eb, dref));
10872 case BTRFS_SHARED_DATA_REF_KEY:
10873 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10877 error("extent[%llu %d %llu] has unknown ref type: %d",
10878 key.objectid, key.type, key.offset, type);
10879 err |= UNKNOWN_TYPE;
10883 ptr += btrfs_extent_inline_ref_size(type);
10891 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent item at @slot of @eb is backed by a chunk item.
 *
 * The chunk is looked up in the chunk tree using the chunk objectid/offset
 * stored in the dev extent, run through btrfs_check_chunk_valid(), and its
 * stripes are scanned for one whose devid and offset equal the dev extent's
 * own key.
 *
 * Returns REFERENCER_MISSING (after logging) when found_chunk is still 0.
 * NOTE(review): the success return and several branch targets are not
 * visible in this excerpt.
 */
10893 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10894 struct extent_buffer *eb, int slot)
10896 struct btrfs_root *chunk_root = fs_info->chunk_root;
10897 struct btrfs_dev_extent *ptr;
10898 struct btrfs_path path;
10899 struct btrfs_key chunk_key;
10900 struct btrfs_key devext_key;
10901 struct btrfs_chunk *chunk;
10902 struct extent_buffer *l;
10906 int found_chunk = 0;
10909 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10910 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10911 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk item this dev extent claims to belong to */
10913 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10914 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10915 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10917 btrfs_init_path(&path);
10918 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10923 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10924 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* Compare the chunk's stripe length against the dev extent length */
10929 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for a stripe that points back at this dev extent (devid, offset) */
10932 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10933 for (i = 0; i < num_stripes; i++) {
10934 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10935 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10937 if (devid == devext_key.objectid &&
10938 offset == devext_key.offset) {
10944 btrfs_release_path(&path);
10945 if (!found_chunk) {
10947 "device extent[%llu, %llu, %llu] did not find the related chunk",
10948 devext_key.objectid, devext_key.offset, length);
10949 return REFERENCER_MISSING;
10955 * Check if the used space is correct with the dev item
/*
 * Check that the bytes_used field of the dev item at @slot of @eb equals the
 * sum of the lengths of all dev extents recorded for that device.
 *
 * The device tree is searched from key (dev_id, BTRFS_DEV_EXTENT_KEY, ...)
 * and every dev extent whose objectid is dev_id contributes its length to
 * the running total.
 *
 * Returns REFERENCER_MISSING when the initial search fails and
 * ACCOUNTING_MISMATCH when the total differs from bytes_used; both paths log
 * the key of the dev item being checked.
 * NOTE(review): the success return is not visible in this excerpt.
 */
10957 static int check_dev_item(struct btrfs_fs_info *fs_info,
10958 struct extent_buffer *eb, int slot)
10960 struct btrfs_root *dev_root = fs_info->dev_root;
10961 struct btrfs_dev_item *dev_item;
10962 struct btrfs_path path;
10963 struct btrfs_key key;
10964 struct btrfs_dev_extent *ptr;
10970 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10971 dev_id = btrfs_device_id(eb, dev_item);
10972 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents of a device are keyed by (devid, DEV_EXTENT, ...) */
10974 key.objectid = dev_id;
10975 key.type = BTRFS_DEV_EXTENT_KEY;
10978 btrfs_init_path(&path);
10979 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Reload the dev item's own key so the message names the checked item */
10981 btrfs_item_key_to_cpu(eb, &key, slot);
10982 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10983 key.objectid, key.type, key.offset);
10984 btrfs_release_path(&path);
10985 return REFERENCER_MISSING;
10988 /* Iterate dev_extents to calculate the used space of a device */
10990 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10993 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10994 if (key.objectid > dev_id)
10996 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10999 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11000 struct btrfs_dev_extent);
11001 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11003 ret = btrfs_next_item(dev_root, &path);
11007 btrfs_release_path(&path);
11009 if (used != total) {
/*
 * key was reused for the dev extent walk above; reload the dev item key and
 * print it, matching the "cannot find any related dev extent" message.
 */
11010 btrfs_item_key_to_cpu(eb, &key, slot);
11012 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11013 total, used, key.objectid,
11014 key.type, key.offset);
11015 return ACCOUNTING_MISMATCH;
11021 * Check a block group item with its referencer (chunk) and its used space
11022 * with extent/metadata item
/*
 * Cross-check the block group item at @slot of @eb.
 *
 * Two things are verified:
 *  - a chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM,
 *    block group bytenr) exists in the chunk tree and its length is compared
 *    (REFERENCER_MISSING / REFERENCER_MISMATCH otherwise);
 *  - the extent/metadata items found in the extent tree inside the block
 *    group's range add up to the "used" value of the block group item and
 *    carry flags compatible with the block group type
 *    (ACCOUNTING_MISMATCH / CHUNK_TYPE_MISMATCH otherwise).
 *
 * Error bits are accumulated in err.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
11024 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11025 struct extent_buffer *eb, int slot)
11027 struct btrfs_root *extent_root = fs_info->extent_root;
11028 struct btrfs_root *chunk_root = fs_info->chunk_root;
11029 struct btrfs_block_group_item *bi;
11030 struct btrfs_block_group_item bg_item;
11031 struct btrfs_path path;
11032 struct btrfs_key bg_key;
11033 struct btrfs_key chunk_key;
11034 struct btrfs_key extent_key;
11035 struct btrfs_chunk *chunk;
11036 struct extent_buffer *leaf;
11037 struct btrfs_extent_item *ei;
11038 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the item out of the extent buffer before reading its fields */
11046 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11047 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11048 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11049 used = btrfs_block_group_used(&bg_item);
11050 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item's key offset is the block group's start bytenr */
11052 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11053 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11054 chunk_key.offset = bg_key.objectid;
11056 btrfs_init_path(&path);
11057 /* Search for the referencer chunk */
11058 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11061 "block group[%llu %llu] did not find the related chunk item",
11062 bg_key.objectid, bg_key.offset);
11063 err |= REFERENCER_MISSING;
11065 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11066 struct btrfs_chunk);
11067 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11070 "block group[%llu %llu] related chunk item length does not match",
11071 bg_key.objectid, bg_key.offset);
11072 err |= REFERENCER_MISMATCH;
11075 btrfs_release_path(&path);
11077 /* Search from the block group bytenr */
11078 extent_key.objectid = bg_key.objectid;
11079 extent_key.type = 0;
11080 extent_key.offset = 0;
11082 btrfs_init_path(&path);
11083 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11087 /* Iterate extent tree to account used space */
11089 leaf = path.nodes[0];
11091 /* Search slot can point to the last item beyond leaf nritems */
11092 if (path.slots[0] >= btrfs_header_nritems(leaf))
11095 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Compare against the end of the block group: bytenr + length */
11096 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only EXTENT_ITEM / METADATA_ITEM keys take part in the accounting */
11099 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11100 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11102 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): the statement guarded by the METADATA_ITEM test is not
 * visible here; presumably it accounts nodesize rather than key.offset,
 * since for METADATA_ITEM the key offset is used as a level elsewhere in
 * this file -- confirm.
 */
11105 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11108 total += extent_key.offset;
11110 ei = btrfs_item_ptr(leaf, path.slots[0],
11111 struct btrfs_extent_item);
11112 flags = btrfs_extent_flags(leaf, ei);
/* Data extents must live in DATA block groups ... */
11113 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11114 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11116 "bad extent[%llu, %llu) type mismatch with chunk",
11117 extent_key.objectid,
11118 extent_key.objectid + extent_key.offset);
11119 err |= CHUNK_TYPE_MISMATCH;
/* ... and tree blocks in SYSTEM or METADATA block groups */
11121 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11122 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11123 BTRFS_BLOCK_GROUP_METADATA))) {
11125 "bad extent[%llu, %llu) type mismatch with chunk",
11126 extent_key.objectid,
11127 extent_key.objectid + nodesize);
11128 err |= CHUNK_TYPE_MISMATCH;
11132 ret = btrfs_next_item(extent_root, &path);
11138 btrfs_release_path(&path);
11140 if (total != used) {
11142 "block group[%llu %llu] used %llu but extent items used %llu",
11143 bg_key.objectid, bg_key.offset, used, total);
11144 err |= ACCOUNTING_MISMATCH;
11150 * Check a chunk item.
11151 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of @eb against the items that must refer to
 * it:
 *  - the chunk itself is validated with btrfs_check_chunk_valid();
 *  - a block group item keyed (chunk start, BLOCK_GROUP_ITEM, chunk length)
 *    must exist in the extent tree and carry the same flags as the chunk
 *    type;
 *  - for every stripe, a dev extent keyed (devid, DEV_EXTENT, stripe offset)
 *    must exist in the device tree and point back at this chunk with a
 *    length equal to the chunk's stripe length.
 *
 * Error bits are accumulated in err. All messages identify the chunk by its
 * logical range [chunk_key.offset, chunk_end).
 * NOTE(review): the return statement is not visible in this excerpt.
 */
11153 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11154 struct extent_buffer *eb, int slot)
11156 struct btrfs_root *extent_root = fs_info->extent_root;
11157 struct btrfs_root *dev_root = fs_info->dev_root;
11158 struct btrfs_path path;
11159 struct btrfs_key chunk_key;
11160 struct btrfs_key bg_key;
11161 struct btrfs_key devext_key;
11162 struct btrfs_chunk *chunk;
11163 struct extent_buffer *leaf;
11164 struct btrfs_block_group_item *bi;
11165 struct btrfs_block_group_item bg_item;
11166 struct btrfs_dev_extent *ptr;
11178 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11179 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11180 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers the logical range [chunk_key.offset, chunk_end) */
11181 chunk_end = chunk_key.offset + length;
11182 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11185 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11187 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11190 type = btrfs_chunk_type(eb, chunk);
/* Key of the block group item that must mirror this chunk */
11192 bg_key.objectid = chunk_key.offset;
11193 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11194 bg_key.offset = length;
11196 btrfs_init_path(&path);
11197 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11200 "chunk[%llu %llu) did not find the related block group item",
11201 chunk_key.offset, chunk_end);
11202 err |= REFERENCER_MISSING;
11204 leaf = path.nodes[0];
11205 bi = btrfs_item_ptr(leaf, path.slots[0],
11206 struct btrfs_block_group_item);
11207 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11209 if (btrfs_block_group_flags(&bg_item) != type) {
11211 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11212 chunk_key.offset, chunk_end, type,
11213 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is reported with the REFERENCER_MISSING
 * bit although REFERENCER_MISMATCH exists; left unchanged because callers
 * may depend on the returned bits -- confirm.
 */
11214 err |= REFERENCER_MISSING;
11218 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11219 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11220 for (i = 0; i < num_stripes; i++) {
/* Drop whatever the previous lookup left in the path before reusing it */
11221 btrfs_release_path(&path);
11222 btrfs_init_path(&path);
11223 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11224 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11225 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11227 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11230 goto not_match_dev;
11232 leaf = path.nodes[0];
11233 ptr = btrfs_item_ptr(leaf, path.slots[0],
11234 struct btrfs_dev_extent);
11235 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11236 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and match its stripe length */
11237 if (objectid != chunk_key.objectid ||
11238 offset != chunk_key.offset ||
11239 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11240 goto not_match_dev;
11243 err |= BACKREF_MISSING;
11245 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11246 chunk_key.offset, chunk_end, i);
11249 btrfs_release_path(&path);
11255 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and dispatch each one, by key type, to the
 * matching checker (extent data, block group, dev item, chunk, dev extent,
 * extent/metadata item and the keyed backref items). Checksum items only
 * contribute their item size to total_csum_bytes.
 *
 * The walk advances with ++slot until btrfs_header_nritems(eb) is reached.
 * NOTE(review): how each ret is folded into the result and the return
 * statement are not visible in this excerpt.
 */
11257 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11259 struct btrfs_fs_info *fs_info = root->fs_info;
11260 struct btrfs_key key;
11263 struct btrfs_extent_data_ref *dref;
11268 btrfs_item_key_to_cpu(eb, &key, slot);
11272 case BTRFS_EXTENT_DATA_KEY:
11273 ret = check_extent_data_item(root, eb, slot);
11276 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11277 ret = check_block_group_item(fs_info, eb, slot);
11280 case BTRFS_DEV_ITEM_KEY:
11281 ret = check_dev_item(fs_info, eb, slot);
11284 case BTRFS_CHUNK_ITEM_KEY:
11285 ret = check_chunk_item(fs_info, eb, slot);
11288 case BTRFS_DEV_EXTENT_KEY:
11289 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
11292 case BTRFS_EXTENT_ITEM_KEY:
11293 case BTRFS_METADATA_ITEM_KEY:
11294 ret = check_extent_item(fs_info, eb, slot);
/* Accounting only: add up the size of every csum item */
11297 case BTRFS_EXTENT_CSUM_KEY:
11298 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items */
11300 case BTRFS_TREE_BLOCK_REF_KEY:
11301 ret = check_tree_block_backref(fs_info, key.offset,
11305 case BTRFS_EXTENT_DATA_REF_KEY:
11306 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11307 ret = check_extent_data_backref(fs_info,
11308 btrfs_extent_data_ref_root(eb, dref),
11309 btrfs_extent_data_ref_objectid(eb, dref),
11310 btrfs_extent_data_ref_offset(eb, dref),
11312 btrfs_extent_data_ref_count(eb, dref));
11315 case BTRFS_SHARED_BLOCK_REF_KEY:
11316 ret = check_shared_block_backref(fs_info, key.offset,
11320 case BTRFS_SHARED_DATA_REF_KEY:
11321 ret = check_shared_data_backref(fs_info, key.offset,
/* Move on to the next item while there is one */
11329 if (++slot < btrfs_header_nritems(eb))
11336 * Helper function for later fs/subvol tree check. To determine if a tree
11337 * block should be checked.
11338 * This function ensures that only the direct referencer with the lowest
11339 * rootid checks a fs/subvolume tree block.
11341 * Backref check at extent tree would detect errors like missing subvolume
11342 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should check tree block @eb.
 *
 * The extent item describing @eb is located in the extent tree and its
 * inline refs are walked. If an inline BTRFS_TREE_BLOCK_REF_KEY ref names a
 * root id lower than root->objectid, the path is released in that branch.
 * NOTE(review): the values returned on each path are not visible in this
 * excerpt; per the comments, lookup failures lead to the block being
 * checked, and a lower-numbered referencer presumably makes this root skip
 * it -- confirm.
 */
11344 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11346 struct btrfs_root *extent_root = root->fs_info->extent_root;
11347 struct btrfs_key key;
11348 struct btrfs_path path;
11349 struct extent_buffer *leaf;
11351 struct btrfs_extent_item *ei;
11357 struct btrfs_extent_inline_ref *iref;
11360 btrfs_init_path(&path);
/* Search with the largest offset, then step back to the extent item */
11361 key.objectid = btrfs_header_bytenr(eb);
11362 key.type = BTRFS_METADATA_ITEM_KEY;
11363 key.offset = (u64)-1;
11366 * Any failure in backref resolving means we can't determine
11367 * whom the tree block belongs to.
11368 * So in that case, we need to check that tree block
11370 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11374 ret = btrfs_previous_extent_item(extent_root, &path,
11375 btrfs_header_bytenr(eb));
11379 leaf = path.nodes[0];
11380 slot = path.slots[0];
11381 btrfs_item_key_to_cpu(leaf, &key, slot);
11382 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Inline refs start right after the extent item for METADATA_ITEM; in the
 * other case a btrfs_tree_block_info sits in between.
 */
11384 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11385 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11387 struct btrfs_tree_block_info *info;
11389 info = (struct btrfs_tree_block_info *)(ei + 1);
11390 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk every inline ref up to the end of the item */
11393 item_size = btrfs_item_size_nr(leaf, slot);
11394 ptr = (unsigned long)iref;
11395 end = (unsigned long)ei + item_size;
11396 while (ptr < end) {
11397 iref = (struct btrfs_extent_inline_ref *)ptr;
11398 type = btrfs_extent_inline_ref_type(leaf, iref);
11399 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11402 * We only check the tree block if current root is
11403 * the lowest referencer of it.
11405 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11406 offset < root->objectid) {
11407 btrfs_release_path(&path);
11411 ptr += btrfs_extent_inline_ref_size(type);
11414 * Normally we should also check keyed tree block ref, but that may be
11415 * very time consuming. Inlined ref should already make us skip a lot
11416 * of refs now. So skip search keyed tree block ref.
11420 btrfs_release_path(&path);
11425 * Traversal function for tree block. We will do:
11426 * 1) Skip shared fs/subvolume tree blocks
11427 * 2) Update related bytes accounting
11428 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root and everything below it.
 *
 * For fs/subvolume trees, should_check() is consulted first. The block's
 * size is then added to the global byte counters, its own backref is
 * verified with check_tree_block_ref(), leaves are handed to
 * check_leaf_items(), and for nodes every child pointer is read with
 * read_tree_block() and traversed in turn.
 * NOTE(review): error accumulation and the return statements are not
 * visible in this excerpt.
 */
11430 static int traverse_tree_block(struct btrfs_root *root,
11431 struct extent_buffer *node)
11433 struct extent_buffer *eb;
11434 struct btrfs_key key;
11435 struct btrfs_key drop_key;
11443 * Skip shared fs/subvolume tree block, in that case they will
11444 * be checked by referencer with lowest rootid
11446 if (is_fstree(root->objectid) && !should_check(root, node))
11449 /* Update bytes accounting */
11450 total_btree_bytes += node->len;
11451 if (fs_root_objectid(btrfs_header_owner(node)))
11452 total_fs_tree_bytes += node->len;
11453 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11454 total_extent_tree_bytes += node->len;
11456 /* pre-order traversal, check itself first */
11457 level = btrfs_header_level(node);
11458 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11459 btrfs_header_level(node),
11460 btrfs_header_owner(node));
11464 "check %s failed root %llu bytenr %llu level %d, force continue check",
11465 level ? "node":"leaf", root->objectid,
11466 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: account the unused space and check every item */
11469 btree_space_waste += btrfs_leaf_free_space(root, node);
11470 ret = check_leaf_items(root, node);
/* Node: account the unused key pointer slots as wasted space */
11475 nr = btrfs_header_nritems(node);
11476 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11477 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11478 sizeof(struct btrfs_key_ptr);
11480 /* Then check all its children */
11481 for (i = 0; i < nr; i++) {
11482 u64 blocknr = btrfs_node_blockptr(node, i);
11484 btrfs_node_key_to_cpu(node, &key, i);
/* Test the child's key against the root's recorded drop progress */
11485 if (level == root->root_item.drop_level &&
11486 is_dropped_key(&key, &drop_key))
11490 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11491 * to call the function itself.
11493 eb = read_tree_block(root->fs_info, blocknr, 0);
11494 if (extent_buffer_uptodate(eb)) {
11495 ret = traverse_tree_block(root, eb);
11498 free_extent_buffer(eb);
11505 * Low memory usage version of check_chunks_and_extents.
11507 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11509 struct btrfs_path path;
11510 struct btrfs_key key;
11511 struct btrfs_root *root1;
11512 struct btrfs_root *root;
11513 struct btrfs_root *cur_root;
11517 root = fs_info->fs_root;
11519 root1 = root->fs_info->chunk_root;
11520 ret = traverse_tree_block(root1, root1->node);
11523 root1 = root->fs_info->tree_root;
11524 ret = traverse_tree_block(root1, root1->node);
11527 btrfs_init_path(&path);
11528 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11530 key.type = BTRFS_ROOT_ITEM_KEY;
11532 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11534 error("cannot find extent treet in tree_root");
11539 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11540 if (key.type != BTRFS_ROOT_ITEM_KEY)
11542 key.offset = (u64)-1;
11544 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11545 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11548 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11549 if (IS_ERR(cur_root) || !cur_root) {
11550 error("failed to read tree: %lld", key.objectid);
11554 ret = traverse_tree_block(cur_root, cur_root->node);
11557 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11558 btrfs_free_fs_root(cur_root);
11560 ret = btrfs_next_item(root1, &path);
11566 btrfs_release_path(&path);
/*
 * Entry point for the chunk/extent check: prints a banner to stderr unless
 * progress reporting is enabled, then runs the low-memory implementation
 * when check_mode is CHECK_MODE_LOWMEM and check_chunks_and_extents()
 * otherwise.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
11570 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11574 if (!ctx.progress_enabled)
11575 fprintf(stderr, "checking extents\n");
11576 if (check_mode == CHECK_MODE_LOWMEM)
11577 ret = check_chunks_and_extents_v2(fs_info);
11579 ret = check_chunks_and_extents(fs_info);
/*
 * Re-initialise the root node of @root inside transaction @trans.
 *
 * A root block c is obtained (one visible path allocates it with
 * btrfs_alloc_free_block()), its header is zeroed and rewritten (level,
 * bytenr, generation, backref rev, owner, fsid, chunk tree uuid) and the
 * buffer is marked dirty. When the new block has the same start as the old
 * root node, the root item is updated in the tree root directly; the root
 * is finally added to the dirty list.
 * NOTE(review): how @overwrite selects between the visible paths, and the
 * return values, are not visible in this excerpt.
 */
11584 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11585 struct btrfs_root *root, int overwrite)
11587 struct extent_buffer *c;
11588 struct extent_buffer *old = root->node;
11591 struct btrfs_disk_key disk_key = {0,0,0};
11597 extent_buffer_get(c);
11600 c = btrfs_alloc_free_block(trans, root,
11601 root->fs_info->nodesize,
11602 root->root_key.objectid,
11603 &disk_key, level, 0, 0);
11606 extent_buffer_get(c);
/* Start from a zeroed header and fill in the fields of the new root node */
11610 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11611 btrfs_set_header_level(c, level);
11612 btrfs_set_header_bytenr(c, c->start);
11613 btrfs_set_header_generation(c, trans->transid);
11614 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11615 btrfs_set_header_owner(c, root->root_key.objectid);
11617 write_extent_buffer(c, root->fs_info->fsid,
11618 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11620 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11621 btrfs_header_chunk_tree_uuid(c),
11624 btrfs_mark_buffer_dirty(c);
11626 * this case can happen in the following case:
11628 * 1.overwrite previous root.
11630 * 2.reinit reloc data root, this is because we skip pin
11631 * down reloc data tree before which means we can allocate
11632 * same block bytenr here.
11634 if (old->start == c->start) {
11635 btrfs_set_root_generation(&root->root_item,
11637 root->root_item.level = btrfs_header_level(root->node);
11638 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11639 &root->root_key, &root->root_item);
11641 free_extent_buffer(c);
11645 free_extent_buffer(old);
11647 add_root_to_dirty_list(root);
/*
 * Pin tree block @eb and, recursively, the tree blocks reachable from it.
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is not pinned again. For BTRFS_ROOT_ITEM_KEY
 * items (except the extent root and the reloc roots) the referenced root
 * block is read and pinned recursively with tree_root == 0. For node
 * pointers, children of a level-1 node outside the tree root are pinned by
 * bytenr/nodesize without being read.
 *
 * @tree_root: non-zero when @eb belongs to the tree root.
 * NOTE(review): the leaf/node branch structure and the return values are
 * not visible in this excerpt.
 */
11651 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11652 struct extent_buffer *eb, int tree_root)
11654 struct extent_buffer *tmp;
11655 struct btrfs_root_item *ri;
11656 struct btrfs_key key;
11658 int level = btrfs_header_level(eb);
11664 * If we have pinned this block before, don't pin it again.
11665 * This can not only avoid forever loop with broken filesystem
11666 * but also give us some speedups.
11668 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11669 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11672 btrfs_pin_extent(fs_info, eb->start, eb->len);
11674 nritems = btrfs_header_nritems(eb);
11675 for (i = 0; i < nritems; i++) {
11677 btrfs_item_key_to_cpu(eb, &key, i);
11678 if (key.type != BTRFS_ROOT_ITEM_KEY)
11680 /* Skip the extent root and reloc roots */
11681 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11682 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11683 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
/* Follow the root item to the root block it points at */
11685 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11686 bytenr = btrfs_disk_root_bytenr(eb, ri);
11689 * If at any point we start needing the real root we
11690 * will have to build a stump root for the root we are
11691 * in, but for now this doesn't actually use the root so
11692 * just pass in extent_root.
11694 tmp = read_tree_block(fs_info, bytenr, 0);
11695 if (!extent_buffer_uptodate(tmp)) {
11696 fprintf(stderr, "Error reading root block\n");
11699 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11700 free_extent_buffer(tmp);
11704 bytenr = btrfs_node_blockptr(eb, i);
11706 /* If we aren't the tree root don't read the block */
11707 if (level == 1 && !tree_root) {
11708 btrfs_pin_extent(fs_info, bytenr,
11709 fs_info->nodesize);
11713 tmp = read_tree_block(fs_info, bytenr, 0);
11714 if (!extent_buffer_uptodate(tmp)) {
11715 fprintf(stderr, "Error reading tree block\n");
11718 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11719 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk root (tree_root flag 0) and
 * then from the tree root (tree_root flag 1). Returns the result of the
 * tree root pass.
 * NOTE(review): the handling of the first call's ret is not visible in
 * this excerpt.
 */
11728 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11732 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11736 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_*_alloc_bits masks are cleared, then every BTRFS_CHUNK_ITEM_KEY
 * item in the chunk tree is turned into a block group with
 * btrfs_add_block_group() and its range is marked dirty in
 * fs_info->free_space_cache. A final loop visits the block groups in order
 * via btrfs_lookup_first_block_group().
 * NOTE(review): what the final loop does with each cache entry, and the
 * return values, are not visible in this excerpt.
 */
11739 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11741 struct btrfs_block_group_cache *cache;
11742 struct btrfs_path path;
11743 struct extent_buffer *leaf;
11744 struct btrfs_chunk *chunk;
11745 struct btrfs_key key;
11749 btrfs_init_path(&path);
11751 key.type = BTRFS_CHUNK_ITEM_KEY;
11753 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11755 btrfs_release_path(&path);
11760 * We do this in case the block groups were screwed up and had alloc
11761 * bits that aren't actually set on the chunks. This happens with
11762 * restored images every time and could happen in real life I guess.
11764 fs_info->avail_data_alloc_bits = 0;
11765 fs_info->avail_metadata_alloc_bits = 0;
11766 fs_info->avail_system_alloc_bits = 0;
11768 /* First we need to create the in-memory block groups */
/* Step to the next leaf once the current one is exhausted */
11770 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11771 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11773 btrfs_release_path(&path);
11781 leaf = path.nodes[0];
11782 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11783 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One block group per chunk: start = key.offset, size = chunk length */
11788 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11789 btrfs_add_block_group(fs_info, 0,
11790 btrfs_chunk_type(leaf, chunk),
11791 key.objectid, key.offset,
11792 btrfs_chunk_length(leaf, chunk));
11793 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11794 key.offset + btrfs_chunk_length(leaf, chunk));
11799 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the lookup right after the block group just found */
11803 start = cache->key.objectid + cache->key.offset;
11806 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root within @trans.
 *
 * Visible steps: search for the (BTRFS_BALANCE_OBJECTID,
 * BTRFS_BALANCE_ITEM_KEY) item and delete it; search from
 * BTRFS_TREE_RELOC_OBJECTID root items and delete runs of them with
 * btrfs_del_items(); then read the data reloc tree root, record it in the
 * transaction, re-initialise it with btrfs_fsck_reinit_root() and recreate
 * its root directory.
 * NOTE(review): loop structure, error handling and the return statement
 * are not visible in this excerpt.
 */
11810 static int reset_balance(struct btrfs_trans_handle *trans,
11811 struct btrfs_fs_info *fs_info)
11813 struct btrfs_root *root = fs_info->tree_root;
11814 struct btrfs_path path;
11815 struct extent_buffer *leaf;
11816 struct btrfs_key key;
11817 int del_slot, del_nr = 0;
11821 btrfs_init_path(&path);
11822 key.objectid = BTRFS_BALANCE_OBJECTID;
11823 key.type = BTRFS_BALANCE_ITEM_KEY;
11825 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11830 goto reinit_data_reloc;
11835 ret = btrfs_del_item(trans, root, &path);
11838 btrfs_release_path(&path);
/* Next, look for the reloc tree root items */
11840 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11841 key.type = BTRFS_ROOT_ITEM_KEY;
11843 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11847 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11852 ret = btrfs_del_items(trans, root, &path,
11859 btrfs_release_path(&path);
11862 ret = btrfs_search_slot(trans, root, &key, &path,
11869 leaf = path.nodes[0];
11870 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11871 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11873 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11878 del_slot = path.slots[0];
11887 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11891 btrfs_release_path(&path);
/* Finally re-initialise the data reloc tree and give it a root directory */
11894 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11895 key.type = BTRFS_ROOT_ITEM_KEY;
11896 key.offset = (u64)-1;
11897 root = btrfs_read_fs_root(fs_info, &key);
11898 if (IS_ERR(root)) {
11899 fprintf(stderr, "Error reading data reloc tree\n");
11900 ret = PTR_ERR(root);
11903 record_root_in_trans(trans, root);
11904 ret = btrfs_fsck_reinit_root(trans, root, 0);
11907 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11909 btrfs_release_path(&path);
/*
 * Re-create the extent tree from scratch within @trans.
 *
 * Sequence visible here: refuse filesystems with the MIXED_GROUPS incompat
 * flag, pin all metadata blocks, free and rebuild the in-memory block
 * groups, re-initialise the extent root, insert a block group item for
 * every in-memory block group, and finally reset any pending balance.
 * Each failing step prints a message to stderr.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
11913 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11914 struct btrfs_fs_info *fs_info)
11920 * The only reason we don't do this is because right now we're just
11921 * walking the trees we find and pinning down their bytes, we don't look
11922 * at any of the leaves. In order to do mixed groups we'd have to check
11923 * the leaves of any fs roots and pin down the bytes for any file
11924 * extents we find. Not hard but why do it if we don't have to?
11926 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11927 fprintf(stderr, "We don't support re-initing the extent tree "
11928 "for mixed block groups yet, please notify a btrfs "
11929 "developer you want to do this so they can add this "
11930 "functionality.\n");
11935 * first we need to walk all of the trees except the extent tree and pin
11936 * down the bytes that are in use so we don't overwrite any existing
11939 ret = pin_metadata_blocks(fs_info);
11941 fprintf(stderr, "error pinning down used bytes\n");
11946 * Need to drop all the block groups since we're going to recreate all
11949 btrfs_free_block_groups(fs_info);
11950 ret = reset_block_groups(fs_info);
11952 fprintf(stderr, "error resetting the block groups\n");
11956 /* Ok we can allocate now, reinit the extent root */
11957 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11959 fprintf(stderr, "extent root initialization failed\n");
11961 * When the transaction code is updated we should end the
11962 * transaction, but for now progs only knows about commit so
11963 * just return an error.
11969 * Now we have all the in-memory block groups setup so we can make
11970 * allocations properly, and the metadata we care about is safe since we
11971 * pinned all of it above.
11974 struct btrfs_block_group_cache *cache;
11976 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group, then write its item to the extent tree */
11979 start = cache->key.objectid + cache->key.offset;
11980 ret = btrfs_insert_item(trans, fs_info->extent_root,
11981 &cache->key, &cache->item,
11982 sizeof(cache->item));
11984 fprintf(stderr, "Error adding block group\n");
11987 btrfs_extent_post_op(trans, fs_info->extent_root);
11990 ret = reset_balance(trans, fs_info);
11992 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force tree block @eb to be COWed.
 *
 * The owner root recorded in the block header is read, a transaction is
 * started, and btrfs_search_slot() is run with cow enabled for the first
 * key of @eb with path.lowest_level set to the block's level. The
 * transaction is then committed.
 *
 * Returns PTR_ERR() of the root or transaction when either cannot be
 * obtained.
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured on the visible line, and the final return is not visible in
 * this excerpt.
 */
11997 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11999 struct btrfs_path path;
12000 struct btrfs_trans_handle *trans;
12001 struct btrfs_key key;
12004 printf("Recowing metadata block %llu\n", eb->start);
/* Look up the root that owns this block */
12005 key.objectid = btrfs_header_owner(eb);
12006 key.type = BTRFS_ROOT_ITEM_KEY;
12007 key.offset = (u64)-1;
12009 root = btrfs_read_fs_root(root->fs_info, &key);
12010 if (IS_ERR(root)) {
12011 fprintf(stderr, "Couldn't find owner root %llu\n",
12013 return PTR_ERR(root);
12016 trans = btrfs_start_transaction(root, 1);
12018 return PTR_ERR(trans);
12020 btrfs_init_path(&path);
/* Search down to the block's own level using its first key */
12021 path.lowest_level = btrfs_header_level(eb);
12022 if (path.lowest_level)
12023 btrfs_node_key_to_cpu(eb, &key, 0);
12025 btrfs_item_key_to_cpu(eb, &key, 0);
12027 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12028 btrfs_commit_transaction(trans, root);
12029 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root it was found in.
 *
 * The root with id bad->root_id is read, a transaction is started, the
 * item is located with btrfs_search_slot() (ins_len -1, cow 1) and removed
 * with btrfs_del_item(); the transaction is then committed.
 *
 * Returns PTR_ERR() of the root or transaction when either cannot be
 * obtained.
 * NOTE(review): handling of the search result and the final return are
 * not visible in this excerpt.
 */
12033 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12035 struct btrfs_path path;
12036 struct btrfs_trans_handle *trans;
12037 struct btrfs_key key;
12040 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12041 bad->key.type, bad->key.offset);
/* Look up the root the bad item lives in */
12042 key.objectid = bad->root_id;
12043 key.type = BTRFS_ROOT_ITEM_KEY;
12044 key.offset = (u64)-1;
12046 root = btrfs_read_fs_root(root->fs_info, &key);
12047 if (IS_ERR(root)) {
12048 fprintf(stderr, "Couldn't find owner root %llu\n",
12050 return PTR_ERR(root);
12053 trans = btrfs_start_transaction(root, 1);
12055 return PTR_ERR(trans);
12057 btrfs_init_path(&path);
12058 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12064 ret = btrfs_del_item(trans, root, &path);
12066 btrfs_commit_transaction(trans, root);
12067 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the superblock: start a transaction, set
 * the super copy's log_root and log_root_level to 0, and commit.
 * ret holds PTR_ERR(trans) when the transaction cannot be started,
 * otherwise the result of the commit.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
12071 static int zero_log_tree(struct btrfs_root *root)
12073 struct btrfs_trans_handle *trans;
12076 trans = btrfs_start_transaction(root, 1);
12077 if (IS_ERR(trans)) {
12078 ret = PTR_ERR(trans);
12081 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12082 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12083 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for a data range starting at @start.
 *
 * The range is processed one sector (fs_info->sectorsize) at a time: each
 * sector is read into @buf with read_extent_data() and passed to
 * btrfs_csum_file_block() together with the end of the range (start + len).
 * NOTE(review): the last parameter line, the error checks and the return
 * statement are not visible in this excerpt.
 */
12087 static int populate_csum(struct btrfs_trans_handle *trans,
12088 struct btrfs_root *csum_root, char *buf, u64 start,
12091 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12096 while (offset < len) {
12097 sectorsize = fs_info->sectorsize;
12098 ret = read_extent_data(fs_info, buf, start + offset,
12102 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12103 start + offset, buf, sectorsize);
12106 offset += sectorsize;
/*
 * Fill @csum_root with checksums for the file extents of one fs/subvolume
 * tree, @cur_root.
 *
 * A one-sector buffer is allocated, the tree is searched and then walked
 * with btrfs_next_item(). For each item the key type is tested against
 * BTRFS_EXTENT_DATA_KEY and the extent type against BTRFS_FILE_EXTENT_REG;
 * populate_csum() is called on an extent's disk bytenr / disk num bytes.
 * -EEXIST from populate_csum() is tested for explicitly.
 * NOTE(review): the targets of those tests, the cleanup of buf and the
 * return statement are not visible in this excerpt.
 */
12111 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12112 struct btrfs_root *csum_root,
12113 struct btrfs_root *cur_root)
12115 struct btrfs_path path;
12116 struct btrfs_key key;
12117 struct extent_buffer *node;
12118 struct btrfs_file_extent_item *fi;
/* Scratch buffer holding one sector of data at a time */
12125 buf = malloc(cur_root->fs_info->sectorsize);
12129 btrfs_init_path(&path);
12133 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12136 /* Iterate all regular file extents and fill its csum */
12138 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12140 if (key.type != BTRFS_EXTENT_DATA_KEY)
12142 node = path.nodes[0];
12143 slot = path.slots[0];
12144 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12145 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range */
12147 start = btrfs_file_extent_disk_bytenr(node, fi);
12148 len = btrfs_file_extent_disk_num_bytes(node, fi);
12150 ret = populate_csum(trans, csum_root, buf, start, len);
12151 if (ret == -EEXIST)
12157 * TODO: if next leaf is corrupted, jump to nearest next valid
12160 ret = btrfs_next_item(cur_root, &path);
12170 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking the fs/subvolume trees.
 *
 * The tree root is searched from BTRFS_FS_TREE_OBJECTID root items onwards.
 * Each item's objectid is tested against BTRFS_LAST_FREE_OBJECTID, its type
 * against BTRFS_ROOT_ITEM_KEY, and its objectid with is_fstree(); roots are
 * read with btrfs_read_fs_root() and handed to
 * fill_csum_tree_from_one_fs_root().
 * NOTE(review): the targets of those tests, the error handling and the
 * return statement are not visible in this excerpt.
 */
12175 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12176 struct btrfs_root *csum_root)
12178 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12179 struct btrfs_path path;
12180 struct btrfs_root *tree_root = fs_info->tree_root;
12181 struct btrfs_root *cur_root;
12182 struct extent_buffer *node;
12183 struct btrfs_key key;
12187 btrfs_init_path(&path);
12188 key.objectid = BTRFS_FS_TREE_OBJECTID;
12190 key.type = BTRFS_ROOT_ITEM_KEY;
12191 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12200 node = path.nodes[0];
12201 slot = path.slots[0];
12202 btrfs_item_key_to_cpu(node, &key, slot);
12203 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12205 if (key.type != BTRFS_ROOT_ITEM_KEY)
12207 if (!is_fstree(key.objectid))
/* The root is read with the key offset set to (u64)-1 */
12209 key.offset = (u64)-1;
12211 cur_root = btrfs_read_fs_root(fs_info, &key);
12212 if (IS_ERR(cur_root) || !cur_root) {
12213 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12217 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12222 ret = btrfs_next_item(tree_root, &path);
12232 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking the extent tree.
 *
 * The extent tree is searched and walked leaf by leaf. Each item's key type
 * is tested against BTRFS_EXTENT_ITEM_KEY and its flags against
 * BTRFS_EXTENT_FLAG_DATA; populate_csum() is called with the item's
 * key.objectid as the range start.
 * NOTE(review): the targets of those tests, the error handling, the
 * freeing of buf and the return statement are not visible in this excerpt.
 */
12236 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12237 struct btrfs_root *csum_root)
12239 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12240 struct btrfs_path path;
12241 struct btrfs_extent_item *ei;
12242 struct extent_buffer *leaf;
12244 struct btrfs_key key;
12247 btrfs_init_path(&path);
12249 key.type = BTRFS_EXTENT_ITEM_KEY;
12251 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12253 btrfs_release_path(&path);
/* Scratch buffer holding one sector of data at a time */
12257 buf = malloc(csum_root->fs_info->sectorsize);
12259 btrfs_release_path(&path);
/* Step to the next leaf once the current one is exhausted */
12264 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12265 ret = btrfs_next_leaf(extent_root, &path);
12273 leaf = path.nodes[0];
12275 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12276 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12281 ei = btrfs_item_ptr(leaf, path.slots[0],
12282 struct btrfs_extent_item);
12283 if (!(btrfs_extent_flags(leaf, ei) &
12284 BTRFS_EXTENT_FLAG_DATA)) {
12289 ret = populate_csum(trans, csum_root, buf, key.objectid,
12296 btrfs_release_path(&path);
12302 * Recalculate the csum and put it into the csum tree.
12304 * Extent tree init will wipe out all the extent info, so in that case, we
12305 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12306 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher for the two csum refill strategies.
 *
 * @trans:          transaction handle passed through unchanged
 * @csum_root:      csum tree root passed through unchanged
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(), zero selects
 *                  fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
12308 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12309 struct btrfs_root *csum_root,
12310 int search_fs_tree)
12312 if (search_fs_tree)
12313 return fill_csum_tree_from_fs(trans, csum_root);
12315 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Entries are detached one by one with remove_cache_extent() and mapped
 * back to their enclosing struct root_item_info; afterwards the tree
 * itself is freed and the global pointer is reset to NULL.
 */
12318 static void free_roots_info_cache(void)
/* NOTE(review): presumably an early exit when the cache was never built */
12320 if (!roots_info_cache)
12323 while (!cache_tree_empty(roots_info_cache)) {
12324 struct cache_extent *entry;
12325 struct root_item_info *rii;
12327 entry = first_cache_extent(roots_info_cache);
12330 remove_cache_extent(roots_info_cache, entry);
12331 rii = container_of(entry, struct root_item_info, cache_extent);
12335 free(roots_info_cache);
12336 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the best candidate for
 * that root's top tree block in the global roots_info_cache.
 *
 * Each cache entry is a struct root_item_info keyed by root id (start =
 * root id, size = 1) and carries level, bytenr, generation and node_count
 * of the highest-level tree block seen so far for that root.
 *
 * @info: filesystem whose extent root is scanned
 */
12339 static int build_roots_info_cache(struct btrfs_fs_info *info)
12342 struct btrfs_key key;
12343 struct extent_buffer *leaf;
12344 struct btrfs_path path;
/* Allocate and initialise the global cache on first use */
12346 if (!roots_info_cache) {
12347 roots_info_cache = malloc(sizeof(*roots_info_cache));
12348 if (!roots_info_cache)
12350 cache_tree_init(roots_info_cache);
12353 btrfs_init_path(&path);
12355 key.type = BTRFS_EXTENT_ITEM_KEY;
12357 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12360 leaf = path.nodes[0];
12363 struct btrfs_key found_key;
12364 struct btrfs_extent_item *ei;
12365 struct btrfs_extent_inline_ref *iref;
12366 int slot = path.slots[0];
12371 struct cache_extent *entry;
12372 struct root_item_info *rii;
/* End of this leaf: step to the next one and refresh leaf and slot */
12374 if (slot >= btrfs_header_nritems(leaf)) {
12375 ret = btrfs_next_leaf(info->extent_root, &path);
12382 leaf = path.nodes[0];
12383 slot = path.slots[0];
12386 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys are examined */
12388 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12389 found_key.type != BTRFS_METADATA_ITEM_KEY)
12392 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12393 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM is relevant only when flagged as a tree block */
12395 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12396 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline ref directly
 * follows the extent item. Otherwise a btrfs_tree_block_info sits between
 * the extent item and the inline ref and supplies the level.
 */
12399 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12400 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12401 level = found_key.offset;
12403 struct btrfs_tree_block_info *binfo;
12405 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12406 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12407 level = btrfs_tree_block_level(leaf, binfo);
12411 * For a root extent, it must be of the following type and the
12412 * first (and only one) iref in the item.
12414 type = btrfs_extent_inline_ref_type(leaf, iref);
12415 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* For a TREE_BLOCK_REF the inline ref offset is the owning root id */
12418 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12419 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entries start with level (u8)-1, meaning no candidate recorded yet */
12421 rii = malloc(sizeof(struct root_item_info));
12426 rii->cache_extent.start = root_id;
12427 rii->cache_extent.size = 1;
12428 rii->level = (u8)-1;
12429 entry = &rii->cache_extent;
12430 ret = insert_cache_extent(roots_info_cache, entry);
12433 rii = container_of(entry, struct root_item_info,
12437 ASSERT(rii->cache_extent.start == root_id);
12438 ASSERT(rii->cache_extent.size == 1);
/*
 * A block above the recorded level (or the first block seen for this
 * root) replaces the candidate and resets node_count to 1.
 */
12440 if (level > rii->level || rii->level == (u8)-1) {
12441 rii->level = level;
12442 rii->bytenr = found_key.objectid;
12443 rii->gen = btrfs_extent_generation(leaf, ei);
12444 rii->node_count = 1;
12445 } else if (level == rii->level) {
12453 btrfs_release_path(&path);
/*
 * Compare the root item at the current path position with the cached
 * root_item_info for the same root and optionally rewrite it.
 *
 * @path:           positioned on the root item (nodes[0] / slots[0])
 * @root_key:       key of that root item; objectid is the root id used
 *                  for the roots_info_cache lookup
 * @read_only_mode: non-zero means only report a mismatch; zero means the
 *                  bytenr, level and generation of the item are
 *                  overwritten in the leaf buffer
 */
12458 static int maybe_repair_root_item(struct btrfs_path *path,
12459 const struct btrfs_key *root_key,
12460 const int read_only_mode)
12462 const u64 root_id = root_key->objectid;
12463 struct cache_extent *entry;
12464 struct root_item_info *rii;
12465 struct btrfs_root_item ri;
12466 unsigned long offset;
12468 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12471 "Error: could not find extent items for root %llu\n",
12472 root_key->objectid);
12476 rii = container_of(entry, struct root_item_info, cache_extent);
12477 ASSERT(rii->cache_extent.start == root_id);
12478 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one top-level candidate is reported */
12480 if (rii->node_count != 1) {
12482 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item */
12487 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12488 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Any difference in bytenr, level or generation counts as outdated */
12490 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12491 btrfs_root_level(&ri) != rii->level ||
12492 btrfs_root_generation(&ri) != rii->gen) {
12495 * If we're in repair mode but our caller told us to not update
12496 * the root item, i.e. just check if it needs to be updated, don't
12497 * print this message, since the caller will call us again shortly
12498 * for the same root item without read only mode (the caller will
12499 * open a transaction first).
12501 if (!(read_only_mode && repair))
12503 "%sroot item for root %llu,"
12504 " current bytenr %llu, current gen %llu, current level %u,"
12505 " new bytenr %llu, new gen %llu, new level %u\n",
12506 (read_only_mode ? "" : "fixing "),
12508 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12509 btrfs_root_level(&ri),
12510 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node found in the extent tree is reported */
12512 if (btrfs_root_generation(&ri) > rii->gen) {
12514 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12515 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy, then write it back over the item in the leaf */
12519 if (!read_only_mode) {
12520 btrfs_set_root_bytenr(&ri, rii->bytenr);
12521 btrfs_set_root_level(&ri, rii->level);
12522 btrfs_set_root_generation(&ri, rii->gen);
12523 write_extent_buffer(path->nodes[0], &ri,
12524 offset, sizeof(ri));
12534 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12535 * caused read-only snapshots to be corrupted if they were created at a moment
12536 * when the source subvolume/snapshot had orphan items. The issue was that the
12537 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12538 * node instead of the post orphan cleanup root node.
12539 * So this function, and its callees, just detects and fixes those cases. Even
12540 * though the regression was for read-only snapshots, this function applies to
12541 * any snapshot/subvolume root.
12542 * This must be run before any other repair code - not doing so makes other
12543 * repair code delete or modify backrefs in the extent tree for example, which
12544 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the ROOT_ITEM keys of the tree root, starting at
 * BTRFS_FIRST_FREE_OBJECTID, and run maybe_repair_root_item() on each.
 *
 * The roots_info_cache is built first with build_roots_info_cache() and
 * released with free_roots_info_cache() before leaving.
 *
 * @info: filesystem to check
 */
12546 static int repair_root_items(struct btrfs_fs_info *info)
12548 struct btrfs_path path;
12549 struct btrfs_key key;
12550 struct extent_buffer *leaf;
12551 struct btrfs_trans_handle *trans = NULL;
12554 int need_trans = 0;
12556 btrfs_init_path(&path);
12558 ret = build_roots_info_cache(info);
12562 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12563 key.type = BTRFS_ROOT_ITEM_KEY;
12568 * Avoid opening and committing transactions if a leaf doesn't have
12569 * any root items that need to be fixed, so that we avoid rotating
12570 * backup roots unnecessarily.
12573 trans = btrfs_start_transaction(info->tree_root, 1);
12574 if (IS_ERR(trans)) {
12575 ret = PTR_ERR(trans);
12580 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12584 leaf = path.nodes[0];
12587 struct btrfs_key found_key;
/* Leaf exhausted: find the next key and release the path */
12589 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12590 int no_more_keys = find_next_key(&path, &key);
12592 btrfs_release_path(&path);
12594 ret = btrfs_commit_transaction(trans,
12606 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12608 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
/* NOTE(review): presumably the tree reloc root is skipped; confirm */
12610 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction the helper runs in read-only mode */
12613 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12617 if (!trans && repair) {
12620 btrfs_release_path(&path);
12630 free_roots_info_cache();
12631 btrfs_release_path(&path);
/*
 * NOTE(review): the result of this commit is discarded, unlike the commit
 * inside the loop above; confirm a failure here cannot be lost.
 */
12633 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of every block group, then set the super
 * block cache generation to (u64)-1 inside a transaction on the tree root.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 */
12640 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12642 struct btrfs_trans_handle *trans;
12643 struct btrfs_block_group_cache *bg_cache;
12647 /* Clear all free space cache inodes and its extent data */
12649 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12652 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup right after the end of this block group */
12655 current = bg_cache->key.objectid + bg_cache->key.offset;
12658 /* Don't forget to set cache_generation to -1 */
12659 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12660 if (IS_ERR(trans)) {
12661 error("failed to update super block cache generation");
12662 return PTR_ERR(trans);
12664 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the commit result is not captured; confirm this is intended */
12665 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache in the format selected by clear_version.
 *
 * clear_version 1: v1 cache, cleared through clear_free_space_cache().
 *                  A filesystem with the FREE_SPACE_TREE compat_ro bit
 *                  gets an error message pointing at v2 instead.
 * clear_version 2: v2 cache, cleared through
 *                  btrfs_clear_free_space_tree(). Without the
 *                  FREE_SPACE_TREE bit there is nothing to clear.
 *
 * @fs_info: filesystem to operate on
 */
12670 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12675 if (clear_version == 1) {
12676 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12678 "free space cache v2 detected, use --clear-space-cache v2");
12682 printf("Clearing free space cache\n");
12683 ret = clear_free_space_cache(fs_info);
12685 error("failed to clear free space cache");
12688 printf("Free space cache cleared\n");
12690 } else if (clear_version == 2) {
12691 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12692 printf("no free space cache v2 to clear\n");
12696 printf("Clear free space cache v2\n");
12697 ret = btrfs_clear_free_space_tree(fs_info);
12699 error("failed to clear free space cache v2: %d", ret);
12702 printf("free space cache v2 cleared\n");
/*
 * Help text for the check command, one string per output line.
 * cmd_check() passes this table to usage().
 */
12709 const char * const cmd_check_usage[] = {
/* Synopsis, then a short description, then the long description */
12710 "btrfs check [options] <device>",
12711 "Check structural integrity of a filesystem (unmounted).",
12712 "Check structural integrity of an unmounted filesystem. Verify internal",
12713 "trees' consistency and item connectivity. In the repair mode try to",
12714 "fix the problems found. ",
12715 "WARNING: the repair mode is considered dangerous",
/* Option descriptions */
12717 "-s|--super <superblock> use this superblock copy",
12718 "-b|--backup use the first valid backup root copy",
12719 "--repair try to repair the filesystem",
12720 "--readonly run in read-only mode (default)",
12721 "--init-csum-tree create a new CRC tree",
12722 "--init-extent-tree create a new extent tree",
12723 "--mode <MODE> allows choice of memory/IO trade-offs",
12724 " where MODE is one of:",
12725 " original - read inodes and extents to memory (requires",
12726 " more memory, does less IO)",
12727 " lowmem - try to use less memory but read blocks again",
12729 "--check-data-csum verify checksums of data blocks",
12730 "-Q|--qgroup-report print a report on qgroup consistency",
12731 "-E|--subvol-extents <subvolid>",
12732 " print subvolume extents and sharing state",
12733 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12734 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12735 "-p|--progress indicate progress",
12736 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command.
 *
 * Parses the command line, opens the filesystem named by the single
 * positional argument and runs the checks in the order they appear below:
 * chunks and extents, root items, free space cache or tree, fs roots,
 * csums, root refs and quota groups. Summary counters are printed at the
 * end.
 *
 * @argc: argument count as handed to getopt_long()
 * @argv: argument vector; argv[optind] is the device to check
 */
12740 int cmd_check(int argc, char **argv)
12742 struct cache_tree root_cache;
12743 struct btrfs_root *root;
12744 struct btrfs_fs_info *info;
12747 u64 tree_root_bytenr = 0;
12748 u64 chunk_root_bytenr = 0;
12749 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12753 int init_csum_tree = 0;
12755 int clear_space_cache = 0;
12756 int qgroup_report = 0;
12757 int qgroups_repaired = 0;
/* Exclusive open is always requested; options below OR in further flags */
12758 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-only options get values from 257 up, outside the char range */
12762 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12763 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12764 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12765 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12766 static const struct option long_options[] = {
12767 { "super", required_argument, NULL, 's' },
12768 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12769 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12770 { "init-csum-tree", no_argument, NULL,
12771 GETOPT_VAL_INIT_CSUM },
12772 { "init-extent-tree", no_argument, NULL,
12773 GETOPT_VAL_INIT_EXTENT },
12774 { "check-data-csum", no_argument, NULL,
12775 GETOPT_VAL_CHECK_CSUM },
12776 { "backup", no_argument, NULL, 'b' },
12777 { "subvol-extents", required_argument, NULL, 'E' },
12778 { "qgroup-report", no_argument, NULL, 'Q' },
12779 { "tree-root", required_argument, NULL, 'r' },
12780 { "chunk-root", required_argument, NULL,
12781 GETOPT_VAL_CHUNK_TREE },
12782 { "progress", no_argument, NULL, 'p' },
12783 { "mode", required_argument, NULL,
12785 { "clear-space-cache", required_argument, NULL,
12786 GETOPT_VAL_CLEAR_SPACE_CACHE},
12787 { NULL, 0, NULL, 0}
/*
 * NOTE(review): in the short option string E is not followed by a colon,
 * while --subvol-extents is declared required_argument and the subvolid
 * assignment below reads optarg. With the short form -E, optarg would be
 * NULL. Confirm whether the string should contain E followed by a colon.
 */
12790 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12794 case 'a': /* ignored */ break;
12796 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock copy index is bounded by BTRFS_SUPER_MIRROR_MAX */
12799 num = arg_strtou64(optarg);
12800 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12802 "super mirror should be less than %d",
12803 BTRFS_SUPER_MIRROR_MAX);
12806 bytenr = btrfs_sb_offset(((int)num));
12807 printf("using SB copy %llu, bytenr %llu\n", num,
12808 (unsigned long long)bytenr);
12814 subvolid = arg_strtou64(optarg);
12817 tree_root_bytenr = arg_strtou64(optarg);
12819 case GETOPT_VAL_CHUNK_TREE:
12820 chunk_root_bytenr = arg_strtou64(optarg);
12823 ctx.progress_enabled = true;
12827 usage(cmd_check_usage);
/* The write-enabling options below all add OPEN_CTREE_WRITES */
12828 case GETOPT_VAL_REPAIR:
12829 printf("enabling repair mode\n");
12831 ctree_flags |= OPEN_CTREE_WRITES;
12833 case GETOPT_VAL_READONLY:
12836 case GETOPT_VAL_INIT_CSUM:
12837 printf("Creating a new CRC tree\n");
12838 init_csum_tree = 1;
12840 ctree_flags |= OPEN_CTREE_WRITES;
12842 case GETOPT_VAL_INIT_EXTENT:
12843 init_extent_tree = 1;
12844 ctree_flags |= (OPEN_CTREE_WRITES |
12845 OPEN_CTREE_NO_BLOCK_GROUPS);
12848 case GETOPT_VAL_CHECK_CSUM:
12849 check_data_csum = 1;
12851 case GETOPT_VAL_MODE:
12852 check_mode = parse_check_mode(optarg);
12853 if (check_mode == CHECK_MODE_UNKNOWN) {
12854 error("unknown mode: %s", optarg);
/* v1 maps to 1, v2 maps to 2 and additionally invalidates the FST */
12858 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12859 if (strcmp(optarg, "v1") == 0) {
12860 clear_space_cache = 1;
12861 } else if (strcmp(optarg, "v2") == 0) {
12862 clear_space_cache = 2;
12863 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12866 "invalid argument to --clear-space-cache, must be v1 or v2");
12869 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) is required */
12874 if (check_argc_exact(argc - optind, 1))
12875 usage(cmd_check_usage);
/* Progress reporting runs through a task created by task_init() */
12877 if (ctx.progress_enabled) {
12878 ctx.tp = TASK_NOTHING;
12879 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12882 /* This check is the only reason for --readonly to exist */
12883 if (readonly && repair) {
12884 error("repair options are not compatible with --readonly");
12889 * Not supported yet
12891 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12892 error("low memory mode doesn't support repair yet");
12897 cache_tree_init(&root_cache);
/* A negative result is a lookup failure; the mounted case is reported below */
12899 if((ret = check_mounted(argv[optind])) < 0) {
12900 error("could not check mount status: %s", strerror(-ret));
12904 error("%s is currently mounted, aborting", argv[optind]);
12910 /* only allow partial opening under repair mode */
12912 ctree_flags |= OPEN_CTREE_PARTIAL;
12914 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12915 chunk_root_bytenr, ctree_flags);
12917 error("cannot open file system");
/* Publish the fs_info through the file-scope global_info pointer */
12923 global_info = info;
12924 root = info->fs_root;
12925 uuid_unparse(info->super_copy->fsid, uuidbuf);
12927 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12930 * Check the bare minimum before starting anything else that could rely
12931 * on it, namely the tree roots, any local consistency checks
12933 if (!extent_buffer_uptodate(info->tree_root->node) ||
12934 !extent_buffer_uptodate(info->dev_root->node) ||
12935 !extent_buffer_uptodate(info->chunk_root->node)) {
12936 error("critical roots corrupted, unable to check the filesystem");
/* --clear-space-cache: clear_space_cache holds the requested version */
12942 if (clear_space_cache) {
12943 ret = do_clear_free_space_cache(info, clear_space_cache);
12949 * repair mode will force us to commit transaction which
12950 * will make us fail to load log tree when mounting.
12952 if (repair && btrfs_super_log_root(info->super_copy)) {
12953 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12959 ret = zero_log_tree(root);
12962 error("failed to zero log tree: %d", ret);
/* -Q / --qgroup-report */
12967 if (qgroup_report) {
12968 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12970 ret = qgroup_verify_all(info);
12977 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12978 subvolid, argv[optind], uuidbuf);
12979 ret = print_extent_state(info, subvolid);
/*
 * --init-extent-tree and --init-csum-tree share one transaction started on
 * the extent root and committed after the requested trees are rebuilt.
 */
12984 if (init_extent_tree || init_csum_tree) {
12985 struct btrfs_trans_handle *trans;
12987 trans = btrfs_start_transaction(info->extent_root, 0);
12988 if (IS_ERR(trans)) {
12989 error("error starting transaction");
12990 ret = PTR_ERR(trans);
12995 if (init_extent_tree) {
12996 printf("Creating a new extent tree\n");
12997 ret = reinit_extent_tree(trans, info);
13003 if (init_csum_tree) {
13004 printf("Reinitialize checksum tree\n");
13005 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13007 error("checksum tree initialization failed: %d",
13014 ret = fill_csum_tree(trans, info->csum_root,
13018 error("checksum tree refilling failed: %d", ret);
13023 * Ok now we commit and run the normal fsck, which will add
13024 * extent entries for all of the items it finds.
13026 ret = btrfs_commit_transaction(trans, info->extent_root);
/* The extent and csum roots must be readable before the main checks */
13031 if (!extent_buffer_uptodate(info->extent_root->node)) {
13032 error("critical: extent_root, unable to check the filesystem");
13037 if (!extent_buffer_uptodate(info->csum_root->node)) {
13038 error("critical: csum_root, unable to check the filesystem");
13044 ret = do_check_chunks_and_extents(info);
13048 "errors found in extent allocation tree or chunk allocation");
/* Root items: the messages below use ret as the number of affected roots */
13050 ret = repair_root_items(info);
13053 error("failed to repair root items: %s", strerror(-ret));
13057 fprintf(stderr, "Fixed %d roots.\n", ret);
13059 } else if (ret > 0) {
13061 "Found %d roots with an outdated root item.\n",
13064 "Please run a filesystem check with the option --repair to fix them.\n");
/* Stage banners on stderr are printed only when progress is disabled */
13070 if (!ctx.progress_enabled) {
13071 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13072 fprintf(stderr, "checking free space tree\n");
13074 fprintf(stderr, "checking free space cache\n");
13076 ret = check_space_cache(root);
13079 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13080 error("errors found in free space tree");
13082 error("errors found in free space cache");
13087 * We used to have to have these hole extents in between our real
13088 * extents so if we don't have this flag set we need to make sure there
13089 * are no gaps in the file extents for inodes, otherwise we can just
13090 * ignore it when this happens.
13092 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13093 if (!ctx.progress_enabled)
13094 fprintf(stderr, "checking fs roots\n");
/* lowmem mode uses check_fs_roots_v2(); otherwise check_fs_roots() */
13095 if (check_mode == CHECK_MODE_LOWMEM)
13096 ret = check_fs_roots_v2(root->fs_info);
13098 ret = check_fs_roots(info, &root_cache);
13101 error("errors found in fs roots");
13105 fprintf(stderr, "checking csums\n");
13106 ret = check_csums(root);
13109 error("errors found in csum tree");
13113 fprintf(stderr, "checking root refs\n");
13114 /* For low memory mode, check_fs_roots_v2 handles root refs */
13115 if (check_mode != CHECK_MODE_LOWMEM) {
13116 ret = check_root_refs(root, &root_cache);
13119 error("errors found in root refs");
/* In repair mode, re-COW every extent buffer queued on recow_ebs */
13124 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13125 struct extent_buffer *eb;
13127 eb = list_first_entry(&root->fs_info->recow_ebs,
13128 struct extent_buffer, recow);
13129 list_del_init(&eb->recow);
13130 ret = recow_extent_buffer(root, eb);
13133 error("fails to fix transid errors");
/* Drain the file-scope delete_items list through delete_bad_item() */
13138 while (!list_empty(&delete_items)) {
13139 struct bad_item *bad;
13141 bad = list_first_entry(&delete_items, struct bad_item, list);
13142 list_del_init(&bad->list);
13144 ret = delete_bad_item(root, bad);
13150 if (info->quota_enabled) {
13151 fprintf(stderr, "checking quota groups\n");
13152 ret = qgroup_verify_all(info);
13155 error("failed to check quota groups");
13159 ret = repair_qgroups(info, &qgroups_repaired);
13162 error("failed to repair quota groups");
/* Buffers still queued on recow_ebs at this point are reported as errors */
13168 if (!list_empty(&root->fs_info->recow_ebs)) {
13169 error("transid errors in file system");
/* Summary of the file-scope counters collected during the checks */
13174 printf("found %llu bytes used, ",
13175 (unsigned long long)bytes_used);
13177 printf("error(s) found\n");
13179 printf("no error found\n");
13180 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13181 printf("total tree bytes: %llu\n",
13182 (unsigned long long)total_btree_bytes);
13183 printf("total fs tree bytes: %llu\n",
13184 (unsigned long long)total_fs_tree_bytes);
13185 printf("total extent tree bytes: %llu\n",
13186 (unsigned long long)total_extent_tree_bytes);
13187 printf("btree space waste bytes: %llu\n",
13188 (unsigned long long)btree_space_waste);
13189 printf("file data blocks allocated: %llu\n referenced %llu\n",
13190 (unsigned long long)data_bytes_allocated,
13191 (unsigned long long)data_bytes_referenced);
/* Release per-run state */
13193 free_qgroup_counts();
13194 free_root_recs_tree(&root_cache);
13198 if (ctx.progress_enabled)
13199 task_deinit(ctx.info);